From 4fee66579617d69a8137bf44fcb46be28a2a7a75 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Tue, 12 Dec 2023 10:12:58 +0100 Subject: [PATCH 01/23] Add a dbt template --- libs/template/templates/dbt-sql/README.md | 8 ++ .../dbt-sql/databricks_template_schema.json | 46 +++++++ .../templates/dbt-sql/library/versions.tmpl | 7 ++ .../dbt-sql/template/__preamble.tmpl | 9 ++ .../.vscode/__builtins__.pyi | 3 + .../{{.project_name}}/.vscode/extensions.json | 8 ++ .../{{.project_name}}/.vscode/settings.json | 30 +++++ .../template/{{.project_name}}/README.md | 119 ++++++++++++++++++ .../{{.project_name}}/analyses/.gitkeep | 0 .../{{.project_name}}/databricks.yml.tmpl | 43 +++++++ .../{{.project_name}}/dbt_project.yml.tmpl | 36 ++++++ .../{{.project_name}}/macros/.gitkeep | 0 .../models/example/my_first_dbt_model.sql | 27 ++++ .../models/example/my_second_dbt_model.sql | 6 + .../models/example/schema.yml | 21 ++++ .../profile_template.yml.tmpl | 32 +++++ .../{{.project_name}}/requirements-dev.txt | 6 + .../resources/{{.project_name}}_job.yml.tmpl | 37 ++++++ .../template/{{.project_name}}/seeds/.gitkeep | 0 .../{{.project_name}}/snapshots/.gitkeep | 0 .../template/{{.project_name}}/tests/.gitkeep | 0 21 files changed, 438 insertions(+) create mode 100644 libs/template/templates/dbt-sql/README.md create mode 100644 libs/template/templates/dbt-sql/databricks_template_schema.json create mode 100644 libs/template/templates/dbt-sql/library/versions.tmpl create mode 100644 libs/template/templates/dbt-sql/template/__preamble.tmpl create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/__builtins__.pyi create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/extensions.json create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/settings.json create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/README.md create mode 100644 
libs/template/templates/dbt-sql/template/{{.project_name}}/analyses/.gitkeep create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_project.yml.tmpl create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/macros/.gitkeep create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/my_first_dbt_model.sql create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/my_second_dbt_model.sql create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/schema.yml create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/requirements-dev.txt create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/seeds/.gitkeep create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/snapshots/.gitkeep create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/tests/.gitkeep diff --git a/libs/template/templates/dbt-sql/README.md b/libs/template/templates/dbt-sql/README.md new file mode 100644 index 0000000000..9b750bf560 --- /dev/null +++ b/libs/template/templates/dbt-sql/README.md @@ -0,0 +1,8 @@ +# dbt template + +This folder provides a template for using dbt-core with Databricks Asset Bundles. +It follows the standard dbt project structure and has an additional `resources` +directory to define Databricks resources such as jobs that run dbt models. + +* Learn more about the dbt and its standard project structure here: https://docs.getdbt.com/docs/build/projects. 
+* Learn more about Databricks Asset Bundles here: https://docs.databricks.com/en/dev-tools/bundles/index.html diff --git a/libs/template/templates/dbt-sql/databricks_template_schema.json b/libs/template/templates/dbt-sql/databricks_template_schema.json new file mode 100644 index 0000000000..4ec8f36a07 --- /dev/null +++ b/libs/template/templates/dbt-sql/databricks_template_schema.json @@ -0,0 +1,46 @@ +{ + "welcome_message": "\nWelcome to the dbt template for Databricks Asset Bundles!", + "properties": { + "project_name": { + "type": "string", + "pattern": "^[A-Za-z_][A-Za-z0-9_]+$", + "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores.", + "default": "my_dbt_project", + "description": "\nPlease provide a unique name for this project.\nproject_name", + "order": 1 + }, + "workspace_host_override": { + "comment": "We explicitly ask users for the workspace_host since we ask for a http_path below. A downside of doing this is that {{user_name}} may not be correct if they pick a different workspace than the one from the current profile.", + "type": "string", + "pattern": "^https:\\/\\/[^/]+$", + "pattern_match_failure_message": "URL must be of the form https://my.databricks.host", + "description": "\nPlease provide the workspace URL to use.\nworkspace_url", + "default": "{{workspace_host}}", + "order": 2 + }, + "http_path": { + "type": "string", + "pattern": "^/sql/.\\../warehouses/[a-z0-9]+$", + "pattern_match_failure_message": "Path must be of the form /sql/1.0/warehouses/abcdef1234567890", + "description": "\nPlease provide the HTTP Path of the SQL warehouse you would like to use with dbt during development\nYou can find this path by clicking on \"Connection Details\" for your SQL warehouse.\nhttp_path [example: /sql/1.0/warehouses/abcdef1234567890]", + "order": 3 + }, + "catalog": { + "type": "string", + "default": "", + "pattern": "^\\w*$", + "pattern_match_failure_message": "Invalid catalog name.", + "description": "\nPlease 
provide an initial catalog (leave blank if you would not want to use an initial catalog).\ncatalog", + "order": 4 + }, + "schema": { + "type": "string", + "default": "default", + "pattern": "^\\w+$", + "pattern_match_failure_message": "Invalid schema name.", + "description": "\nPlease provide a default schema for this project.\nNote that you can pick a different schema for local development when you first use the 'dbt init' command.\nschema", + "order": 4 + } + }, + "success_message": "\n📊 Your new project has been created in the '{{.project_name}}' directory!\nPlease refer to the README.md file for \"getting started\" instructions." +} diff --git a/libs/template/templates/dbt-sql/library/versions.tmpl b/libs/template/templates/dbt-sql/library/versions.tmpl new file mode 100644 index 0000000000..f9a879d25a --- /dev/null +++ b/libs/template/templates/dbt-sql/library/versions.tmpl @@ -0,0 +1,7 @@ +{{define "latest_lts_dbr_version" -}} + 13.3.x-scala2.12 +{{- end}} + +{{define "latest_lts_db_connect_version_spec" -}} + >=13.3,<13.4 +{{- end}} diff --git a/libs/template/templates/dbt-sql/template/__preamble.tmpl b/libs/template/templates/dbt-sql/template/__preamble.tmpl new file mode 100644 index 0000000000..b770b5ef94 --- /dev/null +++ b/libs/template/templates/dbt-sql/template/__preamble.tmpl @@ -0,0 +1,9 @@ +# Preamble + +This file only contains template directives; it is skipped for the actual output. 
+ +{{skip "__preamble"}} + +{{if eq .project_name "dbt"}} +{{fail "Project name 'dbt' is not supported"}} +{{end}} \ No newline at end of file diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/__builtins__.pyi b/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/__builtins__.pyi new file mode 100644 index 0000000000..0edd5181bc --- /dev/null +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/extensions.json b/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/extensions.json new file mode 100644 index 0000000000..a1dc5efed9 --- /dev/null +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/extensions.json @@ -0,0 +1,8 @@ +{ + "recommendations": [ + "databricks.databricks", + "ms-python.vscode-pylance", + "redhat.vscode-yaml", + "databricks.sqltools-databricks-driver", + ] +} diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/settings.json b/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/settings.json new file mode 100644 index 0000000000..889e5dc0c8 --- /dev/null +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/settings.json @@ -0,0 +1,30 @@ +{ + "python.analysis.stubPath": ".vscode", + "databricks.python.envFile": "${workspaceFolder}/.env", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." 
+ ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["src"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "python.envFile": "${workspaceFolder}/.databricks/.databricks.env", + "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python", + "sqltools.connections": [ + { + "connectionMethod": "VS Code Extension (beta)", + "catalog": "hive_metastore", + "previewLimit": 50, + "driver": "Databricks", + "name": "databricks", + "path": "/sql/1.0/warehouses/ec7fa4bd0f0afc8f" + } + ], + "sqltools.autoConnectTo": "", +} diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md new file mode 100644 index 0000000000..cc003dda6b --- /dev/null +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md @@ -0,0 +1,119 @@ +# {{.project_name}} + +The '{{.project_name}}' project was generated by using the dbt template for +Databricks Asset Bundles. It follows the standard dbt project structure +and has an additional `resources` directory to define Databricks resources such as jobs +that run dbt models. + +* Learn more about the dbt and its standard project structure here: https://docs.getdbt.com/docs/build/projects. +* Learn more about Databricks Asset Bundles here: https://docs.databricks.com/en/dev-tools/bundles/index.html + +## Development setup + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace: + ``` + $ databricks configure + ``` + +3. Install dbt + + To install dbt, you need a recent version of Python. For the instructions below, + we assume `python3` refers to the Python version you want to use. On some systems, + you may need to refer to a different Python version, e.g. `python` or `/usr/bin/python`. 
+ + Run these instructions from the `{{.project_name}}` directory. We recommend making + use of a Python virtual environment and installing dbt as follows: + + ``` + $ python3 -m venv .venv + $ . .venv/bin/activate + $ pip install -r requirements-dev.txt + ``` + +4. Initialize your dbt profile + + Use `dbt init` to initialize your profile. + + ``` + $ dbt init + ``` + + Note that dbt authentication uses personal access tokens by default + (see https://docs.databricks.com/dev-tools/auth/pat.html). + You can use OAuth as an alternative, but this currently requires manual configuration. + See https://github.com/databricks/dbt-databricks/blob/main/docs/oauth.md + for general instructions, or https://community.databricks.com/t5/technical-blog/using-dbt-core-with-oauth-on-azure-databricks/ba-p/46605 + for advice on setting up OAuth for Azure Databricks. + + To set up additional profiles, such as a 'prod' profile, + see https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles. + +5. Activate dbt so it can be used from the terminal + + ``` + $ . .venv/bin/activate + ``` + +## Local development with dbt + +Use `dbt` to [run this project locally using a SQL warehouse](https://docs.databricks.com/partners/prep/dbt.html): + +``` +$ dbt seed +$ dbt run +``` + +(Did you get an error that the dbt command could not be found? You may need +to try the last step from the development setup above to re-activate +your Python virtual environment!) + +Use `dbt test` to run tests generated from yml files such as `models/schema.yml` +and any SQL tests from `tests/` + +``` +$ dbt test +``` + +## Deploying to Databricks with Databricks Asset Bundles + +Databricks Asset Bundles can be used to deploy to Databricks and to execute +dbt commands as a job using Databricks Workflows. See +https://docs.databricks.com/dev-tools/bundles/index.html to learn more. 
+ +Use the Databricks CLI to deploy a development copy of this project to a workspace: + +``` +$ databricks bundle deploy --target dev +``` + +(Note that "dev" is the default target, so the `--target` parameter +is optional here.) + +This deploys everything that's defined for this project. +For example, the default template would deploy a job called +`[dev yourname] {{.project_name}}_job` to your workspace. +You can find that job by opening your workspace and clicking on **Workflows**. + +To run the deployed job, use the "run" command: +``` +$ databricks bundle run --target dev +``` + +To deploy a production copy, type: + +``` +$ databricks bundle deploy --target prod +``` + +## IDE support + +Optionally, install developer tools such as the Databricks extension for Visual Studio Code from +https://docs.databricks.com/dev-tools/vscode-ext.html. Third-party extensions +related to dbt may further enhance your dbt development experience! + +## CI/CD + +See https://docs.databricks.com/dev-tools/bundles/ci-cd.html for documentation +on CI/CD setup. \ No newline at end of file diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/analyses/.gitkeep b/libs/template/templates/dbt-sql/template/{{.project_name}}/analyses/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl new file mode 100644 index 0000000000..4fb95307a8 --- /dev/null +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -0,0 +1,43 @@ +# This is a Databricks asset bundle definition for {{.project_name}}. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: {{.project_name}} + +include: + - resources/*.yml + +# Variable declarations. These variables are assigned in the dev/prod targets below. 
+variables: + warehouse_id: + description: The warehouse to use + catalog: + description: The catalog to use + schema: + description: The schema to use + +# Deployment targets. +targets: + dev: + default: true + mode: development + workspace: + host: {{.workspace_host_override}} + variables: + warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}} + catalog: {{.catalog}} + schema: {{.schema}} # tip: use ${workspace.current_user.short_name} if you want your own schema + + prod: + mode: production + workspace: + host: {{.workspace_host_override}} + variables: + warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}} + catalog: {{.catalog}} + schema: {{.schema}} + {{- if not is_service_principal}} + run_as: + # This runs as {{user_name}} in production. We could also use a service principal here + # using service_principal_name (see the Databricks documentation). + user_name: {{user_name}} + {{- end}} \ No newline at end of file diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_project.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_project.yml.tmpl new file mode 100644 index 0000000000..7b51d8da82 --- /dev/null +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_project.yml.tmpl @@ -0,0 +1,36 @@ + +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: '{{.project_name}}' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: '{{.project_name}}' + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! 
+model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["seeds"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ +# directory as views. These settings can be overridden in the individual model +# files using the `{{"{{"}} config(...) {{"}}"}}` macro. +models: + {{.project_name}}: + # Config indicated by + and applies to all files under models/example/ + example: + +materialized: view diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/macros/.gitkeep b/libs/template/templates/dbt-sql/template/{{.project_name}}/macros/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/my_first_dbt_model.sql b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/my_first_dbt_model.sql new file mode 100644 index 0000000000..f31a12d948 --- /dev/null +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/my_first_dbt_model.sql @@ -0,0 +1,27 @@ + +/* + Welcome to your first dbt model! + Did you know that you can also configure models directly within SQL files? 
+ This will override configurations stated in dbt_project.yml + + Try changing "table" to "view" below +*/ + +{{ config(materialized='table') }} + +with source_data as ( + + select 1 as id + union all + select null as id + +) + +select * +from source_data + +/* + Uncomment the line below to remove records with null `id` values +*/ + +-- where id is not null diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/my_second_dbt_model.sql b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/my_second_dbt_model.sql new file mode 100644 index 0000000000..c91f8793a5 --- /dev/null +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/my_second_dbt_model.sql @@ -0,0 +1,6 @@ + +-- Use the `ref` function to select from other models + +select * +from {{ ref('my_first_dbt_model') }} +where id = 1 diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/schema.yml b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/schema.yml new file mode 100644 index 0000000000..2a53081715 --- /dev/null +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/schema.yml @@ -0,0 +1,21 @@ + +version: 2 + +models: + - name: my_first_dbt_model + description: "A starter dbt model" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique + - not_null + + - name: my_second_dbt_model + description: "A starter dbt model" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique + - not_null diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl new file mode 100644 index 0000000000..355786a7c1 --- /dev/null +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl @@ -0,0 +1,32 @@ +# This file defines prompts with 
defaults for dbt initialization. +# It is used when the `dbt init` command is invoked. +# +fixed: + type: databricks +prompts: + host: + default: {{(regexp "^https?://").ReplaceAllString .workspace_host_override ""}} + token: + hint: 'personal access token to use, dapiXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' + hide_input: true + {{- if eq .catalog ""}} + _choose_unity_catalog: + 'use Unity Catalog': + catalog: + hint: 'initial catalog' + 'do not use Unity Catalog': + _fixed_catalog: null + {{- else}} + http_path: + default: {{.http_path}} + catalog: + hint: 'initial catalog' + default: {{.catalog}} + {{- end}} + schema: + hint: 'default schema where dbt will build objects, e.g. lennart_dev' + default: {{.schema}} + threads: + hint: '1 or more' + type: 'int' + default: 4 diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/requirements-dev.txt b/libs/template/templates/dbt-sql/template/{{.project_name}}/requirements-dev.txt new file mode 100644 index 0000000000..3363b96a7b --- /dev/null +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/requirements-dev.txt @@ -0,0 +1,6 @@ +## requirements-dev.txt: dependencies for local development. +## +## For defining dependencies used by jobs in Databricks Workflows, see +## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + +dbt-databricks>=1.0.0,<2.0.0 \ No newline at end of file diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl new file mode 100644 index 0000000000..40a946c26d --- /dev/null +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl @@ -0,0 +1,37 @@ +resources: + jobs: + {{.project_name}}_job: + name: {{.project_name}}_job + + schedule: + # Run every day at 9:27 AM + quartz_cron_expression: 21 27 9 * * ? 
+ timezone_id: UTC + + email_notifications: + on_failure: + - {{user_name}} + + tasks: + - task_key: dbt + dbt_task: + project_directory: .. # use the root of this project + commands: + - dbt deps + - dbt seed + - dbt run + warehouse_id: ${var.warehouse_id} + catalog: ${var.catalog} + schema: ${var.schema} + new_cluster: + spark_version: {{template "latest_lts_dbr_version"}} + node_type_id: {{smallest_node_type}} + num_workers: 0 + spark_conf: + spark.master: "local[*, 4]" + spark.databricks.cluster.profile: singleNode + custom_tags: + ResourceClass: SingleNode + libraries: + - pypi: + package: dbt-databricks>=1.0.0,<2.0.0 \ No newline at end of file diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/seeds/.gitkeep b/libs/template/templates/dbt-sql/template/{{.project_name}}/seeds/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/snapshots/.gitkeep b/libs/template/templates/dbt-sql/template/{{.project_name}}/snapshots/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/tests/.gitkeep b/libs/template/templates/dbt-sql/template/{{.project_name}}/tests/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 From b3a5ef83642f61c4b715ae7f655293dbe4ab909b Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Wed, 13 Dec 2023 09:35:45 +0100 Subject: [PATCH 02/23] Use a template for VS Code settings --- .../.vscode/{settings.json => settings.json.tmpl} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/{settings.json => settings.json.tmpl} (100%) diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/settings.json b/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/settings.json.tmpl similarity index 100% rename from 
libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/settings.json rename to libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/settings.json.tmpl From c81d1391fea98b3305181ccd45571c05cc5f4425 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Tue, 19 Dec 2023 10:27:19 +0100 Subject: [PATCH 03/23] Tweak message --- .../template/{{.project_name}}/profile_template.yml.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl index 355786a7c1..98b8553b3a 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl @@ -24,7 +24,7 @@ prompts: default: {{.catalog}} {{- end}} schema: - hint: 'default schema where dbt will build objects, e.g. lennart_dev' + hint: 'default schema where dbt will build objects, e.g. 
{{.schema}} or lennart_dev' default: {{.schema}} threads: hint: '1 or more' From c900cfffef6a8ee41ed05183be5cf320f7494289 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Tue, 19 Dec 2023 10:32:32 +0100 Subject: [PATCH 04/23] Update --- bundle/config/mutator/populate_current_user.go | 6 +++--- libs/template/helpers.go | 15 +++++++++++++++ .../dbt-sql/databricks_template_schema.json | 9 --------- .../{{.project_name}}/databricks.yml.tmpl | 6 +++--- .../{{.project_name}}/profile_template.yml.tmpl | 4 ++-- 5 files changed, 23 insertions(+), 17 deletions(-) diff --git a/bundle/config/mutator/populate_current_user.go b/bundle/config/mutator/populate_current_user.go index 5b5d30969d..22d9c18106 100644 --- a/bundle/config/mutator/populate_current_user.go +++ b/bundle/config/mutator/populate_current_user.go @@ -33,7 +33,7 @@ func (m *populateCurrentUser) Apply(ctx context.Context, b *bundle.Bundle) error } b.Config.Workspace.CurrentUser = &config.User{ - ShortName: getShortUserName(me.UserName), + ShortName: GetShortUserName(me.UserName), User: me, } @@ -53,7 +53,7 @@ func replaceNonAlphanumeric(r rune) rune { // Get a short-form username, based on the user's primary email address. // We leave the full range of unicode letters in tact, but remove all "special" characters, // including dots, which are not supported in e.g. experiment names. 
-func getShortUserName(emailAddress string) string { - local, _, _ := strings.Cut(emailAddress, "@") +func GetShortUserName(userName string) string { + local, _, _ := strings.Cut(userName, "@") return strings.Map(replaceNonAlphanumeric, local) } diff --git a/libs/template/helpers.go b/libs/template/helpers.go index 7f306a3aa5..c01fa2b652 100644 --- a/libs/template/helpers.go +++ b/libs/template/helpers.go @@ -9,6 +9,7 @@ import ( "regexp" "text/template" + "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/auth" "github.com/databricks/databricks-sdk-go/service/iam" @@ -98,6 +99,20 @@ func loadHelpers(ctx context.Context) template.FuncMap { } return result, nil }, + "short_name": func() (string, error) { + if cachedUser == nil { + var err error + cachedUser, err = w.CurrentUser.Me(ctx) + if err != nil { + return "", err + } + } + result := mutator.GetShortUserName(cachedUser.UserName) + if result == "" { + result = cachedUser.Id + } + return result, nil + }, "is_service_principal": func() (bool, error) { if cachedIsServicePrincipal != nil { return *cachedIsServicePrincipal, nil diff --git a/libs/template/templates/dbt-sql/databricks_template_schema.json b/libs/template/templates/dbt-sql/databricks_template_schema.json index 4ec8f36a07..5c2ead9062 100644 --- a/libs/template/templates/dbt-sql/databricks_template_schema.json +++ b/libs/template/templates/dbt-sql/databricks_template_schema.json @@ -9,15 +9,6 @@ "description": "\nPlease provide a unique name for this project.\nproject_name", "order": 1 }, - "workspace_host_override": { - "comment": "We explicitly ask users for the workspace_host since we ask for a http_path below. 
A downside of doing this is that {{user_name}} may not be correct if they pick a different workspace than the one from the current profile.", - "type": "string", - "pattern": "^https:\\/\\/[^/]+$", - "pattern_match_failure_message": "URL must be of the form https://my.databricks.host", - "description": "\nPlease provide the workspace URL to use.\nworkspace_url", - "default": "{{workspace_host}}", - "order": 2 - }, "http_path": { "type": "string", "pattern": "^/sql/.\\../warehouses/[a-z0-9]+$", diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl index 4fb95307a8..b0695fff7a 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -21,7 +21,7 @@ targets: default: true mode: development workspace: - host: {{.workspace_host_override}} + host: {{workspace_host}} variables: warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}} catalog: {{.catalog}} @@ -30,7 +30,7 @@ targets: prod: mode: production workspace: - host: {{.workspace_host_override}} + host: {{workspace_host}} variables: warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}} catalog: {{.catalog}} @@ -40,4 +40,4 @@ targets: # This runs as {{user_name}} in production. We could also use a service principal here # using service_principal_name (see the Databricks documentation). 
user_name: {{user_name}} - {{- end}} \ No newline at end of file + {{- end}} diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl index 98b8553b3a..7ef2016af9 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl @@ -5,7 +5,7 @@ fixed: type: databricks prompts: host: - default: {{(regexp "^https?://").ReplaceAllString .workspace_host_override ""}} + default: {{(regexp "^https?://").ReplaceAllString workspace_host ""}} token: hint: 'personal access token to use, dapiXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' hide_input: true @@ -24,7 +24,7 @@ prompts: default: {{.catalog}} {{- end}} schema: - hint: 'default schema where dbt will build objects, e.g. {{.schema}} or lennart_dev' + hint: 'default schema where dbt will build objects, e.g. {{.schema}} or {{short_name}}_dev' default: {{.schema}} threads: hint: '1 or more' From 16f26a370521ae6b634a310f79d4f698cdd70a5d Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sun, 24 Dec 2023 16:32:52 +0100 Subject: [PATCH 05/23] Add tests --- libs/template/renderer_test.go | 22 ++++++++++++++----- .../dbt-sql/databricks_template_schema.json | 2 +- .../{{.project_name}}/databricks.yml.tmpl | 1 + 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/libs/template/renderer_test.go b/libs/template/renderer_test.go index 8d0c21010b..766d326b91 100644 --- a/libs/template/renderer_test.go +++ b/libs/template/renderer_test.go @@ -37,10 +37,10 @@ func assertFilePermissions(t *testing.T, path string, perm fs.FileMode) { assert.Equal(t, perm, info.Mode().Perm()) } -func assertBuiltinTemplateValid(t *testing.T, settings map[string]any, target string, isServicePrincipal bool, build bool, tempDir string) { +func assertBuiltinTemplateValid(t *testing.T, template string, settings 
map[string]any, target string, isServicePrincipal bool, build bool, tempDir string) { ctx := context.Background() - templatePath, err := prepareBuiltinTemplates("default-python", tempDir) + templatePath, err := prepareBuiltinTemplates(template, tempDir) require.NoError(t, err) libraryPath := filepath.Join(templatePath, "library") @@ -98,7 +98,7 @@ func TestPrepareBuiltInTemplatesWithRelativePaths(t *testing.T) { assert.Equal(t, "./default-python", dir) } -func TestBuiltinTemplateValid(t *testing.T) { +func TestBuiltinPythonTemplateValid(t *testing.T) { // Test option combinations options := []string{"yes", "no"} isServicePrincipal := false @@ -114,7 +114,7 @@ func TestBuiltinTemplateValid(t *testing.T) { "include_python": includePython, } tempDir := t.TempDir() - assertBuiltinTemplateValid(t, config, "dev", isServicePrincipal, build, tempDir) + assertBuiltinTemplateValid(t, "default-python", config, "dev", isServicePrincipal, build, tempDir) } } } @@ -136,10 +136,22 @@ func TestBuiltinTemplateValid(t *testing.T) { require.NoError(t, err) defer os.RemoveAll(tempDir) - assertBuiltinTemplateValid(t, config, "prod", isServicePrincipal, build, tempDir) + assertBuiltinTemplateValid(t, "default-python", config, "prod", isServicePrincipal, build, tempDir) defer os.RemoveAll(tempDir) } +func TestBuiltinDbtTemplateValid(t *testing.T) { + // Test prod mode + build + config := map[string]any{ + "project_name": "my_project", + "http_path": "/sql/warehouses/123", + "catalog": "hive_metastore", + "schema": "lennart", + } + build := false + assertBuiltinTemplateValid(t, "dbt-sql", config, "dev", true, build, t.TempDir()) + assertBuiltinTemplateValid(t, "dbt-sql", config, "prod", false, build, t.TempDir()) +} func TestRendererWithAssociatedTemplateInLibrary(t *testing.T) { tmpDir := t.TempDir() diff --git a/libs/template/templates/dbt-sql/databricks_template_schema.json b/libs/template/templates/dbt-sql/databricks_template_schema.json index 5c2ead9062..8a090dfdd3 100644 --- 
a/libs/template/templates/dbt-sql/databricks_template_schema.json +++ b/libs/template/templates/dbt-sql/databricks_template_schema.json @@ -21,7 +21,7 @@ "default": "", "pattern": "^\\w*$", "pattern_match_failure_message": "Invalid catalog name.", - "description": "\nPlease provide an initial catalog (leave blank if you would not want to use an initial catalog).\ncatalog", + "description": "\nPlease provide an initial catalog (leave blank to use the workspace's default catalog).\ncatalog", "order": 4 }, "schema": { diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl index b0695fff7a..e593d793e6 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -31,6 +31,7 @@ targets: mode: production workspace: host: {{workspace_host}} + root_path: /Shared/.bundle/prod/${bundle.name} variables: warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}} catalog: {{.catalog}} From d85d4c42dd6388ef2a6a8dacbc6d6f076621bc7f Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Thu, 28 Dec 2023 13:33:40 +0100 Subject: [PATCH 06/23] Fix test --- bundle/config/mutator/populate_current_user_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundle/config/mutator/populate_current_user_test.go b/bundle/config/mutator/populate_current_user_test.go index bbb65e079f..90076cc5bb 100644 --- a/bundle/config/mutator/populate_current_user_test.go +++ b/bundle/config/mutator/populate_current_user_test.go @@ -71,6 +71,6 @@ func TestGetShortUserName(t *testing.T) { } for _, tt := range tests { - assert.Equal(t, tt.expected, getShortUserName(tt.email)) + assert.Equal(t, tt.expected, GetShortUserName(tt.email)) } } From 9030f56d20e129e88d703517f1501d36042bce11 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sat, 13 Jan 
2024 12:34:03 +0100 Subject: [PATCH 07/23] Add template --- cmd/bundle/init.go | 4 ++++ cmd/bundle/init_test.go | 2 ++ 2 files changed, 6 insertions(+) diff --git a/cmd/bundle/init.go b/cmd/bundle/init.go index db8250d071..2434606ffe 100644 --- a/cmd/bundle/init.go +++ b/cmd/bundle/init.go @@ -34,6 +34,10 @@ var nativeTemplates = []nativeTemplate{ name: "default-python", description: "The default Python template for Notebooks / Delta Live Tables / Workflows", }, + { + name: "dbt-sql", + description: "The dbt SQL template (https://www.databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)", + }, { name: "mlops-stacks", gitUrl: "https://github.com/databricks/mlops-stacks", diff --git a/cmd/bundle/init_test.go b/cmd/bundle/init_test.go index aa89915968..ca97d63068 100644 --- a/cmd/bundle/init_test.go +++ b/cmd/bundle/init_test.go @@ -30,6 +30,7 @@ func TestBundleInitRepoName(t *testing.T) { func TestNativeTemplateOptions(t *testing.T) { expected := []cmdio.Tuple{ {Name: "default-python", Id: "The default Python template for Notebooks / Delta Live Tables / Workflows"}, + {Name: "dbt-sql", Id: "The dbt SQL template (https://www.databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)"}, {Name: "mlops-stacks", Id: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)"}, {Name: "custom...", Id: "Bring your own template"}, } @@ -38,6 +39,7 @@ func TestNativeTemplateOptions(t *testing.T) { func TestNativeTemplateHelpDescriptions(t *testing.T) { expected := `- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows +- dbt-sql: The dbt SQL template (https://www.databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks) - mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)` assert.Equal(t, expected, nativeTemplateHelpDescriptions()) } From 45ea8dbb8bfede301cdc60bbab3482ac64dca165 Mon Sep 17 00:00:00 2001 From: 
Lennart Kats Date: Sat, 13 Jan 2024 12:52:43 +0100 Subject: [PATCH 08/23] Improve catalog handling --- .../template/{{.project_name}}/databricks.yml.tmpl | 8 ++++++++ .../{{.project_name}}/profile_template.yml.tmpl | 11 ++++++----- .../resources/{{.project_name}}_job.yml.tmpl | 12 ++++++------ 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl index e593d793e6..fae88c077e 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -24,7 +24,11 @@ targets: host: {{workspace_host}} variables: warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}} + {{- if eq .catalog ""}} + catalog: "" # workspace default + {{- else}} catalog: {{.catalog}} + {{- end}} schema: {{.schema}} # tip: use ${workspace.current_user.short_name} if you want your own schema prod: @@ -34,7 +38,11 @@ targets: root_path: /Shared/.bundle/prod/${bundle.name} variables: warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}} + {{- if eq .catalog ""}} + catalog: "" # workspace default + {{- else}} catalog: {{.catalog}} + {{- end}} schema: {{.schema}} {{- if not is_service_principal}} run_as: diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl index 7ef2016af9..3f3b1da519 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl @@ -9,16 +9,17 @@ prompts: token: hint: 'personal access token to use, dapiXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' hide_input: true + http_path: + hint 'HTTP path of SQL warehouse to use' + 
default: {{.http_path}} {{- if eq .catalog ""}} _choose_unity_catalog: - 'use Unity Catalog': + 'use the workspace default catalog (or do not use Unity Catalog)': + _fixed_catalog: null + 'specify a default catalog': catalog: hint: 'initial catalog' - 'do not use Unity Catalog': - _fixed_catalog: null {{- else}} - http_path: - default: {{.http_path}} catalog: hint: 'initial catalog' default: {{.catalog}} diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl index 40a946c26d..0aa6601295 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl @@ -2,21 +2,21 @@ resources: jobs: {{.project_name}}_job: name: {{.project_name}}_job - + schedule: # Run every day at 9:27 AM quartz_cron_expression: 21 27 9 * * ? timezone_id: UTC - + email_notifications: on_failure: - {{user_name}} - + tasks: - task_key: dbt dbt_task: - project_directory: .. 
# use the root of this project - commands: + project_directory: ../ + commands: - dbt deps - dbt seed - dbt run @@ -34,4 +34,4 @@ resources: ResourceClass: SingleNode libraries: - pypi: - package: dbt-databricks>=1.0.0,<2.0.0 \ No newline at end of file + package: dbt-databricks>=1.0.0,<2.0.0 From 0268c88b3b44b23a00bd119a151d4e2e6c5bf5d0 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sat, 13 Jan 2024 17:17:38 +0100 Subject: [PATCH 09/23] Minor tweaks --- .../templates/dbt-sql/databricks_template_schema.json | 2 +- .../template/{{.project_name}}/.vscode/extensions.json | 4 +--- .../template/{{.project_name}}/.vscode/settings.json.tmpl | 5 ++++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/libs/template/templates/dbt-sql/databricks_template_schema.json b/libs/template/templates/dbt-sql/databricks_template_schema.json index 8a090dfdd3..276408656e 100644 --- a/libs/template/templates/dbt-sql/databricks_template_schema.json +++ b/libs/template/templates/dbt-sql/databricks_template_schema.json @@ -33,5 +33,5 @@ "order": 4 } }, - "success_message": "\n📊 Your new project has been created in the '{{.project_name}}' directory!\nPlease refer to the README.md file for \"getting started\" instructions."
+ "success_message": "\n📊 Your new project has been created in the '{{.project_name}}' directory!\nIf you already have dbt installed, just type 'dbt init' to get started.\nPlease refer to the README.md file for full \"getting started\" instructions.\n" } diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/extensions.json b/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/extensions.json index a1dc5efed9..28fe943fdb 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/extensions.json +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/extensions.json @@ -1,8 +1,6 @@ { "recommendations": [ - "databricks.databricks", - "ms-python.vscode-pylance", "redhat.vscode-yaml", - "databricks.sqltools-databricks-driver", + "innoverio.vscode-dbt-power-user", ] } diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/settings.json.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/settings.json.tmpl index 889e5dc0c8..562ba136f5 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/settings.json.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/settings.json.tmpl @@ -23,8 +23,11 @@ "previewLimit": 50, "driver": "Databricks", "name": "databricks", - "path": "/sql/1.0/warehouses/ec7fa4bd0f0afc8f" + "path": "{{.http_path}}" } ], "sqltools.autoConnectTo": "", + "[jinja-sql]": { + "editor.defaultFormatter": "innoverio.vscode-dbt-power-user" + } } From 94ebd9adc5d12e45711b526a1dcf37da0d000bd1 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sat, 20 Jan 2024 18:52:19 +0100 Subject: [PATCH 10/23] Update template to use materialized views & streaming tables --- .../{README.md => README.md.tmpl} | 13 ++++++--- .../{{.project_name}}/dbt_project.yml.tmpl | 8 ++---- .../models/example/my_first_dbt_model.sql | 27 ------------------- .../models/example/my_second_dbt_model.sql | 6 -----
.../models/example/orders_daily.sql | 10 +++++++ .../models/example/orders_raw.sql | 14 ++++++++++ .../models/example/schema.yml | 16 +++++------ .../{{.project_name}}/requirements-dev.txt | 5 +--- .../template/{{.project_name}}/README.md.tmpl | 5 ++++ 9 files changed, 49 insertions(+), 55 deletions(-) rename libs/template/templates/dbt-sql/template/{{.project_name}}/{README.md => README.md.tmpl} (92%) delete mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/my_first_dbt_model.sql delete mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/my_second_dbt_model.sql create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_daily.sql create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_raw.sql diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl similarity index 92% rename from libs/template/templates/dbt-sql/template/{{.project_name}}/README.md rename to libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl index cc003dda6b..705a47615a 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl @@ -22,7 +22,7 @@ that run dbt models. To install dbt, you need a recent version of Python. For the instructions below, we assume `python3` refers to the Python version you want to use. On some systems, you may need to refer to a different Python version, e.g. `python` or `/usr/bin/python`. - + Run these instructions from the `{{.project_name}}` directory. We recommend making use of a Python virtual environment and installing dbt as follows: @@ -39,7 +39,7 @@ that run dbt models. 
``` $ dbt init ``` - + Note that dbt authentication uses personal access tokens by default (see https://docs.databricks.com/dev-tools/auth/pat.html). You can use OAuth as an alternative, but this currently requires manual configuration. @@ -98,7 +98,7 @@ You can find that job by opening your workpace and clicking on **Workflows**. To run the deployed job, use the "run" command: ``` -$ databricks bundle run --targed dev +$ databricks bundle run --target dev ``` To deploy a production copy, type: @@ -107,6 +107,11 @@ To deploy a production copy, type: $ databricks bundle deploy --target prod ``` +Note that the job from the default template comes with a schedule that runs +every day (defined in resources/{{.project_name}}_job.yml). The schedule +is paused when deploying in development mode (see +https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). + ## IDE support Optionally, install developer tools such as the Databricks extension for Visual Studio Code from @@ -116,4 +121,4 @@ related to dbt may further enhance your dbt development experience! ## CI/CD See https://docs.databricks.com/dev-tools/bundles/ci-cd.html for documentation -on CI/CD setup. \ No newline at end of file +on CI/CD setup. diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_project.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_project.yml.tmpl index 7b51d8da82..560db52d46 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_project.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_project.yml.tmpl @@ -1,7 +1,3 @@ - -# Name your project! Project names should contain only lowercase characters -# and underscores. 
A good package name should reflect your organization's -# name or the intended use of these models name: '{{.project_name}}' version: '1.0.0' config-version: 2 @@ -27,8 +23,8 @@ clean-targets: # directories to be removed by `dbt clean` # Full documentation: https://docs.getdbt.com/docs/configuring-models # In this example config, we tell dbt to build all models in the example/ -# directory as views. These settings can be overridden in the individual model -# files using the `{{"{{"}} config(...) {{"}}"}}` macro. +# directory as views by default. These settings can be overridden in the +# individual model files using the `{{"{{"}} config(...) {{"}}"}}` macro. models: {{.project_name}}: # Config indicated by + and applies to all files under models/example/ diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/my_first_dbt_model.sql b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/my_first_dbt_model.sql deleted file mode 100644 index f31a12d948..0000000000 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/my_first_dbt_model.sql +++ /dev/null @@ -1,27 +0,0 @@ - -/* - Welcome to your first dbt model! - Did you know that you can also configure models directly within SQL files? 
- This will override configurations stated in dbt_project.yml - - Try changing "table" to "view" below -*/ - -{{ config(materialized='table') }} - -with source_data as ( - - select 1 as id - union all - select null as id - -) - -select * -from source_data - -/* - Uncomment the line below to remove records with null `id` values -*/ - --- where id is not null diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/my_second_dbt_model.sql b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/my_second_dbt_model.sql deleted file mode 100644 index c91f8793a5..0000000000 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/my_second_dbt_model.sql +++ /dev/null @@ -1,6 +0,0 @@ - --- Use the `ref` function to select from other models - -select * -from {{ ref('my_first_dbt_model') }} -where id = 1 diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_daily.sql b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_daily.sql new file mode 100644 index 0000000000..3e8605fb6d --- /dev/null +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_daily.sql @@ -0,0 +1,10 @@ +-- Example materialized view +-- Read more about materialized views at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables +-- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561.
+{{ config(materialized = 'materialized_view') }} + + +select order_date, count(*) AS number_of_orders +from {{ ref('orders_raw') }} +where order_date is not null +group by order_date diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_raw.sql b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_raw.sql new file mode 100644 index 0000000000..dd2befb445 --- /dev/null +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_raw.sql @@ -0,0 +1,14 @@ +-- Example streaming table to ingest /databricks-datasets/retail-org/sales_orders/*.json +-- Read more about streaming tables at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables +-- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561. +{{ config(materialized = 'streaming_table') }} + +select + customer_name, + date(timestamp(from_unixtime(order_datetime))) as order_date, + order_number +from stream read_files( + "/databricks-datasets/retail-org/sales_orders/", + format => "json", + header => true +) diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/schema.yml b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/schema.yml index 2a53081715..d34b9e6452 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/schema.yml +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/schema.yml @@ -2,20 +2,20 @@ version: 2 models: - - name: my_first_dbt_model - description: "A starter dbt model" + - name: orders_raw + description: "Raw ingested orders" columns: - - name: id - description: "The primary key for this table" + - name: customer_name + description: "The name of a customer" tests: - unique - not_null - - name: my_second_dbt_model - description: "A starter 
dbt model" + - name: orders_daily + description: "Number of orders by day" columns: - - name: id - description: "The primary key for this table" + - name: order_date + description: "The date on which orders took place" tests: - unique - not_null diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/requirements-dev.txt b/libs/template/templates/dbt-sql/template/{{.project_name}}/requirements-dev.txt index 3363b96a7b..10d7b9f10d 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/requirements-dev.txt +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/requirements-dev.txt @@ -1,6 +1,3 @@ ## requirements-dev.txt: dependencies for local development. -## -## For defining dependencies used by jobs in Databricks Workflows, see -## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html -dbt-databricks>=1.0.0,<2.0.0 \ No newline at end of file +dbt-databricks>=1.0.0,<2.0.0 diff --git a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl index 476c1cd6cd..b45dce176e 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl @@ -28,6 +28,11 @@ The '{{.project_name}}' project was generated by using the default-python templa $ databricks bundle deploy --target prod ``` + Note that the job from the default template comes with a schedule that runs + every day (defined in resources/{{.project_name}}_job.yml). The schedule + is paused when deploying in development mode (see + https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). + 5. 
To run a job or pipeline, use the "run" command: ``` $ databricks bundle run From 14bc1fa2597bc0b7dac8bca21b53b75aac991814 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sat, 20 Jan 2024 22:11:14 +0100 Subject: [PATCH 11/23] Add conditional --- .../template/{{.project_name}}/databricks.yml.tmpl | 2 +- .../{{.project_name}}/models/example/orders_daily.sql | 8 +++++++- .../resources/{{.project_name}}_job.yml.tmpl | 6 +++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl index fae88c077e..3b1aeac836 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -35,7 +35,7 @@ targets: mode: production workspace: host: {{workspace_host}} - root_path: /Shared/.bundle/prod/${bundle.name} + root_path: /Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} variables: warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}} {{- if eq .catalog ""}} diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_daily.sql b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_daily.sql index 3e8605fb6d..03e6a5d1c9 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_daily.sql +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_daily.sql @@ -5,6 +5,12 @@ select order_date, count(*) AS number_of_orders + from {{ ref('orders_raw') }} -where order_date is not null + +-- Process a smaller range unless we're in 'prod' mode +{% if var('bundle_target', 'default') != 'prod' %} +where created_at >= '2019-08-01' and created_at < '2019-09-01' +{% endif %} + group by order_date diff --git 
a/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl index 0aa6601295..4ea0eab48e 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl @@ -17,9 +17,9 @@ resources: dbt_task: project_directory: ../ commands: - - dbt deps - - dbt seed - - dbt run + - 'dbt deps' + - 'dbt seed' + - 'dbt run --vars "{ "bundle_target": "${bundle.target}" }"' warehouse_id: ${var.warehouse_id} catalog: ${var.catalog} schema: ${var.schema} From 1501298eb2ec30dc9b97fbb4533b6677807d6d64 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sat, 20 Jan 2024 22:51:27 +0100 Subject: [PATCH 12/23] Improve template --- .../template/{{.project_name}}/README.md.tmpl | 4 ++-- .../{orders_daily.sql => orders_daily.sql.tmpl} | 14 ++++++++------ .../{orders_raw.sql => orders_raw.sql.tmpl} | 8 ++++++-- .../{{.project_name}}/profile_template.yml.tmpl | 2 +- .../template/{{.project_name}}/README.md.tmpl | 4 ++-- 5 files changed, 19 insertions(+), 13 deletions(-) rename libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/{orders_daily.sql => orders_daily.sql.tmpl} (50%) rename libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/{orders_raw.sql => orders_raw.sql.tmpl} (59%) diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl index 705a47615a..5a700b3197 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl @@ -107,8 +107,8 @@ To deploy a production copy, type: $ databricks bundle deploy --target prod ``` -Note that the job from the 
default template comes with a schedule that runs -every day (defined in resources/{{.project_name}}_job.yml). The schedule +Note that the default job from the template has a schedule that runs every day +(defined in resources/{{.project_name}}_job.yml). The schedule is paused when deploying in development mode (see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_daily.sql b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_daily.sql.tmpl similarity index 50% rename from libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_daily.sql rename to libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_daily.sql.tmpl index 03e6a5d1c9..29d32a19ab 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_daily.sql +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_daily.sql.tmpl @@ -1,16 +1,18 @@ --- Example materialized view +-- This model file defines a materialized view called 'orders_daily' +-- in the catalog/schema selected in resources/{{.project_name}}_job.yml or +-- in the current dbt profile during development. +-- -- Read more about materialized at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables -- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561. 
-{{ config(materialized = 'materialized_view') }} - +{{"{{"}} config(materialized = 'materialized_view') {{"}}"}} select order_date, count(*) AS number_of_orders -from {{ ref('orders_raw') }} +from {{"{{"}} ref('orders_raw') {{"}}"}} --- Process a smaller range unless we're in 'prod' mode +-- During development, only process a smaller range of data {% if var('bundle_target', 'default') != 'prod' %} -where created_at >= '2019-08-01' and created_at < '2019-09-01' +where order_date >= '2019-08-01' and order_date < '2019-09-01' {% endif %} group by order_date diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_raw.sql b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_raw.sql.tmpl similarity index 59% rename from libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_raw.sql rename to libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_raw.sql.tmpl index dd2befb445..c5a509f2d2 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_raw.sql +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_raw.sql.tmpl @@ -1,7 +1,11 @@ --- Example streaming table to ingest /databricks-datasets/retail-org/sales_orders/*.json +-- This model file defines a streaming table called 'orders_raw' +-- in the catalog/schema selected in resources/{{.project_name}}_job.yml or +-- in the current dbt profile during development. +-- +-- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/ -- Read more about streaming tables at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables -- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561. 
-{{ config(materialized = 'streaming_table') }} +{{"{{"}} config(materialized = 'streaming_table') {{"}}"}} select customer_name, diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl index 3f3b1da519..01381f08cc 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl @@ -10,7 +10,7 @@ prompts: hint: 'personal access token to use, dapiXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' hide_input: true http_path: - hint 'HTTP path of SQL warehouse to use' + hint: 'HTTP path of SQL warehouse to use' default: {{.http_path}} {{- if eq .catalog ""}} _choose_unity_catalog: diff --git a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl index b45dce176e..9f372ea1c6 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl @@ -28,8 +28,8 @@ The '{{.project_name}}' project was generated by using the default-python templa $ databricks bundle deploy --target prod ``` - Note that the job from the default template comes with a schedule that runs - every day (defined in resources/{{.project_name}}_job.yml). The schedule + Note that the default job from the template has a schedule that runs every day + (defined in resources/{{.project_name}}_job.yml). The schedule is paused when deploying in development mode (see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). 
From 6fc5ed4edf590ce5732756a0bd59abec131443d9 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 22 Jan 2024 09:23:33 +0100 Subject: [PATCH 13/23] Offer an option to use personal schemas --- .../dbt-sql/databricks_template_schema.json | 45 ++++++++++++++++--- .../template/{{.project_name}}/README.md.tmpl | 29 ++++++------ .../{{.project_name}}/databricks.yml.tmpl | 28 +++++++----- .../profile_template.yml.tmpl | 16 ++++--- .../resources/{{.project_name}}_job.yml.tmpl | 1 + .../template/{{.project_name}}/README.md.tmpl | 2 +- 6 files changed, 81 insertions(+), 40 deletions(-) diff --git a/libs/template/templates/dbt-sql/databricks_template_schema.json b/libs/template/templates/dbt-sql/databricks_template_schema.json index 276408656e..5c21bc459d 100644 --- a/libs/template/templates/dbt-sql/databricks_template_schema.json +++ b/libs/template/templates/dbt-sql/databricks_template_schema.json @@ -13,25 +13,56 @@ "type": "string", "pattern": "^/sql/.\\../warehouses/[a-z0-9]+$", "pattern_match_failure_message": "Path must be of the form /sql/1.0/warehouses/abcdef1234567890", - "description": "\nPlease provide the HTTP Path of the SQL warehouse you would like to use with dbt during development\nYou can find this path by clicking on \"Connection Details\" for your SQL warehouse.\nhttp_path [example: /sql/1.0/warehouses/abcdef1234567890]", + "description": " \nPlease provide the HTTP Path of the SQL warehouse you would like to use with dbt during development.\nYou can find this path by clicking on \"Connection Details\" for your SQL warehouse.\nhttp_path [example: /sql/1.0/warehouses/abcdef1234567890]", + "order": 2 + }, + "personal_schemas": { + "type": "string", + "description": "\nWould you like to use a personal schema for each user working on this project? 
(e.g., 'catalog.{{short_name}}')\npersonal_schemas", + "enum": [ + "yes, use a personal schema for each user during development", + "no, use a shared schema during development" + ], "order": 3 }, - "catalog": { + "default_catalog": { "type": "string", "default": "", "pattern": "^\\w*$", "pattern_match_failure_message": "Invalid catalog name.", - "description": "\nPlease provide an initial catalog (leave blank to use the workspace's default catalog).\ncatalog", + "description": "\nPlease provide an initial catalog (leave blank to use the workspace's default catalog).\ndefault_catalog", "order": 4 }, - "schema": { + "shared_schema": { + "skip_prompt_if": { + "properties": { + "personal_schemas": { + "const": "yes, use a personal schema for each user during development" + } + } + }, "type": "string", "default": "default", "pattern": "^\\w+$", "pattern_match_failure_message": "Invalid schema name.", - "description": "\nPlease provide a default schema for this project.\nNote that you can pick a different schema for local development when you first use the 'dbt init' command.\nschema", - "order": 4 + "description": "\nPlease provide an initial schema during development.\ndefault_schema", + "order": 5 + }, + "prod_schema": { + "skip_prompt_if": { + "properties": { + "personal_schemas": { + "const": "no, use a shared schema during development" + } + } + }, + "type": "string", + "default": "default", + "pattern": "^\\w+$", + "pattern_match_failure_message": "Invalid schema name.", + "description": "\nPlease provide an initial schema for production.\nNote that your production catalog, schema, and warehouse can always be changed in your databricks.yml file.\ndefault_schema", + "order": 5 } }, - "success_message": "\n📊 Your new project has been created in the '{{.project_name}}' directory!\nIf you already have dbt installed, just type 'dbt init' to get started.\nPlease refer to the README.md file for full \"getting started\" instructions.\n" } + "success_message": "\n📊 Your
new project has been created in the '{{.project_name}}' directory!\nPlease navigate to that directory and type 'dbt init' to get started.\nRefer to the README.md file for the full \"getting started\" guide and production setup instructions.\n" } diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl index 5a700b3197..fd534a1a68 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl @@ -8,11 +8,14 @@ that run dbt models. * Learn more about dbt and its standard project structure here: https://docs.getdbt.com/docs/build/projects. * Learn more about Databricks Asset Bundles here: https://docs.databricks.com/en/dev-tools/bundles/index.html +The remainder of this file includes instructions for local development (using dbt) +and deployment to production (using Databricks Asset Bundles). + ## Development setup 1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html -2. Authenticate to your Databricks workspace: +2. Authenticate to your Databricks workspace, if you have not done so already: ``` $ databricks configure ``` @@ -96,10 +99,11 @@ For example, the default template would deploy a job called `[dev yourname] {{.project_name}}_job` to your workspace. You can find that job by opening your workspace and clicking on **Workflows**. -To run the deployed job, use the "run" command: -``` -$ databricks bundle run --target dev -``` +You can also deploy to your production target directly from the command-line. +The warehouse, catalog, and schema for that target are configured in databricks.yml. +When deploying to this target, note that the default job at resources/{{.project_name}}_job.yml +has a schedule set that runs every day.
The schedule is paused when deploying in development mode +(see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). To deploy a production copy, type: @@ -107,18 +111,15 @@ To deploy a production copy, type: $ databricks bundle deploy --target prod ``` -Note that the default job from the template has a schedule that runs every day -(defined in resources/{{.project_name}}_job.yml). The schedule -is paused when deploying in development mode (see -https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). +## CI/CD + +Using CI/CD, deployment can be automated! + +See https://docs.databricks.com/dev-tools/bundles/ci-cd.html for documentation +on CI/CD setup. ## IDE support Optionally, install developer tools such as the Databricks extension for Visual Studio Code from https://docs.databricks.com/dev-tools/vscode-ext.html. Third-party extensions related to dbt may further enhance your dbt development experience! - -## CI/CD - -See https://docs.databricks.com/dev-tools/bundles/ci-cd.html for documentation -on CI/CD setup. diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl index 3b1aeac836..6a72f4890c 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -15,6 +15,18 @@ variables: schema: description: The schema to use +{{- $catalog := .default_catalog}} +{{- if eq .default_catalog ""}} +{{- $catalog = "\"\" # workspace default"}} +{{- end}} + +{{- $dev_schema := .shared_schema }} +{{- $prod_schema := .shared_schema }} +{{- if (regexp "^yes").MatchString .personal_schemas}} +{{- $dev_schema = "${workspace.current_user.short_name}"}} +{{- $prod_schema = .prod_schema}} +{{- end}} + # Deployment targets. 
targets: dev: @@ -24,12 +36,8 @@ targets: host: {{workspace_host}} variables: warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}} - {{- if eq .catalog ""}} - catalog: "" # workspace default - {{- else}} - catalog: {{.catalog}} - {{- end}} - schema: {{.schema}} # tip: use ${workspace.current_user.short_name} if you want your own schema + catalog: {{$catalog}} + schema: {{$dev_schema}} prod: mode: production @@ -38,12 +46,8 @@ targets: root_path: /Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} variables: warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}} - {{- if eq .catalog ""}} - catalog: "" # workspace default - {{- else}} - catalog: {{.catalog}} - {{- end}} - schema: {{.schema}} + catalog: {{$catalog}} + schema: {{$prod_schema}} {{- if not is_service_principal}} run_as: # This runs as {{user_name}} in production. We could also use a service principal here diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl index 01381f08cc..0151916bb1 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl @@ -12,9 +12,9 @@ prompts: http_path: hint: 'HTTP path of SQL warehouse to use' default: {{.http_path}} - {{- if eq .catalog ""}} + {{- if eq .default_catalog ""}} _choose_unity_catalog: - 'use the workspace default catalog (or do not use Unity Catalog)': + 'use the default workspace catalog (or do not use Unity Catalog)': _fixed_catalog: null 'specify a default catalog': catalog: @@ -22,12 +22,16 @@ prompts: {{- else}} catalog: hint: 'initial catalog' - default: {{.catalog}} + default: {{.default_catalog}} {{- end}} schema: - hint: 'default schema where dbt will build objects, e.g. 
{{.schema}} or {{short_name}}_dev' - default: {{.schema}} + {{- if (regexp "^yes").MatchString .personal_schemas}} + hint: 'personal schema where dbt will build objects during development (example: {{short_name}})' + {{- else}} + hint: 'default schema where dbt will build objects' + default: {{.shared_schema}} + {{- end}} threads: - hint: '1 or more' + hint: 'threads to use during development, 1 or more' type: 'int' default: 4 diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl index 4ea0eab48e..8c95054467 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl @@ -26,6 +26,7 @@ resources: new_cluster: spark_version: {{template "latest_lts_dbr_version"}} node_type_id: {{smallest_node_type}} + data_security_mode: SINGLE_USER num_workers: 0 spark_conf: spark.master: "local[*, 4]" diff --git a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl index 9f372ea1c6..5adade0b31 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl @@ -6,7 +6,7 @@ The '{{.project_name}}' project was generated by using the default-python templa 1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html -2. Authenticate to your Databricks workspace: +2. 
Authenticate to your Databricks workspace, if you have not done so already: ``` $ databricks configure ``` From 99f920eaf9c8bedcd6332dc1160090da6f0d9a97 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Wed, 24 Jan 2024 13:41:20 +0100 Subject: [PATCH 14/23] Fix ANSI mode --- bundle/config/mutator/populate_current_user.go | 2 +- .../{{.project_name}}/models/example/orders_raw.sql.tmpl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bundle/config/mutator/populate_current_user.go b/bundle/config/mutator/populate_current_user.go index eba5ba82da..e9f2f47fad 100644 --- a/bundle/config/mutator/populate_current_user.go +++ b/bundle/config/mutator/populate_current_user.go @@ -47,6 +47,6 @@ func (m *populateCurrentUser) Apply(ctx context.Context, b *bundle.Bundle) error // We leave the full range of unicode letters in tact, but remove all "special" characters, // including dots, which are not supported in e.g. experiment names. func GetShortUserName(userName string) string { - local, _, _ := strings.Cut(emailAddress, "@") + local, _, _ := strings.Cut(userName, "@") return textutil.NormalizeString(local) } diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_raw.sql.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_raw.sql.tmpl index c5a509f2d2..6c000254f3 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_raw.sql.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_raw.sql.tmpl @@ -9,7 +9,7 @@ select customer_name, - date(timestamp(from_unixtime(order_datetime))) as order_date, + date(timestamp(from_unixtime(try_cast(order_datetime as bigint)))) as order_date, order_number from stream read_files( "/databricks-datasets/retail-org/sales_orders/", From 1099eed75ca8e21411c5b5dc2c24cb31b0854115 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Thu, 25 Jan 2024 21:15:28 +0100 Subject: [PATCH 15/23] 
Don't ask for a "production" schema, just assume "default" --- .../dbt-sql/databricks_template_schema.json | 31 +++++-------------- .../{{.project_name}}/databricks.yml.tmpl | 2 +- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/libs/template/templates/dbt-sql/databricks_template_schema.json b/libs/template/templates/dbt-sql/databricks_template_schema.json index 5c21bc459d..68cd2622e6 100644 --- a/libs/template/templates/dbt-sql/databricks_template_schema.json +++ b/libs/template/templates/dbt-sql/databricks_template_schema.json @@ -16,6 +16,14 @@ "description": " \nPlease provide the HTTP Path of the SQL warehouse you would like to use with dbt during development.\nYou can find this path by clicking on \"Connection Details\" for your SQL warehouse.\nhttp_path [example: /sql/1.0/warehouses/abcdef1234567890]", "order": 2 }, + "default_catalog": { + "type": "string", + "default": "", + "pattern": "^\\w*$", + "pattern_match_failure_message": "Invalid catalog name.", + "description": "\nPlease provide an initial catalog (leave blank to use the workspace's default catalog).\ndefault_catalog", + "order": 4 + }, "personal_schemas": { "type": "string", "description": "\nWould you like to use a personal schema for each user working on this project? 
(e.g., 'catalog.{{short_name}}')\npersonal_schemas", @@ -25,14 +33,6 @@ ], "order": 3 }, - "default_catalog": { - "type": "string", - "default": "", - "pattern": "^\\w*$", - "pattern_match_failure_message": "Invalid catalog name.", - "description": "\nPlease provide an initial catalog (leave blank to use the workspace's default catalog).\ndefault_catalog", - "order": 4 - }, "shared_schema": { "skip_prompt_if": { "properties": { @@ -47,21 +47,6 @@ "pattern_match_failure_message": "Invalid schema name.", "description": "\nPlease provide a initial schema during development.\ndefault_schema", "order": 5 - }, - "prod_schema": { - "skip_prompt_if": { - "properties": { - "personal_schemas": { - "const": "no, use a shared schema during development" - } - } - }, - "type": "string", - "default": "default", - "pattern": "^\\w+$", - "pattern_match_failure_message": "Invalid schema name.", - "description": "\nPlease provide a initial schema for production.\nNote that your production catalog, schema, and warehouse can always be changed in your databricks.yml file.\ndefault_schema", - "order": 5 } }, "success_message": "\n📊 Your new project has been created in the '{{.project_name}}' directory!\nPlease navigate to that directory and type 'dbt init' to get started.\nRefer to the README.md file for full \"getting started\" guide and production setup instructions.\n" } diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl index 6a72f4890c..e88329dfca 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -24,7 +24,7 @@ variables: {{- $prod_schema := .shared_schema }} {{- if (regexp "^yes").MatchString .personal_schemas}} {{- $dev_schema = "${workspace.current_user.short_name}"}} -{{- $prod_schema = .prod_schema}} +{{- $prod_schema =
"default"}} {{- end}} # Deployment targets. From 33c5e9167ba9642e2ac1321da43cf1cc6c47d324 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Thu, 25 Jan 2024 22:10:47 +0100 Subject: [PATCH 16/23] Explain mode: development --- .../dbt-sql/template/{{.project_name}}/databricks.yml.tmpl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl index e88329dfca..aa7a679d89 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -31,6 +31,8 @@ variables: targets: dev: default: true + # We use 'mode: development' to indicate this is a personal development copy. + # Any job schedules and triggers are paused by default. mode: development workspace: host: {{workspace_host}} From 7275310e8eda89441eec175b7d586c0bf6a53139 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 26 Jan 2024 09:38:43 +0100 Subject: [PATCH 17/23] Change project layout based on OSS team feedback --- libs/template/renderer_test.go | 27 +++++++++----- .../template/{{.project_name}}/README.md.tmpl | 29 ++++++++++----- .../{{.project_name}}/databricks.yml.tmpl | 34 +++--------------- .../dbt_profiles/profiles.yml.tmpl | 36 +++++++++++++++++++ .../{{.project_name}}/dbt_project.yml.tmpl | 16 ++++----- .../resources/{{.project_name}}_job.yml.tmpl | 25 ++++++++----- .../{ => src}/analyses/.gitkeep | 0 .../{ => src}/macros/.gitkeep | 0 .../models/example/orders_daily.sql.tmpl | 2 +- .../models/example/orders_raw.sql.tmpl | 0 .../{ => src}/models/example/schema.yml | 0 .../{ => src}/seeds/.gitkeep | 0 .../{ => src}/snapshots/.gitkeep | 0 .../{ => src}/tests/.gitkeep | 0 14 files changed, 104 insertions(+), 65 deletions(-) create mode 100644 libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_profiles/profiles.yml.tmpl rename 
libs/template/templates/dbt-sql/template/{{.project_name}}/{ => src}/analyses/.gitkeep (100%) rename libs/template/templates/dbt-sql/template/{{.project_name}}/{ => src}/macros/.gitkeep (100%) rename libs/template/templates/dbt-sql/template/{{.project_name}}/{ => src}/models/example/orders_daily.sql.tmpl (94%) rename libs/template/templates/dbt-sql/template/{{.project_name}}/{ => src}/models/example/orders_raw.sql.tmpl (100%) rename libs/template/templates/dbt-sql/template/{{.project_name}}/{ => src}/models/example/schema.yml (100%) rename libs/template/templates/dbt-sql/template/{{.project_name}}/{ => src}/seeds/.gitkeep (100%) rename libs/template/templates/dbt-sql/template/{{.project_name}}/{ => src}/snapshots/.gitkeep (100%) rename libs/template/templates/dbt-sql/template/{{.project_name}}/{ => src}/tests/.gitkeep (100%) diff --git a/libs/template/renderer_test.go b/libs/template/renderer_test.go index 766d326b91..1365619448 100644 --- a/libs/template/renderer_test.go +++ b/libs/template/renderer_test.go @@ -50,6 +50,9 @@ func assertBuiltinTemplateValid(t *testing.T, template string, settings map[stri // Prepare helpers cachedUser = &iam.User{UserName: "user@domain.com"} + if isServicePrincipal { + cachedUser.UserName = "1d410060-a513-496f-a197-23cc82e5f46d" + } cachedIsServicePrincipal = &isServicePrincipal ctx = root.SetWorkspaceClient(ctx, w) helpers := loadHelpers(ctx) @@ -141,17 +144,23 @@ func TestBuiltinPythonTemplateValid(t *testing.T) { } func TestBuiltinDbtTemplateValid(t *testing.T) { - // Test prod mode + build - config := map[string]any{ - "project_name": "my_project", - "http_path": "/sql/warehouses/123", - "catalog": "hive_metastore", - "schema": "lennart", + for _, personal_schemas := range []string{"yes", "no"} { + for _, target := range []string{"dev", "prod"} { + for _, isServicePrincipal := range []bool{true, false} { + config := map[string]any{ + "project_name": "my_project", + "http_path": "/sql/1.0/warehouses/123", + "default_catalog":
"hive_metastore", + "personal_schemas": personal_schemas, + "shared_schema": "lennart", + } + build := false + assertBuiltinTemplateValid(t, "dbt-sql", config, target, isServicePrincipal, build, t.TempDir()) + } + } } - build := false - assertBuiltinTemplateValid(t, "dbt-sql", config, "dev", true, build, t.TempDir()) - assertBuiltinTemplateValid(t, "dbt-sql", config, "prod", false, build, t.TempDir()) } + func TestRendererWithAssociatedTemplateInLibrary(t *testing.T) { tmpDir := t.TempDir() diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl index fd534a1a68..5354ceccc6 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl @@ -72,6 +72,13 @@ $ dbt run to try the last step from the development setup above to re-activate your Python virtual environment!) + +To just evaluate a single model defined in a file called orders.sql, use: + +``` +$ dbt run --model orders +``` + Use `dbt test` to run tests generated from yml files such as `models/schema.yml` and any SQL tests from `tests/` @@ -79,7 +86,20 @@ and any SQL tests from `tests/` $ dbt test ``` -## Deploying to Databricks with Databricks Asset Bundles +## Production setup + +Your production dbt profiles are defined in dbt_profiles/profiles.yml. +These profiles define the default catalog, schema, and any other +target-specific settings. Read more about dbt profiles on Databricks at +https://docs.databricks.com/en/workflows/jobs/how-to/use-dbt-in-workflows.html#advanced-run-dbt-with-a-custom-profile. + +The target workspaces for staging and prod are defined in databricks.yml. +You can manually deploy based on these configurations (see below). +Or you can use CI/CD to automate deployment. See +https://docs.databricks.com/dev-tools/bundles/ci-cd.html for documentation +on CI/CD setup.
+ ## Manually deploying to Databricks with Databricks Asset Bundles Databricks Asset Bundles can be used to deploy to Databricks and to execute dbt commands as a job using Databricks Workflows. See @@ -111,13 +131,6 @@ To deploy a production copy, type: $ databricks bundle deploy --target prod ``` -## CI/CD - -Using CI/CD, deployment can be automated! - -See https://docs.databricks.com/dev-tools/bundles/ci-cd.html for documentation -on CI/CD setup. - ## IDE support Optionally, install developer tools such as the Databricks extension for Visual Studio Code from diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl index aa7a679d89..fdda03c0d0 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -1,4 +1,5 @@ -# This is a Databricks asset bundle definition for {{.project_name}}. +# This file defines the structure of this project and how it is deployed +# to production using Databricks Asset Bundles. # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. bundle: name: {{.project_name}} @@ -6,28 +7,8 @@ bundle: include: - resources/*.yml -# Variable declarations. These variables are assigned in the dev/prod targets below. -variables: - warehouse_id: - description: The warehouse to use - catalog: - description: The catalog to use - schema: - description: The schema to use - -{{- $catalog := .default_catalog}} -{{- if eq .default_catalog ""}} -{{- $catalog = "\"\" # workspace default"}} -{{- end}} - -{{- $dev_schema := .shared_schema }} -{{- $prod_schema := .shared_schema }} -{{- if (regexp "^yes").MatchString .personal_schemas}} -{{- $dev_schema = "${workspace.current_user.short_name}"}} -{{- $prod_schema = "default"}} -{{- end}} - # Deployment targets. +# The default schema, catalog, etc.
for dbt are defined in dbt_profiles/profiles.yml targets: dev: default: true @@ -36,20 +17,13 @@ targets: mode: development workspace: host: {{workspace_host}} - variables: - warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}} - catalog: {{$catalog}} - schema: {{$dev_schema}} prod: mode: production workspace: host: {{workspace_host}} + # We always use /Users/{{user_name}} for all resources to make sure we only have a single copy. root_path: /Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} - variables: - warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}} - catalog: {{$catalog}} - schema: {{$prod_schema}} {{- if not is_service_principal}} run_as: # This runs as {{user_name}} in production. We could also use a service principal here diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_profiles/profiles.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_profiles/profiles.yml.tmpl new file mode 100644 index 0000000000..d29bd55ce3 --- /dev/null +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_profiles/profiles.yml.tmpl @@ -0,0 +1,36 @@ +{{- $catalog := .default_catalog}} +{{- if eq .default_catalog ""}} +{{- $catalog = "\"\" # workspace default"}} +{{- end}} +# This file defines dbt profiles for deployed dbt jobs. +# Note that for local development you should create your own, local profile. +# (see README.md). 
+my_dbt_project: + target: dev # default target + outputs: + + dev: + type: databricks + method: http + catalog: {{$catalog}} + schema: "{{"{{"}} var('dev_schema') {{"}}"}}" + + http_path: {{.http_path}} + + # The workspace host / token are provided by Databricks + # see databricks.yml for the host used for 'dev' + host: "{{"{{"}} env_var('DBT_HOST') {{"}}"}}" + token: "{{"{{"}} env_var('DBT_ACCESS_TOKEN') {{"}}"}}" + + prod: + type: databricks + method: http + catalog: {{$catalog}} + schema: {{.shared_schema}} + + http_path: {{.http_path}} + + # The workspace host / token are provided by Databricks + # see databricks.yml for the host used for 'dev' + host: "{{"{{"}} env_var('DBT_HOST') {{"}}"}}" + token: "{{"{{"}} env_var('DBT_ACCESS_TOKEN') {{"}}"}}" diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_project.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_project.yml.tmpl index 560db52d46..11fbf051e3 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_project.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_project.yml.tmpl @@ -6,14 +6,14 @@ config-version: 2 profile: '{{.project_name}}' # These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that models in this project can be -# found in the "models/" directory. You probably won't need to change these! -model-paths: ["models"] -analysis-paths: ["analyses"] -test-paths: ["tests"] -seed-paths: ["seeds"] -macro-paths: ["macros"] -snapshot-paths: ["snapshots"] +# For Databricks asset bundles, we put everything in src, as you may have +# non-dbt resources in your project. 
+model-paths: ["src/models"] +analysis-paths: ["src/analyses"] +test-paths: ["src/tests"] +seed-paths: ["src/seeds"] +macro-paths: ["src/macros"] +snapshot-paths: ["src/snapshots"] clean-targets: # directories to be removed by `dbt clean` - "target" diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl index 8c95054467..688c23b920 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl @@ -12,17 +12,27 @@ resources: on_failure: - {{user_name}} +{{- $dev_schema := .shared_schema }} +{{- if (regexp "^yes").MatchString .personal_schemas}} +{{- $dev_schema = "${workspace.current_user.short_name}"}} +{{- end}} + tasks: - task_key: dbt + dbt_task: project_directory: ../ + # The default schema, catalog, etc. 
are defined in ../dbt_profiles/profiles.yml + profiles_directory: dbt_profiles/ commands: - - 'dbt deps' - - 'dbt seed' - - 'dbt run --vars "{ "bundle_target": "${bundle.target}" }"' - warehouse_id: ${var.warehouse_id} - catalog: ${var.catalog} - schema: ${var.schema} + - 'dbt deps --target=${bundle.target}' + - 'dbt seed --target=${bundle.target} --vars "{ dev_schema: {{$dev_schema}} }"' + - 'dbt run --target=${bundle.target} --vars "{ dev_schema: {{$dev_schema}} }"' + + libraries: + - pypi: + package: dbt-databricks>=1.0.0,<2.0.0 + new_cluster: spark_version: {{template "latest_lts_dbr_version"}} node_type_id: {{smallest_node_type}} @@ -33,6 +43,3 @@ resources: spark.databricks.cluster.profile: singleNode custom_tags: ResourceClass: SingleNode - libraries: - - pypi: - package: dbt-databricks>=1.0.0,<2.0.0 diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/analyses/.gitkeep b/libs/template/templates/dbt-sql/template/{{.project_name}}/src/analyses/.gitkeep similarity index 100% rename from libs/template/templates/dbt-sql/template/{{.project_name}}/analyses/.gitkeep rename to libs/template/templates/dbt-sql/template/{{.project_name}}/src/analyses/.gitkeep diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/macros/.gitkeep b/libs/template/templates/dbt-sql/template/{{.project_name}}/src/macros/.gitkeep similarity index 100% rename from libs/template/templates/dbt-sql/template/{{.project_name}}/macros/.gitkeep rename to libs/template/templates/dbt-sql/template/{{.project_name}}/src/macros/.gitkeep diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_daily.sql.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/src/models/example/orders_daily.sql.tmpl similarity index 94% rename from libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_daily.sql.tmpl rename to 
libs/template/templates/dbt-sql/template/{{.project_name}}/src/models/example/orders_daily.sql.tmpl index 29d32a19ab..f7aa2ec791 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_daily.sql.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/src/models/example/orders_daily.sql.tmpl @@ -11,7 +11,7 @@ select order_date, count(*) AS number_of_orders from {{"{{"}} ref('orders_raw') {{"}}"}} -- During development, only process a smaller range of data -{% if var('bundle_target', 'default') != 'prod' %} +{% if target.name != 'prod' %} where order_date >= '2019-08-01' and order_date < '2019-09-01' {% endif %} diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_raw.sql.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/src/models/example/orders_raw.sql.tmpl similarity index 100% rename from libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/orders_raw.sql.tmpl rename to libs/template/templates/dbt-sql/template/{{.project_name}}/src/models/example/orders_raw.sql.tmpl diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/schema.yml b/libs/template/templates/dbt-sql/template/{{.project_name}}/src/models/example/schema.yml similarity index 100% rename from libs/template/templates/dbt-sql/template/{{.project_name}}/models/example/schema.yml rename to libs/template/templates/dbt-sql/template/{{.project_name}}/src/models/example/schema.yml diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/seeds/.gitkeep b/libs/template/templates/dbt-sql/template/{{.project_name}}/src/seeds/.gitkeep similarity index 100% rename from libs/template/templates/dbt-sql/template/{{.project_name}}/seeds/.gitkeep rename to libs/template/templates/dbt-sql/template/{{.project_name}}/src/seeds/.gitkeep diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/snapshots/.gitkeep 
b/libs/template/templates/dbt-sql/template/{{.project_name}}/src/snapshots/.gitkeep similarity index 100% rename from libs/template/templates/dbt-sql/template/{{.project_name}}/snapshots/.gitkeep rename to libs/template/templates/dbt-sql/template/{{.project_name}}/src/snapshots/.gitkeep diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/tests/.gitkeep b/libs/template/templates/dbt-sql/template/{{.project_name}}/src/tests/.gitkeep similarity index 100% rename from libs/template/templates/dbt-sql/template/{{.project_name}}/tests/.gitkeep rename to libs/template/templates/dbt-sql/template/{{.project_name}}/src/tests/.gitkeep From de7bd784d8b52799394079743da0f4317b0a1046 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sat, 27 Jan 2024 22:35:18 +0100 Subject: [PATCH 18/23] Improve DX with default_catalog helper --- libs/template/helpers.go | 21 +++++++++++++++++++ libs/template/renderer_test.go | 2 ++ .../dbt-sql/databricks_template_schema.json | 16 +++++++------- .../profile_template.yml.tmpl | 2 +- .../src/models/example/orders_daily.sql.tmpl | 10 +++++++-- .../src/models/example/orders_raw.sql.tmpl | 2 -- 6 files changed, 40 insertions(+), 13 deletions(-) diff --git a/libs/template/helpers.go b/libs/template/helpers.go index c01fa2b652..19c01738b8 100644 --- a/libs/template/helpers.go +++ b/libs/template/helpers.go @@ -12,6 +12,7 @@ import ( "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/auth" + "github.com/databricks/databricks-sdk-go/apierr" "github.com/databricks/databricks-sdk-go/service/iam" ) @@ -30,6 +31,7 @@ type pair struct { var cachedUser *iam.User var cachedIsServicePrincipal *bool +var cachedCatalog *string func loadHelpers(ctx context.Context) template.FuncMap { w := root.WorkspaceClient(ctx) @@ -113,6 +115,25 @@ func loadHelpers(ctx context.Context) template.FuncMap { } return result, nil }, + // Get the default workspace catalog. 
If there is no default, or if + // Unity Catalog is not enabled, return an empty string. + "default_catalog": func() (string, error) { + if cachedCatalog == nil { + metastore, err := w.Metastores.Current(ctx) + if err != nil { + var aerr *apierr.APIError + if errors.As(err, &aerr) && aerr.ErrorCode == "METASTORE_DOES_NOT_EXIST" { + // Workspace doesn't have a metastore assigned, ignore error + empty_default := "" + cachedCatalog = &empty_default + return "", nil + } + return "", err + } + cachedCatalog = &metastore.DefaultCatalogName + } + return *cachedCatalog, nil + }, "is_service_principal": func() (bool, error) { if cachedIsServicePrincipal != nil { return *cachedIsServicePrincipal, nil diff --git a/libs/template/renderer_test.go b/libs/template/renderer_test.go index 1365619448..b2a9275683 100644 --- a/libs/template/renderer_test.go +++ b/libs/template/renderer_test.go @@ -106,6 +106,8 @@ func TestBuiltinPythonTemplateValid(t *testing.T) { options := []string{"yes", "no"} isServicePrincipal := false build := false + catalog := "hive_metastore" + cachedCatalog = &catalog for _, includeNotebook := range options { for _, includeDlt := range options { for _, includePython := range options { diff --git a/libs/template/templates/dbt-sql/databricks_template_schema.json b/libs/template/templates/dbt-sql/databricks_template_schema.json index 68cd2622e6..a4e3f716ce 100644 --- a/libs/template/templates/dbt-sql/databricks_template_schema.json +++ b/libs/template/templates/dbt-sql/databricks_template_schema.json @@ -5,7 +5,7 @@ "type": "string", "pattern": "^[A-Za-z_][A-Za-z0-9_]+$", "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores.", - "default": "my_dbt_project", + "default": "dbt_project", "description": "\nPlease provide a unique name for this project.\nproject_name", "order": 1 }, @@ -18,26 +18,26 @@ }, "default_catalog": { "type": "string", - "default": "", + "default": "{{default_catalog}}", "pattern": "^\\w*$",
"pattern_match_failure_message": "Invalid catalog name.", - "description": "\nPlease provide an initial catalog (leave blank to use the workspace's default catalog).\ndefault_catalog", - "order": 4 + "description": "\nPlease provide an initial catalog{{if eq (default_catalog) \"\"}} (leave blank when not using Unity Catalog){{end}}.\ndefault_catalog", + "order": 3 }, "personal_schemas": { "type": "string", "description": "\nWould you like to use a personal schema for each user working on this project? (e.g., 'catalog.{{short_name}}')\npersonal_schemas", "enum": [ - "yes, use a personal schema for each user during development", + "yes, use a schema based on the current user name during development", "no, use a shared schema during development" ], - "order": 3 + "order": 4 }, "shared_schema": { "skip_prompt_if": { "properties": { "personal_schemas": { - "const": "yes, use a personal schema for each user during development" + "const": "yes, use a schema based on the current user name during development" } } }, @@ -49,5 +49,5 @@ "order": 5 } }, - "success_message": "\n📊 Your new project has been created in the '{{.project_name}}' directory!\nPlease navigate to that directory and type 'dbt init' to get started.\nRefer to the README.md file for full \"getting started\" guide and production setup instructions.\n" + "success_message": "\n📊 Your new project has been created in the '{{.project_name}}' directory!\nIf you already have dbt installed, just type 'cd {{.project_name}}; dbt init' to get started.\nRefer to the README.md file for full \"getting started\" guide and production setup instructions.\n" } diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl index 0151916bb1..1bab573f2f 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl +++
b/libs/template/templates/dbt-sql/template/{{.project_name}}/profile_template.yml.tmpl @@ -26,7 +26,7 @@ prompts: {{- end}} schema: {{- if (regexp "^yes").MatchString .personal_schemas}} - hint: 'personal schema where dbt will build objects during development (example: {{short_name}})' + hint: 'personal schema where dbt will build objects during development, example: {{short_name}}' {{- else}} hint: 'default schema where dbt will build objects' default: {{.shared_schema}} diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/src/models/example/orders_daily.sql.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/src/models/example/orders_daily.sql.tmpl index f7aa2ec791..a8b4c2f9af 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/src/models/example/orders_daily.sql.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/src/models/example/orders_daily.sql.tmpl @@ -1,10 +1,16 @@ +{{- if eq (default_catalog) ""}} +{{- /* This workspace might not have Unity Catalog, */}} +{{- /* so let's not show both materialized views and streaming tables. */}} +{{- /* They're not supported without Unity Catalog! */}} +-- This model file defines a table called 'orders_daily' +{{"{{"}} config(materialized = 'table') {{"}}"}} +{{- else}} -- This model file defines a materialized view called 'orders_daily' --- in the catalog/schema selected in resources/{{.project_name}}_job.yml or --- in the current dbt profile during development. -- -- Read more about materialized at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables -- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561. 
{{"{{"}} config(materialized = 'materialized_view') {{"}}"}} +{{- end}} select order_date, count(*) AS number_of_orders diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/src/models/example/orders_raw.sql.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/src/models/example/orders_raw.sql.tmpl index 6c000254f3..17e6a5bf32 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/src/models/example/orders_raw.sql.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/src/models/example/orders_raw.sql.tmpl @@ -1,6 +1,4 @@ -- This model file defines a streaming table called 'orders_raw' --- in the catalog/schema selected in resources/{{.project_name}}_job.yml or --- in the current dbt profile during development. -- -- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/ -- Read more about streaming tables at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables From 8e7c6a109cf2b77da43c33d5b543a5a1460b2bec Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sun, 28 Jan 2024 09:10:28 +0100 Subject: [PATCH 19/23] Remove from list of templates for now --- cmd/bundle/init.go | 9 +++++---- cmd/bundle/init_test.go | 2 -- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/cmd/bundle/init.go b/cmd/bundle/init.go index 2434606ffe..5e5a5c4922 100644 --- a/cmd/bundle/init.go +++ b/cmd/bundle/init.go @@ -34,10 +34,11 @@ var nativeTemplates = []nativeTemplate{ name: "default-python", description: "The default Python template for Notebooks / Delta Live Tables / Workflows", }, - { - name: "dbt-sql", - description: "The dbt SQL template (https://www.databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)", - }, + // Coming soon, see https://github.com/databricks/cli/pull/1059 + // { + // name: "dbt-sql", + // description: "The dbt SQL template 
(https://www.databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)", + // }, { name: "mlops-stacks", gitUrl: "https://github.com/databricks/mlops-stacks", diff --git a/cmd/bundle/init_test.go b/cmd/bundle/init_test.go index ca97d63068..aa89915968 100644 --- a/cmd/bundle/init_test.go +++ b/cmd/bundle/init_test.go @@ -30,7 +30,6 @@ func TestBundleInitRepoName(t *testing.T) { func TestNativeTemplateOptions(t *testing.T) { expected := []cmdio.Tuple{ {Name: "default-python", Id: "The default Python template for Notebooks / Delta Live Tables / Workflows"}, - {Name: "dbt-sql", Id: "The dbt SQL template (https://www.databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)"}, {Name: "mlops-stacks", Id: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)"}, {Name: "custom...", Id: "Bring your own template"}, } @@ -39,7 +38,6 @@ func TestNativeTemplateOptions(t *testing.T) { func TestNativeTemplateHelpDescriptions(t *testing.T) { expected := `- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows -- dbt-sql: The dbt SQL template (https://www.databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks) - mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)` assert.Equal(t, expected, nativeTemplateHelpDescriptions()) } From 18c6b7059b589ed349f9f0a3415ba57f05b66607 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sun, 28 Jan 2024 09:18:30 +0100 Subject: [PATCH 20/23] Update README.md --- libs/template/templates/dbt-sql/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libs/template/templates/dbt-sql/README.md b/libs/template/templates/dbt-sql/README.md index 9b750bf560..4ccacab10b 100644 --- a/libs/template/templates/dbt-sql/README.md +++ b/libs/template/templates/dbt-sql/README.md @@ -1,8 +1,9 @@ # dbt template This folder provides a template for using dbt-core with Databricks Asset 
Bundles. -It follows the standard dbt project structure and has an additional `resources` -directory to define Databricks resources such as jobs that run dbt models. +It leverages dbt-core for local development and relies on Databricks Asset Bundles +for deployment (either manually or with CI/CD). In production, +dbt is executed using Databricks Workflows. * Learn more about the dbt and its standard project structure here: https://docs.getdbt.com/docs/build/projects. * Learn more about Databricks Asset Bundles here: https://docs.databricks.com/en/dev-tools/bundles/index.html From 2f52ff11316e1e3f08e288c89824558dd1f0f322 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 29 Jan 2024 09:51:27 +0100 Subject: [PATCH 21/23] Mark as experimental --- libs/template/templates/dbt-sql/databricks_template_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/template/templates/dbt-sql/databricks_template_schema.json b/libs/template/templates/dbt-sql/databricks_template_schema.json index a4e3f716ce..9b36ded8f2 100644 --- a/libs/template/templates/dbt-sql/databricks_template_schema.json +++ b/libs/template/templates/dbt-sql/databricks_template_schema.json @@ -1,5 +1,5 @@ { - "welcome_message": "\nWelcome to the dbt template for Databricks Asset Bundles!", + "welcome_message": "\nWelcome to the (EXPERIMENTAL) dbt template for Databricks Asset Bundles!", "properties": { "project_name": { "type": "string", From e0411483be716ca159dff7ab486c0951f26eec38 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 19 Feb 2024 09:55:03 +0100 Subject: [PATCH 22/23] Restore sql-dbt template in hidden form --- cmd/bundle/init.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/cmd/bundle/init.go b/cmd/bundle/init.go index 5e5a5c4922..569e8055b7 100644 --- a/cmd/bundle/init.go +++ b/cmd/bundle/init.go @@ -25,6 +25,7 @@ type nativeTemplate struct { gitUrl string description string aliases []string + hidden bool } const 
customTemplate = "custom..." @@ -34,11 +35,11 @@ var nativeTemplates = []nativeTemplate{ name: "default-python", description: "The default Python template for Notebooks / Delta Live Tables / Workflows", }, - // Coming soon, see https://github.com/databricks/cli/pull/1059 - // { - // name: "dbt-sql", - // description: "The dbt SQL template (https://www.databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)", - // }, + { + name: "dbt-sql", + description: "The dbt SQL template (https://www.databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)", + hidden: true, + }, { name: "mlops-stacks", gitUrl: "https://github.com/databricks/mlops-stacks", @@ -55,7 +56,7 @@ var nativeTemplates = []nativeTemplate{ func nativeTemplateHelpDescriptions() string { var lines []string for _, template := range nativeTemplates { - if template.name != customTemplate { + if template.name != customTemplate && !template.hidden { lines = append(lines, fmt.Sprintf("- %s: %s", template.name, template.description)) } } @@ -66,6 +67,9 @@ func nativeTemplateHelpDescriptions() string { func nativeTemplateOptions() []cmdio.Tuple { names := make([]cmdio.Tuple, 0, len(nativeTemplates)) for _, template := range nativeTemplates { + if template.hidden { + continue + } tuple := cmdio.Tuple{ Name: template.name, Id: template.description, From e5fb7083ba40a1886a9a8651769a328dfeb07a0f Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 19 Feb 2024 10:04:07 +0100 Subject: [PATCH 23/23] Copy-editing --- .../templates/dbt-sql/databricks_template_schema.json | 6 +++--- .../dbt-sql/template/{{.project_name}}/README.md.tmpl | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libs/template/templates/dbt-sql/databricks_template_schema.json b/libs/template/templates/dbt-sql/databricks_template_schema.json index 9b36ded8f2..736b123257 100644 --- a/libs/template/templates/dbt-sql/databricks_template_schema.json +++ 
b/libs/template/templates/dbt-sql/databricks_template_schema.json @@ -12,8 +12,8 @@ "http_path": { "type": "string", "pattern": "^/sql/.\\../warehouses/[a-z0-9]+$", - "pattern_match_failure_message": "Path must be of the form /sql/1.0/warehouses/abcdef1234567890", - "description": " \nPlease provide the HTTP Path of the SQL warehouse you would like to use with dbt during development.\nYou can find this path by clicking on \"Connection Details\" for your SQL warehouse.\nhttp_path [example: /sql/1.0/warehouses/abcdef1234567890]", + "pattern_match_failure_message": "Path must be of the form /sql/1.0/warehouses/<warehouse id>", + "description": " \nPlease provide the HTTP Path of the SQL warehouse you would like to use with dbt during development.\nYou can find this path by clicking on \"Connection details\" for your SQL warehouse.\nhttp_path [example: /sql/1.0/warehouses/abcdef1234567890]", "order": 2 }, "default_catalog": { @@ -45,7 +45,7 @@ "default": "default", "pattern": "^\\w+$", "pattern_match_failure_message": "Invalid schema name.", - "description": "\nPlease provide a initial schema during development.\ndefault_schema", + "description": "\nPlease provide an initial schema during development.\ndefault_schema", "order": 5 } }, diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl index 5354ceccc6..d46b61f72a 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl @@ -5,7 +5,7 @@ Databricks Asset Bundles. It follows the standard dbt project structure and has an additional `resources` directory to define Databricks resources such as jobs that run dbt models. -* Learn more about the dbt and its standard project structure here: https://docs.getdbt.com/docs/build/projects.
+* Learn more about dbt and its standard project structure here: https://docs.getdbt.com/docs/build/projects. * Learn more about Databricks Asset Bundles here: https://docs.databricks.com/en/dev-tools/bundles/index.html The remainder of this file includes instructions for local development (using dbt) @@ -94,12 +94,12 @@ target-specific settings. Read more about dbt profiles on Databricks at https://docs.databricks.com/en/workflows/jobs/how-to/use-dbt-in-workflows.html#advanced-run-dbt-with-a-custom-profile. The target workspaces for staging and prod are defined in databricks.yml. -You can manaulyl deploy based on these configurations (see below). +You can manually deploy based on these configurations (see below). Or you can use CI/CD to automate deployment. See https://docs.databricks.com/dev-tools/bundles/ci-cd.html for documentation on CI/CD setup. -## Manually deploying to to Databricks with Databricks Asset Bundles +## Manually deploying to Databricks with Databricks Asset Bundles Databricks Asset Bundles can be used to deploy to Databricks and to execute dbt commands as a job using Databricks Workflows. See