diff --git a/dbt_sql/README.md b/dbt_sql/README.md index f012a898..ef22b54a 100644 --- a/dbt_sql/README.md +++ b/dbt_sql/README.md @@ -1,15 +1,15 @@ # dbt_sql The 'dbt_sql' project was generated by using the dbt template for -Databricks Asset Bundles. It follows the standard dbt project structure +Declarative Automation Bundles. It follows the standard dbt project structure and has an additional `resources` directory to define Databricks resources such as jobs that run dbt models. * Learn more about dbt and its standard project structure here: https://docs.getdbt.com/docs/build/projects. -* Learn more about Databricks Asset Bundles here: https://docs.databricks.com/en/dev-tools/bundles/index.html +* Learn more about Declarative Automation Bundles here: https://docs.databricks.com/en/dev-tools/bundles/index.html The remainder of this file includes instructions for local development (using dbt) -and deployment to production (using Databricks Asset Bundles). +and deployment to production (using Declarative Automation Bundles). ## Development setup @@ -88,20 +88,20 @@ $ dbt test ## Production setup -Your production dbt profiles are defined in dbt_profiles/profiles.yml. -These profiles define the default catalog, schema, and any other +Your production dbt profiles are defined in `dbt_profiles/profiles.yml`. +These profiles define the default warehouse, catalog, schema, and any other target-specific settings. Read more about dbt profiles on Databricks at https://docs.databricks.com/en/workflows/jobs/how-to/use-dbt-in-workflows.html#advanced-run-dbt-with-a-custom-profile. -The target workspaces for staging and prod are defined in databricks.yml. +The target workspaces for staging and prod are defined in `databricks.yml`. You can manually deploy based on these configurations (see below). Or you can use CI/CD to automate deployment. See https://docs.databricks.com/dev-tools/bundles/ci-cd.html for documentation on CI/CD setup. 
-## Manually deploying to Databricks with Databricks Asset Bundles +## Manually deploying to Databricks with Declarative Automation Bundles -Databricks Asset Bundles can be used to deploy to Databricks and to execute +Declarative Automation Bundles can be used to deploy to Databricks and to execute dbt commands as a job using Databricks Workflows. See https://docs.databricks.com/dev-tools/bundles/index.html to learn more. @@ -120,7 +120,7 @@ For example, the default template would deploy a job called You can find that job by opening your workspace and clicking on **Workflows**. You can also deploy to your production target directly from the command-line. -The warehouse, catalog, and schema for that target are configured in databricks.yml. +The warehouse, catalog, and schema for that target are configured in `dbt_profiles/profiles.yml`. When deploying to this target, note that the default job at resources/dbt_sql.job.yml has a schedule set that runs every day. The schedule is paused when deploying in development mode (see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). diff --git a/dbt_sql/databricks.yml b/dbt_sql/databricks.yml index 62de07b7..e9bce917 100644 --- a/dbt_sql/databricks.yml +++ b/dbt_sql/databricks.yml @@ -1,5 +1,5 @@ # This file defines the structure of this project and how it is deployed -# to production using Databricks Asset Bundles. +# to production using Declarative Automation Bundles. # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. bundle: name: dbt_sql @@ -7,7 +7,6 @@ bundle: include: - resources/*.yml - - resources/*/*.yml # Deployment targets. # The default schema, catalog, etc. 
for dbt are defined in dbt_profiles/profiles.yml diff --git a/default_minimal/.vscode/extensions.json b/default_minimal/.vscode/extensions.json index 75a111a6..5ba48e79 100644 --- a/default_minimal/.vscode/extensions.json +++ b/default_minimal/.vscode/extensions.json @@ -2,6 +2,6 @@ "recommendations": [ "databricks.databricks", "redhat.vscode-yaml", - "ms-python.black-formatter" + "charliermarsh.ruff" ] } diff --git a/default_minimal/.vscode/settings.json b/default_minimal/.vscode/settings.json index c49593bc..d73c73b5 100644 --- a/default_minimal/.vscode/settings.json +++ b/default_minimal/.vscode/settings.json @@ -33,7 +33,7 @@ "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, "[python]": { - "editor.defaultFormatter": "ms-python.black-formatter", + "editor.defaultFormatter": "charliermarsh.ruff", "editor.formatOnSave": true, }, } diff --git a/default_minimal/databricks.yml b/default_minimal/databricks.yml index 6e4dd55c..c788c8df 100644 --- a/default_minimal/databricks.yml +++ b/default_minimal/databricks.yml @@ -6,7 +6,6 @@ bundle: include: - resources/*.yml - - resources/*/*.yml # Variable declarations. These variables are assigned in the dev/prod targets below. 
variables: diff --git a/default_python/.vscode/extensions.json b/default_python/.vscode/extensions.json index 75a111a6..5ba48e79 100644 --- a/default_python/.vscode/extensions.json +++ b/default_python/.vscode/extensions.json @@ -2,6 +2,6 @@ "recommendations": [ "databricks.databricks", "redhat.vscode-yaml", - "ms-python.black-formatter" + "charliermarsh.ruff" ] } diff --git a/default_python/.vscode/settings.json b/default_python/.vscode/settings.json index c49593bc..d73c73b5 100644 --- a/default_python/.vscode/settings.json +++ b/default_python/.vscode/settings.json @@ -33,7 +33,7 @@ "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, "[python]": { - "editor.defaultFormatter": "ms-python.black-formatter", + "editor.defaultFormatter": "charliermarsh.ruff", "editor.formatOnSave": true, }, } diff --git a/default_python/databricks.yml b/default_python/databricks.yml index aa15077f..1c1578b2 100644 --- a/default_python/databricks.yml +++ b/default_python/databricks.yml @@ -6,7 +6,6 @@ bundle: include: - resources/*.yml - - resources/*/*.yml artifacts: python_artifact: diff --git a/default_python/pyproject.toml b/default_python/pyproject.toml index a90910ca..d89e336a 100644 --- a/default_python/pyproject.toml +++ b/default_python/pyproject.toml @@ -14,8 +14,10 @@ dependencies = [ [dependency-groups] dev = [ "pytest", + "ruff", "databricks-dlt", "databricks-connect>=15.4,<15.5", + "ipykernel", ] [project.scripts] @@ -25,5 +27,5 @@ main = "default_python.main:main" requires = ["hatchling"] build-backend = "hatchling.build" -[tool.black] -line-length = 125 +[tool.ruff] +line-length = 120 diff --git a/default_python/src/default_python/main.py b/default_python/src/default_python/main.py index 0cb0e736..bbe33994 100644 --- a/default_python/src/default_python/main.py +++ b/default_python/src/default_python/main.py @@ -5,7 +5,9 @@ def main(): # Process command-line arguments - parser = argparse.ArgumentParser(description="Databricks job with catalog 
and schema parameters") + parser = argparse.ArgumentParser( + description="Databricks job with catalog and schema parameters", + ) parser.add_argument("--catalog", required=True) parser.add_argument("--schema", required=True) args = parser.parse_args() diff --git a/default_python/tests/conftest.py b/default_python/tests/conftest.py index 4df274fd..72ebfeb5 100644 --- a/default_python/tests/conftest.py +++ b/default_python/tests/conftest.py @@ -1,8 +1,4 @@ -"""This file configures pytest. - -This file is in the root since it can be used for tests in any place in this -project, including tests under resources/. -""" +"""This file configures pytest, initializes Databricks Connect, and provides fixtures for Spark and loading test data.""" import os, sys, pathlib from contextlib import contextmanager diff --git a/default_sql/README.md b/default_sql/README.md index b8828c68..3f1e5d43 100644 --- a/default_sql/README.md +++ b/default_sql/README.md @@ -36,6 +36,11 @@ The 'default_sql' project was generated by using the default-sql template. 6. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from https://docs.databricks.com/dev-tools/vscode-ext.html. -7. For documentation on the Databricks Asset Bundles format used +7. For documentation on the Declarative Automation Bundles format used for this project, and for CI/CD configuration, see https://docs.databricks.com/dev-tools/bundles/index.html. + +## Changing the warehouse, catalog, or schema + +The default SQL warehouse, catalog, and schema are configured in `databricks.yml`. +To change these settings, edit the `variables` section for each target (dev/prod). diff --git a/default_sql/databricks.yml b/default_sql/databricks.yml index 715c2ebe..db1c948b 100644 --- a/default_sql/databricks.yml +++ b/default_sql/databricks.yml @@ -6,7 +6,6 @@ bundle: include: - resources/*.yml - - resources/*/*.yml # Variable declarations. These variables are assigned in the dev/prod targets below. 
variables: diff --git a/lakeflow_pipelines_python/.vscode/extensions.json b/lakeflow_pipelines_python/.vscode/extensions.json index 75a111a6..5ba48e79 100644 --- a/lakeflow_pipelines_python/.vscode/extensions.json +++ b/lakeflow_pipelines_python/.vscode/extensions.json @@ -2,6 +2,6 @@ "recommendations": [ "databricks.databricks", "redhat.vscode-yaml", - "ms-python.black-formatter" + "charliermarsh.ruff" ] } diff --git a/lakeflow_pipelines_python/.vscode/settings.json b/lakeflow_pipelines_python/.vscode/settings.json index c49593bc..d73c73b5 100644 --- a/lakeflow_pipelines_python/.vscode/settings.json +++ b/lakeflow_pipelines_python/.vscode/settings.json @@ -33,7 +33,7 @@ "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, "[python]": { - "editor.defaultFormatter": "ms-python.black-formatter", + "editor.defaultFormatter": "charliermarsh.ruff", "editor.formatOnSave": true, }, } diff --git a/lakeflow_pipelines_python/databricks.yml b/lakeflow_pipelines_python/databricks.yml index 44beb468..ef70b307 100644 --- a/lakeflow_pipelines_python/databricks.yml +++ b/lakeflow_pipelines_python/databricks.yml @@ -6,7 +6,6 @@ bundle: include: - resources/*.yml - - resources/*/*.yml # Variable declarations. These variables are assigned in the dev/prod targets below. 
variables: diff --git a/lakeflow_pipelines_python/pyproject.toml b/lakeflow_pipelines_python/pyproject.toml index 5e565ade..91cc59b7 100644 --- a/lakeflow_pipelines_python/pyproject.toml +++ b/lakeflow_pipelines_python/pyproject.toml @@ -14,8 +14,10 @@ dependencies = [ [dependency-groups] dev = [ "pytest", + "ruff", "databricks-dlt", "databricks-connect>=15.4,<15.5", + "ipykernel", ] [project.scripts] @@ -28,5 +30,5 @@ build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] packages = ["src"] -[tool.black] -line-length = 125 +[tool.ruff] +line-length = 120 diff --git a/lakeflow_pipelines_sql/.vscode/extensions.json b/lakeflow_pipelines_sql/.vscode/extensions.json index 75a111a6..5ba48e79 100644 --- a/lakeflow_pipelines_sql/.vscode/extensions.json +++ b/lakeflow_pipelines_sql/.vscode/extensions.json @@ -2,6 +2,6 @@ "recommendations": [ "databricks.databricks", "redhat.vscode-yaml", - "ms-python.black-formatter" + "charliermarsh.ruff" ] } diff --git a/lakeflow_pipelines_sql/.vscode/settings.json b/lakeflow_pipelines_sql/.vscode/settings.json index c49593bc..d73c73b5 100644 --- a/lakeflow_pipelines_sql/.vscode/settings.json +++ b/lakeflow_pipelines_sql/.vscode/settings.json @@ -33,7 +33,7 @@ "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, "[python]": { - "editor.defaultFormatter": "ms-python.black-formatter", + "editor.defaultFormatter": "charliermarsh.ruff", "editor.formatOnSave": true, }, } diff --git a/lakeflow_pipelines_sql/databricks.yml b/lakeflow_pipelines_sql/databricks.yml index 4c4d7a91..bd42ad0e 100644 --- a/lakeflow_pipelines_sql/databricks.yml +++ b/lakeflow_pipelines_sql/databricks.yml @@ -6,7 +6,6 @@ bundle: include: - resources/*.yml - - resources/*/*.yml # Variable declarations. These variables are assigned in the dev/prod targets below. 
variables: diff --git a/pydabs/.vscode/extensions.json b/pydabs/.vscode/extensions.json index 75a111a6..5ba48e79 100644 --- a/pydabs/.vscode/extensions.json +++ b/pydabs/.vscode/extensions.json @@ -2,6 +2,6 @@ "recommendations": [ "databricks.databricks", "redhat.vscode-yaml", - "ms-python.black-formatter" + "charliermarsh.ruff" ] } diff --git a/pydabs/.vscode/settings.json b/pydabs/.vscode/settings.json index c49593bc..d73c73b5 100644 --- a/pydabs/.vscode/settings.json +++ b/pydabs/.vscode/settings.json @@ -33,7 +33,7 @@ "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, "[python]": { - "editor.defaultFormatter": "ms-python.black-formatter", + "editor.defaultFormatter": "charliermarsh.ruff", "editor.formatOnSave": true, }, } diff --git a/pydabs/databricks.yml b/pydabs/databricks.yml index 57634dd6..8bf72636 100644 --- a/pydabs/databricks.yml +++ b/pydabs/databricks.yml @@ -12,7 +12,6 @@ python: include: - resources/*.yml - - resources/*/*.yml artifacts: python_artifact: diff --git a/pydabs/pyproject.toml b/pydabs/pyproject.toml index 8621579b..44c02cc2 100644 --- a/pydabs/pyproject.toml +++ b/pydabs/pyproject.toml @@ -14,9 +14,11 @@ dependencies = [ [dependency-groups] dev = [ "pytest", + "ruff", "databricks-dlt", "databricks-connect>=15.4,<15.5", - "databricks-bundles==0.279.0", + "ipykernel", + "databricks-bundles==0.295.0", ] [project.scripts] @@ -26,5 +28,5 @@ main = "pydabs.main:main" requires = ["hatchling"] build-backend = "hatchling.build" -[tool.black] -line-length = 125 +[tool.ruff] +line-length = 120 diff --git a/pydabs/src/pydabs/main.py b/pydabs/src/pydabs/main.py index 7ab6d7b4..0b7d9a00 100644 --- a/pydabs/src/pydabs/main.py +++ b/pydabs/src/pydabs/main.py @@ -6,7 +6,7 @@ def main(): # Process command-line arguments parser = argparse.ArgumentParser( - description="Databricks job with catalog and schema parameters" + description="Databricks job with catalog and schema parameters", ) parser.add_argument("--catalog", 
required=True) parser.add_argument("--schema", required=True) diff --git a/pydabs/src/pydabs_etl/transformations/sample_zones_pydabs.py b/pydabs/src/pydabs_etl/transformations/sample_zones_pydabs.py index 8b2d9aef..09a44c69 100644 --- a/pydabs/src/pydabs_etl/transformations/sample_zones_pydabs.py +++ b/pydabs/src/pydabs_etl/transformations/sample_zones_pydabs.py @@ -11,7 +11,5 @@ def sample_zones_pydabs(): # Read from the "sample_trips" table, then sum all the fares return ( - spark.read.table(f"sample_trips_pydabs") - .groupBy(col("pickup_zip")) - .agg(sum("fare_amount").alias("total_fare")) + spark.read.table(f"sample_trips_pydabs").groupBy(col("pickup_zip")).agg(sum("fare_amount").alias("total_fare")) ) diff --git a/pydabs/tests/conftest.py b/pydabs/tests/conftest.py index 4df274fd..72ebfeb5 100644 --- a/pydabs/tests/conftest.py +++ b/pydabs/tests/conftest.py @@ -1,8 +1,4 @@ -"""This file configures pytest. - -This file is in the root since it can be used for tests in any place in this -project, including tests under resources/. 
-""" +"""This file configures pytest, initializes Databricks Connect, and provides fixtures for Spark and loading test data.""" import os, sys, pathlib from contextlib import contextmanager diff --git a/scripts/update_from_templates.sh b/scripts/update_from_templates.sh index d66cb3e8..bbab0076 100755 --- a/scripts/update_from_templates.sh +++ b/scripts/update_from_templates.sh @@ -81,7 +81,7 @@ init_bundle "default-sql" "853cd9bc-631c-4d4f-bca0-3195c7540854" '{ "project_name": "default_sql", "http_path": "/sql/1.0/warehouses/abcdef1234567890", "default_catalog": "catalog", - "personal_schemas": "yes, automatically use a schema based on the current user name during development" + "personal_schemas": "yes" }' init_bundle "dbt-sql" "5e5ca8d5-0388-473e-84a1-1414ed89c5df" '{ @@ -89,7 +89,7 @@ init_bundle "dbt-sql" "5e5ca8d5-0388-473e-84a1-1414ed89c5df" '{ "http_path": "/sql/1.0/warehouses/abcdef1234567890", "serverless": "yes", "default_catalog": "catalog", - "personal_schemas": "yes, use a schema based on the current user name during development" + "personal_schemas": "yes" }' init_bundle "lakeflow-pipelines" "295000fc-1ea8-4f43-befe-d5fb9f7d4ad4" '{ @@ -99,7 +99,6 @@ init_bundle "lakeflow-pipelines" "295000fc-1ea8-4f43-befe-d5fb9f7d4ad4" '{ "language": "sql" }' - init_bundle "lakeflow-pipelines" "87a174ba-60e4-4867-a140-1936bc9b00de" '{ "project_name": "lakeflow_pipelines_python", "default_catalog": "catalog",