diff --git a/knowledge_base/development_cluster/README.md b/knowledge_base/development_cluster/README.md
new file mode 100644
index 00000000..af48ec14
--- /dev/null
+++ b/knowledge_base/development_cluster/README.md
@@ -0,0 +1,24 @@
+# Development cluster
+
+This example demonstrates how to define and use a development (all-purpose) cluster in a Databricks Asset Bundle.
+
+This bundle defines an `example_job` which is run on a job cluster in production mode.
+
+For the development mode (default `dev` target) the job is overridden to use a development cluster which is provisioned
+as part of the bundle deployment as well.
+
+For more information, please refer to the [documentation](https://docs.databricks.com/en/dev-tools/bundles/settings.html#clusters).
+
+## Prerequisites
+
+* Databricks CLI v0.229.0 or above
+
+## Usage
+
+Update the `host` field under `workspace` in `databricks.yml` to the Databricks workspace you wish to deploy to.
+
+Run `databricks bundle deploy` to deploy the job. It's deployed to the `dev` target with a defined `development_cluster` cluster.
+
+Run `databricks bundle deploy -t prod` to deploy the job to the `prod` target. It's deployed with a job cluster instead of the development one.
+
+Run `databricks bundle run example_job` to run the job.
diff --git a/knowledge_base/development_cluster/databricks.yml b/knowledge_base/development_cluster/databricks.yml
new file mode 100644
index 00000000..2b5c7350
--- /dev/null
+++ b/knowledge_base/development_cluster/databricks.yml
@@ -0,0 +1,41 @@
+bundle:
+  name: development_cluster
+
+include:
+  - resources/*.yml
+
+workspace:
+  host: https://e2-dogfood.staging.cloud.databricks.com
+
+targets:
+  dev:
+    mode: development
+    default: true
+
+    # By configuring this field for the "dev" target, all jobs in this bundle
+    # are overridden to use the all-purpose cluster defined below.
+    #
+    # This can increase the speed of development when iterating on code and job definitions,
+    # as you don't have to wait for job clusters to start for every job run.
+    #
+    # Note: make sure that the cluster configuration below matches the job cluster
+    # definition that will be used when deploying the other targets.
+    cluster_id: ${resources.clusters.development_cluster.id}
+
+    resources:
+      clusters:
+        development_cluster:
+          cluster_name: Development cluster
+          spark_version: 15.4.x-scala2.12
+          node_type_id: i3.xlarge
+          num_workers: 0
+          autotermination_minutes: 30
+          spark_conf:
+            "spark.databricks.cluster.profile": "singleNode"
+            "spark.master": "local[*, 4]"
+          custom_tags:
+            "ResourceClass": "SingleNode"
+
+  prod: {
+    # No overrides
+  }
diff --git a/knowledge_base/development_cluster/resources/example_job.yml b/knowledge_base/development_cluster/resources/example_job.yml
new file mode 100644
index 00000000..d702cf55
--- /dev/null
+++ b/knowledge_base/development_cluster/resources/example_job.yml
@@ -0,0 +1,22 @@
+resources:
+  jobs:
+    example_job:
+      name: "Example job to demonstrate using an interactive cluster for development"
+
+      tasks:
+        - task_key: notebook
+          job_cluster_key: cluster
+          notebook_task:
+            notebook_path: ../src/hello.py
+
+      job_clusters:
+        - job_cluster_key: cluster
+          new_cluster:
+            spark_version: 15.4.x-scala2.12
+            node_type_id: i3.xlarge
+            num_workers: 0
+            spark_conf:
+              "spark.databricks.cluster.profile": "singleNode"
+              "spark.master": "local[*, 4]"
+            custom_tags:
+              "ResourceClass": "SingleNode"
diff --git a/knowledge_base/development_cluster/src/hello.py b/knowledge_base/development_cluster/src/hello.py
new file mode 100644
index 00000000..24dc150f
--- /dev/null
+++ b/knowledge_base/development_cluster/src/hello.py
@@ -0,0 +1,3 @@
+# Databricks notebook source
+
+print("Hello, World!")