From a266bc15db0e3da0f5eddbf2ff44109431d8e4b2 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 12 Mar 2025 12:34:54 +0100 Subject: [PATCH 1/5] Fix acc../interactive_cluster to use correct config Follow #2471 to where incorrect config is used. Modify all tests in integration_whl to print rendered databricks.yml Add $NODE_TYPE_ID env var. --- acceptance/acceptance_test.go | 12 ++++++ .../bundle/integration_whl/base/output.txt | 34 ++++++++++++++++- acceptance/bundle/integration_whl/base/script | 3 +- .../integration_whl/custom_params/output.txt | 34 ++++++++++++++++- .../integration_whl/custom_params/script | 3 +- .../interactive_cluster/output.txt | 36 +++++++++++++++++- .../interactive_cluster/script | 5 ++- .../bundle/integration_whl/wrapper/output.txt | 37 ++++++++++++++++++- .../bundle/integration_whl/wrapper/script | 3 +- .../wrapper_custom_params/output.txt | 37 ++++++++++++++++++- .../wrapper_custom_params/script | 3 +- 11 files changed, 196 insertions(+), 11 deletions(-) diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index 4a38e6f13b..272b8a559f 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -172,6 +172,7 @@ func testAccept(t *testing.T, InprocessMode bool, singleTest string) int { // Matches defaultSparkVersion in ../integration/bundle/helpers_test.go t.Setenv("DEFAULT_SPARK_VERSION", "13.3.x-snapshot-scala2.12") + t.Setenv("NODE_TYPE_ID", getNodeTypeID(cloudEnv)) testDirs := getTests(t) require.NotEmpty(t, testDirs) @@ -795,3 +796,14 @@ func runWithLog(t *testing.T, cmd *exec.Cmd, out *os.File, tail bool) error { return <-processErrCh } + +func getNodeTypeID(cloudEnv string) string { + switch cloudEnv { + case "azure": + return "Standard_DS4_v2" + case "gcp": + return "n1-standard-4" + default: // "aws", "", invalid values: + return "i3.xlarge" + } +} diff --git a/acceptance/bundle/integration_whl/base/output.txt b/acceptance/bundle/integration_whl/base/output.txt index a6aadac831..744f6a82dd 100644 --- a/acceptance/bundle/integration_whl/base/output.txt +++ b/acceptance/bundle/integration_whl/base/output.txt @@ -1,13 +1,45 @@ + +>>> cat input.json { "project_name": "my_test_code", "spark_version": "13.3.x-snapshot-scala2.12", - "node_type_id": "", + "node_type_id": "i3.xlarge", "unique_id": "[UNIQUE_NAME]", "python_wheel_wrapper": false, "instance_pool_id": "[TEST_INSTANCE_POOL_ID]" } ✨ Successfully initialized template +>>> cat databricks.yml +bundle: + name: wheel-task + +workspace: + root_path: "~/.bundle/[UNIQUE_NAME]" + + + +resources: + jobs: + some_other_job: + name: "[${bundle.target}] Test Wheel Job [UNIQUE_NAME]" + tasks: + - task_key: TestTask + new_cluster: + num_workers: 1 + spark_version: "13.3.x-snapshot-scala2.12" + node_type_id: "i3.xlarge" + data_security_mode: USER_ISOLATION + instance_pool_id: "[TEST_INSTANCE_POOL_ID]" + python_wheel_task: + package_name: my_test_code + entry_point: run + parameters: + - "one" + - "two" + libraries: + - whl: ./dist/*.whl + >>> [CLI] bundle deploy Building python_artifact... Uploading my_test_code-0.0.1-py3-none-any.whl... diff --git a/acceptance/bundle/integration_whl/base/script b/acceptance/bundle/integration_whl/base/script index 06c3bffdc5..19418f5d4a 100644 --- a/acceptance/bundle/integration_whl/base/script +++ b/acceptance/bundle/integration_whl/base/script @@ -1,8 +1,9 @@ export SPARK_VERSION=$DEFAULT_SPARK_VERSION export PYTHON_WHEEL_WRAPPER=false envsubst < input.json.tmpl > input.json -cat input.json +trace cat input.json $CLI bundle init . --config-file input.json +trace cat databricks.yml trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT trace $CLI bundle deploy trace $CLI bundle run some_other_job diff --git a/acceptance/bundle/integration_whl/custom_params/output.txt b/acceptance/bundle/integration_whl/custom_params/output.txt index f4715eab79..842a492a71 100644 --- a/acceptance/bundle/integration_whl/custom_params/output.txt +++ b/acceptance/bundle/integration_whl/custom_params/output.txt @@ -1,13 +1,45 @@ + +>>> cat input.json { "project_name": "my_test_code", "spark_version": "13.3.x-snapshot-scala2.12", - "node_type_id": "", + "node_type_id": "i3.xlarge", "unique_id": "[UNIQUE_NAME]", "python_wheel_wrapper": false, "instance_pool_id": "[TEST_INSTANCE_POOL_ID]" } ✨ Successfully initialized template +>>> cat databricks.yml +bundle: + name: wheel-task + +workspace: + root_path: "~/.bundle/[UNIQUE_NAME]" + + + +resources: + jobs: + some_other_job: + name: "[${bundle.target}] Test Wheel Job [UNIQUE_NAME]" + tasks: + - task_key: TestTask + new_cluster: + num_workers: 1 + spark_version: "13.3.x-snapshot-scala2.12" + node_type_id: "i3.xlarge" + data_security_mode: USER_ISOLATION + instance_pool_id: "[TEST_INSTANCE_POOL_ID]" + python_wheel_task: + package_name: my_test_code + entry_point: run + parameters: + - "one" + - "two" + libraries: + - whl: ./dist/*.whl + >>> [CLI] bundle deploy Building python_artifact... Uploading my_test_code-0.0.1-py3-none-any.whl... diff --git a/acceptance/bundle/integration_whl/custom_params/script b/acceptance/bundle/integration_whl/custom_params/script index 3abb7aafa9..da7ba68f81 100644 --- a/acceptance/bundle/integration_whl/custom_params/script +++ b/acceptance/bundle/integration_whl/custom_params/script @@ -1,8 +1,9 @@ export SPARK_VERSION=$DEFAULT_SPARK_VERSION export PYTHON_WHEEL_WRAPPER=false envsubst < $TESTDIR/../base/input.json.tmpl > input.json -cat input.json +trace cat input.json $CLI bundle init $TESTDIR/../base --config-file input.json +trace cat databricks.yml trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT trace $CLI bundle deploy trace $CLI bundle run some_other_job --python-params param1,param2 diff --git a/acceptance/bundle/integration_whl/interactive_cluster/output.txt b/acceptance/bundle/integration_whl/interactive_cluster/output.txt index a6aadac831..c0754f3a61 100644 --- a/acceptance/bundle/integration_whl/interactive_cluster/output.txt +++ b/acceptance/bundle/integration_whl/interactive_cluster/output.txt @@ -1,13 +1,46 @@ + +>>> cat input.json { "project_name": "my_test_code", "spark_version": "13.3.x-snapshot-scala2.12", - "node_type_id": "", + "node_type_id": "i3.xlarge", "unique_id": "[UNIQUE_NAME]", "python_wheel_wrapper": false, "instance_pool_id": "[TEST_INSTANCE_POOL_ID]" } ✨ Successfully initialized template +>>> cat databricks.yml +bundle: + name: wheel-task + +workspace: + root_path: "~/.bundle/[UNIQUE_NAME]" + +resources: + clusters: + test_cluster: + cluster_name: "test-cluster-[UNIQUE_NAME]" + spark_version: "13.3.x-snapshot-scala2.12" + node_type_id: "i3.xlarge" + num_workers: 1 + data_security_mode: USER_ISOLATION + + jobs: + some_other_job: + name: "[${bundle.target}] Test Wheel Job [UNIQUE_NAME]" + tasks: + - task_key: TestTask + existing_cluster_id: "${resources.clusters.test_cluster.cluster_id}" + python_wheel_task: + package_name: my_test_code + entry_point: run + parameters: + - "one" + - "two" + libraries: + - whl: ./dist/*.whl + >>> [CLI] bundle deploy Building python_artifact... Uploading my_test_code-0.0.1-py3-none-any.whl... @@ -27,6 +60,7 @@ Got arguments: >>> [CLI] bundle destroy --auto-approve The following resources will be deleted: + delete cluster test_cluster delete job some_other_job All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME] diff --git a/acceptance/bundle/integration_whl/interactive_cluster/script b/acceptance/bundle/integration_whl/interactive_cluster/script index 7e10d6299d..65b4ee2d20 100644 --- a/acceptance/bundle/integration_whl/interactive_cluster/script +++ b/acceptance/bundle/integration_whl/interactive_cluster/script @@ -1,8 +1,9 @@ export SPARK_VERSION=$DEFAULT_SPARK_VERSION export PYTHON_WHEEL_WRAPPER=false envsubst < $TESTDIR/../base/input.json.tmpl > input.json -cat input.json -$CLI bundle init $TESTDIR/../base --config-file input.json +trace cat input.json +$CLI bundle init . --config-file input.json +trace cat databricks.yml trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT trace $CLI bundle deploy trace $CLI bundle run some_other_job diff --git a/acceptance/bundle/integration_whl/wrapper/output.txt b/acceptance/bundle/integration_whl/wrapper/output.txt index ca1819f3c8..5915ba34ed 100644 --- a/acceptance/bundle/integration_whl/wrapper/output.txt +++ b/acceptance/bundle/integration_whl/wrapper/output.txt @@ -1,13 +1,48 @@ + +>>> cat input.json { "project_name": "my_test_code", "spark_version": "12.2.x-scala2.12", - "node_type_id": "", + "node_type_id": "i3.xlarge", "unique_id": "[UNIQUE_NAME]", "python_wheel_wrapper": true, "instance_pool_id": "[TEST_INSTANCE_POOL_ID]" } ✨ Successfully initialized template +>>> cat databricks.yml +bundle: + name: wheel-task + +workspace: + root_path: "~/.bundle/[UNIQUE_NAME]" + + +experimental: + python_wheel_wrapper: true + + +resources: + jobs: + some_other_job: + name: "[${bundle.target}] Test Wheel Job [UNIQUE_NAME]" + tasks: + - task_key: TestTask + new_cluster: + num_workers: 1 + spark_version: "12.2.x-scala2.12" + node_type_id: "i3.xlarge" + data_security_mode: USER_ISOLATION + instance_pool_id: "[TEST_INSTANCE_POOL_ID]" + python_wheel_task: + package_name: my_test_code + entry_point: run + parameters: + - "one" + - "two" + libraries: + - whl: ./dist/*.whl + >>> [CLI] bundle deploy Building python_artifact... Uploading my_test_code-0.0.1-py3-none-any.whl... diff --git a/acceptance/bundle/integration_whl/wrapper/script b/acceptance/bundle/integration_whl/wrapper/script index ee57bc783e..3e6afcfafb 100644 --- a/acceptance/bundle/integration_whl/wrapper/script +++ b/acceptance/bundle/integration_whl/wrapper/script @@ -5,8 +5,9 @@ export SPARK_VERSION=12.2.x-scala2.12 export PYTHON_WHEEL_WRAPPER=true envsubst < $TESTDIR/../base/input.json.tmpl > input.json -cat input.json +trace cat input.json $CLI bundle init $TESTDIR/../base --config-file input.json +trace cat databricks.yml trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT trace $CLI bundle deploy trace $CLI bundle run some_other_job diff --git a/acceptance/bundle/integration_whl/wrapper_custom_params/output.txt b/acceptance/bundle/integration_whl/wrapper_custom_params/output.txt index b5f97d5bce..e902b3c7b4 100644 --- a/acceptance/bundle/integration_whl/wrapper_custom_params/output.txt +++ b/acceptance/bundle/integration_whl/wrapper_custom_params/output.txt @@ -1,13 +1,48 @@ + +>>> cat input.json { "project_name": "my_test_code", "spark_version": "12.2.x-scala2.12", - "node_type_id": "", + "node_type_id": "i3.xlarge", "unique_id": "[UNIQUE_NAME]", "python_wheel_wrapper": true, "instance_pool_id": "[TEST_INSTANCE_POOL_ID]" } ✨ Successfully initialized template +>>> cat databricks.yml +bundle: + name: wheel-task + +workspace: + root_path: "~/.bundle/[UNIQUE_NAME]" + + +experimental: + python_wheel_wrapper: true + + +resources: + jobs: + some_other_job: + name: "[${bundle.target}] Test Wheel Job [UNIQUE_NAME]" + tasks: + - task_key: TestTask + new_cluster: + num_workers: 1 + spark_version: "12.2.x-scala2.12" + node_type_id: "i3.xlarge" + data_security_mode: USER_ISOLATION + instance_pool_id: "[TEST_INSTANCE_POOL_ID]" + python_wheel_task: + package_name: my_test_code + entry_point: run + parameters: + - "one" + - "two" + libraries: + - whl: ./dist/*.whl + >>> [CLI] bundle deploy Building python_artifact... Uploading my_test_code-0.0.1-py3-none-any.whl... diff --git a/acceptance/bundle/integration_whl/wrapper_custom_params/script b/acceptance/bundle/integration_whl/wrapper_custom_params/script index c92f7162db..c92be51fae 100644 --- a/acceptance/bundle/integration_whl/wrapper_custom_params/script +++ b/acceptance/bundle/integration_whl/wrapper_custom_params/script @@ -1,8 +1,9 @@ export SPARK_VERSION=12.2.x-scala2.12 export PYTHON_WHEEL_WRAPPER=true envsubst < $TESTDIR/../base/input.json.tmpl > input.json -cat input.json +trace cat input.json $CLI bundle init $TESTDIR/../base --config-file input.json +trace cat databricks.yml trap "errcode trace '$CLI' bundle destroy --auto-approve" EXIT trace $CLI bundle deploy trace $CLI bundle run some_other_job --python-params param1,param2 From 0b90826576c661a3495b6a708d41f7dba6683e3a Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 12 Mar 2025 13:43:23 +0100 Subject: [PATCH 2/5] add NODE_TYPE_ID replacement --- acceptance/acceptance_test.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index 272b8a559f..e5e983da63 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -172,7 +172,10 @@ func testAccept(t *testing.T, InprocessMode bool, singleTest string) int { // Matches defaultSparkVersion in ../integration/bundle/helpers_test.go t.Setenv("DEFAULT_SPARK_VERSION", "13.3.x-snapshot-scala2.12") - t.Setenv("NODE_TYPE_ID", getNodeTypeID(cloudEnv)) + + nodeTypeID := getNodeTypeID(cloudEnv) + t.Setenv("NODE_TYPE_ID", nodeTypeID) + repls.Set(nodeTypeID, "[NODE_TYPE_ID]") testDirs := getTests(t) require.NotEmpty(t, testDirs) @@ -799,11 +802,15 @@ func runWithLog(t *testing.T, cmd *exec.Cmd, out *os.File, tail bool) error { func getNodeTypeID(cloudEnv string) string { switch cloudEnv { + case "aws": + return "i3.xlarge" case "azure": return "Standard_DS4_v2" case "gcp": return "n1-standard-4" - default: // "aws", "", invalid values: - return "i3.xlarge" + case "": + return "local-fake-node" + default: + return "unknown-cloudEnv-" + cloudEnv } } From 19c03fac4c17631724c9a71aa0c49e95180e1f60 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 12 Mar 2025 13:55:07 +0100 Subject: [PATCH 3/5] update NODE_TYPE_ID calculation --- acceptance/acceptance_test.go | 8 ++++++-- .../bundle/integration_whl/interactive_cluster/output.txt | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index e5e983da63..c3195a38f7 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -802,11 +802,15 @@ func runWithLog(t *testing.T, cmd *exec.Cmd, out *os.File, tail bool) error { func getNodeTypeID(cloudEnv string) string { switch cloudEnv { - case "aws": + // no idea why, but + // aws-prod-ucws sets CLOUD_ENV to "ucws" + // gcp-prod-ucws sets CLOUD_ENV to "gcp-ucws" + // azure-prod-ucws sets CLOUD_ENV to "azure" + case "aws", "ucws": return "i3.xlarge" case "azure": return "Standard_DS4_v2" - case "gcp": + case "gcp", "gcp-ucws": return "n1-standard-4" case "": return "local-fake-node" diff --git a/acceptance/bundle/integration_whl/interactive_cluster/output.txt b/acceptance/bundle/integration_whl/interactive_cluster/output.txt index c0754f3a61..77b99ace28 100644 --- a/acceptance/bundle/integration_whl/interactive_cluster/output.txt +++ b/acceptance/bundle/integration_whl/interactive_cluster/output.txt @@ -3,7 +3,7 @@ { "project_name": "my_test_code", "spark_version": "13.3.x-snapshot-scala2.12", - "node_type_id": "i3.xlarge", + "node_type_id": "[NODE_TYPE_ID]", "unique_id": "[UNIQUE_NAME]", "python_wheel_wrapper": false, "instance_pool_id": "[TEST_INSTANCE_POOL_ID]" @@ -22,7 +22,7 @@ resources: test_cluster: cluster_name: "test-cluster-[UNIQUE_NAME]" spark_version: "13.3.x-snapshot-scala2.12" - node_type_id: "i3.xlarge" + node_type_id: "[NODE_TYPE_ID]" num_workers: 1 data_security_mode: USER_ISOLATION From e6bbd8b1ebfbf114a5f63dded30c86636d92d52b Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 12 Mar 2025 13:57:43 +0100 Subject: [PATCH 4/5] update output files --- acceptance/bundle/integration_whl/base/output.txt | 4 ++-- acceptance/bundle/integration_whl/custom_params/output.txt | 4 ++-- acceptance/bundle/integration_whl/wrapper/output.txt | 4 ++-- .../bundle/integration_whl/wrapper_custom_params/output.txt | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/acceptance/bundle/integration_whl/base/output.txt b/acceptance/bundle/integration_whl/base/output.txt index 744f6a82dd..d42daeaf36 100644 --- a/acceptance/bundle/integration_whl/base/output.txt +++ b/acceptance/bundle/integration_whl/base/output.txt @@ -3,7 +3,7 @@ { "project_name": "my_test_code", "spark_version": "13.3.x-snapshot-scala2.12", - "node_type_id": "i3.xlarge", + "node_type_id": "[NODE_TYPE_ID]", "unique_id": "[UNIQUE_NAME]", "python_wheel_wrapper": false, "instance_pool_id": "[TEST_INSTANCE_POOL_ID]" @@ -28,7 +28,7 @@ resources: new_cluster: num_workers: 1 spark_version: "13.3.x-snapshot-scala2.12" - node_type_id: "i3.xlarge" + node_type_id: "[NODE_TYPE_ID]" data_security_mode: USER_ISOLATION instance_pool_id: "[TEST_INSTANCE_POOL_ID]" python_wheel_task: diff --git a/acceptance/bundle/integration_whl/custom_params/output.txt b/acceptance/bundle/integration_whl/custom_params/output.txt index 842a492a71..19c1d87ce2 100644 --- a/acceptance/bundle/integration_whl/custom_params/output.txt +++ b/acceptance/bundle/integration_whl/custom_params/output.txt @@ -3,7 +3,7 @@ { "project_name": "my_test_code", "spark_version": "13.3.x-snapshot-scala2.12", - "node_type_id": "i3.xlarge", + "node_type_id": "[NODE_TYPE_ID]", "unique_id": "[UNIQUE_NAME]", "python_wheel_wrapper": false, "instance_pool_id": "[TEST_INSTANCE_POOL_ID]" @@ -28,7 +28,7 @@ resources: new_cluster: num_workers: 1 spark_version: "13.3.x-snapshot-scala2.12" - node_type_id: "i3.xlarge" + node_type_id: "[NODE_TYPE_ID]" data_security_mode: USER_ISOLATION instance_pool_id: "[TEST_INSTANCE_POOL_ID]" python_wheel_task: diff --git a/acceptance/bundle/integration_whl/wrapper/output.txt b/acceptance/bundle/integration_whl/wrapper/output.txt index 5915ba34ed..06e97bb27f 100644 --- a/acceptance/bundle/integration_whl/wrapper/output.txt +++ b/acceptance/bundle/integration_whl/wrapper/output.txt @@ -3,7 +3,7 @@ { "project_name": "my_test_code", "spark_version": "12.2.x-scala2.12", - "node_type_id": "i3.xlarge", + "node_type_id": "[NODE_TYPE_ID]", "unique_id": "[UNIQUE_NAME]", "python_wheel_wrapper": true, "instance_pool_id": "[TEST_INSTANCE_POOL_ID]" @@ -31,7 +31,7 @@ resources: new_cluster: num_workers: 1 spark_version: "12.2.x-scala2.12" - node_type_id: "i3.xlarge" + node_type_id: "[NODE_TYPE_ID]" data_security_mode: USER_ISOLATION instance_pool_id: "[TEST_INSTANCE_POOL_ID]" python_wheel_task: diff --git a/acceptance/bundle/integration_whl/wrapper_custom_params/output.txt b/acceptance/bundle/integration_whl/wrapper_custom_params/output.txt index e902b3c7b4..c17ba0f8b0 100644 --- a/acceptance/bundle/integration_whl/wrapper_custom_params/output.txt +++ b/acceptance/bundle/integration_whl/wrapper_custom_params/output.txt @@ -3,7 +3,7 @@ { "project_name": "my_test_code", "spark_version": "12.2.x-scala2.12", - "node_type_id": "i3.xlarge", + "node_type_id": "[NODE_TYPE_ID]", "unique_id": "[UNIQUE_NAME]", "python_wheel_wrapper": true, "instance_pool_id": "[TEST_INSTANCE_POOL_ID]" @@ -31,7 +31,7 @@ resources: new_cluster: num_workers: 1 spark_version: "12.2.x-scala2.12" - node_type_id: "i3.xlarge" + node_type_id: "[NODE_TYPE_ID]" data_security_mode: USER_ISOLATION instance_pool_id: "[TEST_INSTANCE_POOL_ID]" python_wheel_task: From 5bec77724843a2ad333dd37c5a89cfecc828d8af Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 12 Mar 2025 14:25:44 +0100 Subject: [PATCH 5/5] disable bundle/override on cloud --- acceptance/bundle/override/test.toml | 1 + 1 file changed, 1 insertion(+) create mode 100644 acceptance/bundle/override/test.toml diff --git a/acceptance/bundle/override/test.toml b/acceptance/bundle/override/test.toml new file mode 100644 index 0000000000..18b1a88417 --- /dev/null +++ b/acceptance/bundle/override/test.toml @@ -0,0 +1 @@ +Cloud = false