diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 4eae3617af..601eaae151 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -3,6 +3,7 @@ ## Release v0.248.0 ### Notable Changes +* Fixed a regression with pipeline library globs introduced in 0.247.0 ([#2723](https://github.com/databricks/cli/pull/2723)). The issue caused glob patterns to fail when using paths relative to a directory that is not the bundle root. ### Dependency updates diff --git a/acceptance/bundle/debug/out.stderr.txt b/acceptance/bundle/debug/out.stderr.txt index 3db100bec8..490d984a7f 100644 --- a/acceptance/bundle/debug/out.stderr.txt +++ b/acceptance/bundle/debug/out.stderr.txt @@ -40,7 +40,8 @@ 10:07:59 Debug: Apply pid=12345 mutator=ResolveVariableReferences 10:07:59 Debug: Apply pid=12345 mutator=ApplyTargetMode 10:07:59 Debug: Apply pid=12345 mutator=ProcessStaticResources -10:07:59 Debug: Apply pid=12345 mutator=NormalizePaths +10:07:59 Debug: Apply pid=12345 mutator=ProcessStaticResources mutator=ResolveVariableReferences(resources) +10:07:59 Debug: Apply pid=12345 mutator=ProcessStaticResources mutator=NormalizePaths 10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(load) 10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(init) 10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(load_resources) diff --git a/acceptance/bundle/paths/invalid_pipeline_globs/databricks.yml b/acceptance/bundle/paths/invalid_pipeline_globs/databricks.yml new file mode 100644 index 0000000000..6a94727dde --- /dev/null +++ b/acceptance/bundle/paths/invalid_pipeline_globs/databricks.yml @@ -0,0 +1,13 @@ +bundle: + name: invalid_pipeline_glob_paths + +resources: + pipelines: + nyc_taxi_pipeline: + libraries: + - notebook: { path: "${var.notebook_dir}/*.ipynb" } + +variables: + notebook_dir: + description: Directory with DLT notebooks + default: non-existent diff --git a/acceptance/bundle/paths/invalid_pipeline_globs/output.txt b/acceptance/bundle/paths/invalid_pipeline_globs/output.txt new file mode 100644 index 0000000000..a8a5784aa7 --- /dev/null +++ b/acceptance/bundle/paths/invalid_pipeline_globs/output.txt @@ -0,0 +1,22 @@ + +>>> [CLI] bundle validate --output json +Error: notebook non-existent/*.ipynb not found + + +Exit code: 1 +{ + "resources": { + "pipelines": { + "nyc_taxi_pipeline": { + "libraries": [ + { + "notebook": { + "path": "non-existent/*.ipynb" + } + } + ], + "permissions": [] + } + } + } +} diff --git a/acceptance/bundle/paths/invalid_pipeline_globs/script b/acceptance/bundle/paths/invalid_pipeline_globs/script new file mode 100644 index 0000000000..a6a93f74a7 --- /dev/null +++ b/acceptance/bundle/paths/invalid_pipeline_globs/script @@ -0,0 +1 @@ +errcode trace $CLI bundle validate --output json | jq 'pick(.resources)' diff --git a/acceptance/bundle/paths/pipeline_expected_file_got_notebook/databricks.yml b/acceptance/bundle/paths/pipeline_expected_file_got_notebook/databricks.yml new file mode 100644 index 0000000000..7d176f0cd5 --- /dev/null +++ b/acceptance/bundle/paths/pipeline_expected_file_got_notebook/databricks.yml @@ -0,0 +1,10 @@ +bundle: + name: pipeline_expected_file_got_notebook + +include: + - resources/pipeline.yml + +variables: + notebook_dir: + description: Directory with DLT notebooks + default: notebooks diff --git a/bundle/tests/pipeline_glob_paths/dlt/nyc_taxi_loader.py b/acceptance/bundle/paths/pipeline_expected_file_got_notebook/notebooks/nyc_taxi_loader.py similarity index 100% rename from bundle/tests/pipeline_glob_paths/dlt/nyc_taxi_loader.py rename to acceptance/bundle/paths/pipeline_expected_file_got_notebook/notebooks/nyc_taxi_loader.py diff --git a/acceptance/bundle/paths/pipeline_expected_file_got_notebook/output.txt b/acceptance/bundle/paths/pipeline_expected_file_got_notebook/output.txt new file mode 100644 index 0000000000..e294bf6c1c --- /dev/null +++ b/acceptance/bundle/paths/pipeline_expected_file_got_notebook/output.txt @@ -0,0 +1,22 @@ + +>>> [CLI] bundle validate --output json +Error: expected a file for "resources.pipelines.nyc_taxi_pipeline.libraries[0].file.path" but got a notebook: file at [TEST_TMP_DIR]/notebooks/nyc_taxi_loader.py is a notebook + + +Exit code: 1 +{ + "resources": { + "pipelines": { + "nyc_taxi_pipeline": { + "libraries": [ + { + "file": { + "path": "notebooks/nyc_taxi_loader.py" + } + } + ], + "permissions": [] + } + } + } +} diff --git a/acceptance/bundle/paths/pipeline_expected_file_got_notebook/resources/pipeline.yml b/acceptance/bundle/paths/pipeline_expected_file_got_notebook/resources/pipeline.yml new file mode 100644 index 0000000000..d4a7e2b037 --- /dev/null +++ b/acceptance/bundle/paths/pipeline_expected_file_got_notebook/resources/pipeline.yml @@ -0,0 +1,6 @@ +resources: + pipelines: + nyc_taxi_pipeline: + libraries: + # path points to a notebook, not a file, it should error out + - file: { path: "../${var.notebook_dir}/*.py" } diff --git a/acceptance/bundle/paths/pipeline_expected_file_got_notebook/script b/acceptance/bundle/paths/pipeline_expected_file_got_notebook/script new file mode 100644 index 0000000000..a6a93f74a7 --- /dev/null +++ b/acceptance/bundle/paths/pipeline_expected_file_got_notebook/script @@ -0,0 +1 @@ +errcode trace $CLI bundle validate --output json | jq 'pick(.resources)' diff --git a/acceptance/bundle/paths/pipeline_globs/files/wrong_file.py b/acceptance/bundle/paths/pipeline_globs/files/wrong_file.py new file mode 100644 index 0000000000..a798241447 --- /dev/null +++ b/acceptance/bundle/paths/pipeline_globs/files/wrong_file.py @@ -0,0 +1 @@ +# this file should never be used, because glob points to a different file diff --git a/acceptance/bundle/paths/pipeline_globs/notebooks/wrong_file.py b/acceptance/bundle/paths/pipeline_globs/notebooks/wrong_file.py new file mode 100644 index 0000000000..8af01a95aa --- /dev/null +++ b/acceptance/bundle/paths/pipeline_globs/notebooks/wrong_file.py @@ -0,0 +1,2 @@ +# Databricks notebook source +# this file should never be used, because glob points to a different file diff --git a/acceptance/bundle/paths/pipeline_globs/output.txt b/acceptance/bundle/paths/pipeline_globs/output.txt new file mode 100644 index 0000000000..e4c8a2c4c2 --- /dev/null +++ b/acceptance/bundle/paths/pipeline_globs/output.txt @@ -0,0 +1,60 @@ + +>>> [CLI] bundle validate --output json +{ + "resources": { + "pipelines": { + "nyc_taxi_pipeline": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/pipeline_glob_paths/default/state/metadata.json" + }, + "libraries": [ + { + "notebook": { + "path": "/Workspace/Users/[USERNAME]/.bundle/pipeline_glob_paths/default/files/notebooks/nyc_taxi_loader" + } + }, + { + "file": { + "path": "/Workspace/Users/[USERNAME]/.bundle/pipeline_glob_paths/default/files/files/nyc_taxi_loader.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/[USERNAME]/.bundle/pipeline_glob_paths/default/files/notebooks/nyc_taxi_loader" + } + }, + { + "file": { + "path": "/Workspace/Users/[USERNAME]/.bundle/pipeline_glob_paths/default/files/files/nyc_taxi_loader.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/[USERNAME]/.bundle/pipeline_glob_paths/default/files/notebooks/nyc_taxi_loader" + } + }, + { + "maven": { + "coordinates": "org.jsoup:jsoup:1.7.2" + } + }, + { + "jar": "*/*.jar" + }, + { + "notebook": { + "path": "/Workspace/Users/me@company.com/*.ipynb" + } + }, + { + "notebook": { + "path": "s3://notebooks/*.ipynb" + } + } + ], + "permissions": [] + } + } + } +} diff --git a/acceptance/bundle/paths/pipeline_globs/root/databricks.yml b/acceptance/bundle/paths/pipeline_globs/root/databricks.yml new file mode 100644 index 0000000000..a2b3f77698 --- /dev/null +++ b/acceptance/bundle/paths/pipeline_globs/root/databricks.yml @@ -0,0 +1,13 @@ +bundle: + name: pipeline_glob_paths + +include: + - resources/pipeline.yml + +variables: + notebook_dir: + description: Directory with DLT notebooks + default: notebooks + file_dir: + description: Directory with DLT files + default: files diff --git a/acceptance/bundle/paths/pipeline_globs/root/files/nyc_taxi_loader.py b/acceptance/bundle/paths/pipeline_globs/root/files/nyc_taxi_loader.py new file mode 100644 index 0000000000..9089eea9d6 --- /dev/null +++ b/acceptance/bundle/paths/pipeline_globs/root/files/nyc_taxi_loader.py @@ -0,0 +1 @@ +print("Hello from file!") diff --git a/acceptance/bundle/paths/pipeline_globs/root/notebooks/nyc_taxi_loader.py b/acceptance/bundle/paths/pipeline_globs/root/notebooks/nyc_taxi_loader.py new file mode 100644 index 0000000000..83181c7099 --- /dev/null +++ b/acceptance/bundle/paths/pipeline_globs/root/notebooks/nyc_taxi_loader.py @@ -0,0 +1,3 @@ +# Databricks notebook source + +print("Hello from notebook!") diff --git a/acceptance/bundle/paths/pipeline_globs/root/resources/files/wrong_file.py b/acceptance/bundle/paths/pipeline_globs/root/resources/files/wrong_file.py new file mode 100644 index 0000000000..a798241447 --- /dev/null +++ b/acceptance/bundle/paths/pipeline_globs/root/resources/files/wrong_file.py @@ -0,0 +1 @@ +# this file should never be used, because glob points to a different file diff --git a/acceptance/bundle/paths/pipeline_globs/root/resources/notebooks/wrong_file.py b/acceptance/bundle/paths/pipeline_globs/root/resources/notebooks/wrong_file.py new file mode 100644 index 0000000000..8af01a95aa --- /dev/null +++ b/acceptance/bundle/paths/pipeline_globs/root/resources/notebooks/wrong_file.py @@ -0,0 +1,2 @@ +# Databricks notebook source +# this file should never be used, because glob points to a different file diff --git a/acceptance/bundle/paths/pipeline_globs/root/resources/pipeline.yml b/acceptance/bundle/paths/pipeline_globs/root/resources/pipeline.yml new file mode 100644 index 0000000000..89e4d33ada --- /dev/null +++ b/acceptance/bundle/paths/pipeline_globs/root/resources/pipeline.yml @@ -0,0 +1,18 @@ +resources: + pipelines: + nyc_taxi_pipeline: + libraries: + # globs for notebooks and files are expanded + - notebook: { path: "../${var.notebook_dir}/*" } + - file: { path: "../${var.file_dir}/*" } + # globs can include file extensions + - notebook: { path: "../${var.notebook_dir}/*.py" } + - file: { path: "../${var.file_dir}/*.py" } + # non-glob files work + - notebook: { path: "../${var.notebook_dir}/nyc_taxi_loader.py" } + # maven libraries and jars remain as-is + - maven: { coordinates: "org.jsoup:jsoup:1.7.2" } + - jar: "*/*.jar" + # absolute paths and paths using URLs remain as-is + - notebook: { path: "/Workspace/Users/me@company.com/*.ipynb" } + - notebook: { path: "s3://notebooks/*.ipynb" } diff --git a/acceptance/bundle/paths/pipeline_globs/script b/acceptance/bundle/paths/pipeline_globs/script new file mode 100644 index 0000000000..d6b3baf2a3 --- /dev/null +++ b/acceptance/bundle/paths/pipeline_globs/script @@ -0,0 +1,3 @@ +cd root +trace $CLI bundle validate --output json | jq 'pick(.resources)' +rm -rf .databricks diff --git a/bundle/config/mutator/resourcemutator/process_static_resources.go b/bundle/config/mutator/resourcemutator/process_static_resources.go index 5935d370a0..cef71c62b3 100644 --- a/bundle/config/mutator/resourcemutator/process_static_resources.go +++ b/bundle/config/mutator/resourcemutator/process_static_resources.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" ) @@ -32,6 +33,27 @@ func (p processStaticResources) Apply(ctx context.Context, b *bundle.Bundle) dia return diag.FromErr(err) } + // only YAML resources need to have paths normalized, before normalizing paths + // we need to resolve variables because they can change path values: + // - variable can be used a prefix + // - path can be part of a complex variable value + diags := bundle.ApplySeq( + ctx, + b, + // Reads (dynamic): * (strings) (searches for variable references in string values) + // Updates (dynamic): resources.* (strings) (resolves variable references to their actual values) + // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes + mutator.ResolveVariableReferencesOnlyResources( + "bundle", + "workspace", + "variables", + ), + mutator.NormalizePaths(), + ) + if diags.HasError() { + return diags + } + return NormalizeAndInitializeResources(ctx, b, addedResources) } diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 21e1cebe62..bc66f3da9d 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -121,7 +121,6 @@ func Initialize(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { // Static resources (e.g. YAML) are already loaded, we initialize and normalize them before Python resourcemutator.ProcessStaticResources(), - mutator.NormalizePaths(), pythonmutator.PythonMutator(pythonmutator.PythonMutatorPhaseLoad), pythonmutator.PythonMutator(pythonmutator.PythonMutatorPhaseInit), diff --git a/bundle/tests/loader.go b/bundle/tests/loader.go index fd200a5b65..20d096cc93 100644 --- a/bundle/tests/loader.go +++ b/bundle/tests/loader.go @@ -9,12 +9,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/phases" - "github.com/databricks/cli/libs/dbr" "github.com/databricks/cli/libs/diag" - "github.com/databricks/databricks-sdk-go/config" - "github.com/databricks/databricks-sdk-go/experimental/mocks" - "github.com/databricks/databricks-sdk-go/service/iam" - "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) @@ -54,26 +49,3 @@ func loadTargetWithDiags(path, env string) (*bundle.Bundle, diag.Diagnostics) { )) return b, diags } - -func configureMock(t *testing.T, b *bundle.Bundle) { - // Configure mock workspace client - m := mocks.NewMockWorkspaceClient(t) - m.WorkspaceClient.Config = &config.Config{ - Host: "https://mock.databricks.workspace.com", - } - m.GetMockCurrentUserAPI().EXPECT().Me(mock.Anything).Return(&iam.User{ - UserName: "user@domain.com", - }, nil) - b.SetWorkpaceClient(m.WorkspaceClient) -} - -func initializeTarget(t *testing.T, path, env string) (*bundle.Bundle, diag.Diagnostics) { - b := load(t, path) - configureMock(t, b) - - ctx := dbr.MockRuntime(context.Background(), dbr.Environment{}) - diags := bundle.Apply(ctx, b, mutator.SelectTarget(env)) - diags = diags.Extend(phases.Initialize(ctx, b)) - - return b, diags -} diff --git a/bundle/tests/pipeline_glob_paths/databricks.yml b/bundle/tests/pipeline_glob_paths/databricks.yml deleted file mode 100644 index d25b977ba2..0000000000 --- a/bundle/tests/pipeline_glob_paths/databricks.yml +++ /dev/null @@ -1,24 +0,0 @@ -bundle: - name: pipeline_glob_paths - -resources: - pipelines: - nyc_taxi_pipeline: - name: "nyc taxi loader" - libraries: - - notebook: - path: ./dlt/* - -targets: - default: - default: true - - error: - default: false - - resources: - pipelines: - nyc_taxi_pipeline: - libraries: - - notebook: - path: ./non-existent diff --git a/bundle/tests/pipeline_glob_paths_test.go b/bundle/tests/pipeline_glob_paths_test.go deleted file mode 100644 index 9053c658fc..0000000000 --- a/bundle/tests/pipeline_glob_paths_test.go +++ /dev/null @@ -1,22 +0,0 @@ -package config_tests - -import ( - "testing" - - "github.com/stretchr/testify/require" -) - -func TestExpandPipelineGlobPaths(t *testing.T) { - b, diags := initializeTarget(t, "./pipeline_glob_paths", "default") - require.NoError(t, diags.Error()) - require.Equal( - t, - "/Workspace/Users/user@domain.com/.bundle/pipeline_glob_paths/default/files/dlt/nyc_taxi_loader", - b.Config.Resources.Pipelines["nyc_taxi_pipeline"].Libraries[0].Notebook.Path, - ) -} - -func TestExpandPipelineGlobPathsWithNonExistent(t *testing.T) { - _, diags := initializeTarget(t, "./pipeline_glob_paths", "error") - require.ErrorContains(t, diags.Error(), `notebook "non-existent" not found`) -}