From f10b6e4669cabc08ca24318962f0ffb00bba19bf Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 5 Feb 2025 11:09:51 +0000 Subject: [PATCH 01/11] Raise an error when there are multiple locallibraries with the same basename are used --- bundle/libraries/same_name_libraries.go | 72 +++++++++++++++ bundle/libraries/same_name_libraries_test.go | 92 ++++++++++++++++++++ bundle/phases/deploy.go | 1 + 3 files changed, 165 insertions(+) create mode 100644 bundle/libraries/same_name_libraries.go create mode 100644 bundle/libraries/same_name_libraries_test.go diff --git a/bundle/libraries/same_name_libraries.go b/bundle/libraries/same_name_libraries.go new file mode 100644 index 0000000000..301791060e --- /dev/null +++ b/bundle/libraries/same_name_libraries.go @@ -0,0 +1,72 @@ +package libraries + +import ( + "context" + "path/filepath" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +type checkForSameNameLibraries struct{} + +var patterns = []dyn.Pattern{ + taskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()), + forEachTaskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()), + envDepsPattern.Append(dyn.AnyIndex()), +} + +func (c checkForSameNameLibraries) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics + libBaseNames := make(map[string]bool) + + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + var err error + for _, pattern := range patterns { + v, err = dyn.MapByPattern(v, pattern, func(p dyn.Path, lv dyn.Value) (dyn.Value, error) { + libPath := lv.MustString() + // If not local library, skip the check + if !IsLibraryLocal(libPath) { + return lv, nil + } + + lib := filepath.Base(lv.MustString()) + if libBaseNames[lib] { + diags = append(diags, diag.Diagnostic{ + Severity: diag.Error, + Summary: "Duplicate local library name", + Detail: "Local library names must be unique", + Locations: lv.Locations(), + Paths: []dyn.Path{p}, + }) + } + + libBaseNames[lib] = true + return lv, nil + }) + if err != nil { + return dyn.InvalidValue, err + } + } + + if err != nil { + return dyn.InvalidValue, err + } + + return v, nil + }) + if err != nil { + diags = diags.Extend(diag.FromErr(err)) + } + + return diags +} + +func (c checkForSameNameLibraries) Name() string { + return "CheckForSameNameLibraries" +} + +func CheckForSameNameLibraries() bundle.Mutator { + return checkForSameNameLibraries{} +} diff --git a/bundle/libraries/same_name_libraries_test.go b/bundle/libraries/same_name_libraries_test.go new file mode 100644 index 0000000000..98007b5072 --- /dev/null +++ b/bundle/libraries/same_name_libraries_test.go @@ -0,0 +1,92 @@ +package libraries + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/stretchr/testify/require" +) + +func TestSameNameLibraries(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "test": { + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + { + Libraries: []compute.Library{ + { + Whl: "full/path/test.whl", + }, + }, + }, + { + Libraries: []compute.Library{ + { + Whl: "other/path/test.whl", + }, + }, + }, + }, + }, + }, + }, + }, + }, + } + + diags := bundle.Apply(context.Background(), b, CheckForSameNameLibraries()) + require.Len(t, diags, 1) + require.Equal(t, diag.Error, diags[0].Severity) + require.Equal(t, "Duplicate local library name", diags[0].Summary) +} + +func TestSameNameLibrariesWithUniqueLibraries(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "test": { + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + { + Libraries: []compute.Library{ + { + Whl: "full/path/test-0.1.1.whl", + }, + + { + Whl: "cowsay", + }, + }, + }, + { + Libraries: []compute.Library{ + { + Whl: "other/path/test-0.1.0.whl", + }, + + { + Whl: "cowsay", + }, + }, + }, + }, + }, + }, + }, + }, + }, + } + + diags := bundle.Apply(context.Background(), b, CheckForSameNameLibraries()) + require.Empty(t, diags) +} diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index c6ec04962e..02294b99da 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -155,6 +155,7 @@ func Deploy(outputHandler sync.OutputHandler) bundle.Mutator { mutator.ValidateGitDetails(), artifacts.CleanUp(), libraries.ExpandGlobReferences(), + libraries.CheckForSameNameLibraries(), libraries.Upload(), trampoline.TransformWheelTask(), files.Upload(outputHandler), From 0391c5671476f4dde614fdc30ce79ad39e7892d6 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 5 Feb 2025 12:03:46 +0000 Subject: [PATCH 02/11] added missing comment --- bundle/phases/deploy.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 02294b99da..2e9211a7ef 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -155,6 +155,10 @@ func Deploy(outputHandler sync.OutputHandler) bundle.Mutator { mutator.ValidateGitDetails(), artifacts.CleanUp(), libraries.ExpandGlobReferences(), + // libraries.CheckForSameNameLibraries() needs to be run after we expand glob references so we + // know what are the actual library paths. + // libraries.ExpandGlobReferences() has to be run after the libraries are built and thus this + // mutator is part of the deploy step rather than validate. libraries.CheckForSameNameLibraries(), libraries.Upload(), trampoline.TransformWheelTask(), From aaf74bfde10e8fbb07a3594c6434efed705535c5 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Fri, 7 Feb 2025 15:44:49 +0000 Subject: [PATCH 03/11] fixes --- bundle/libraries/same_name_libraries.go | 47 +++++++++++++++----- bundle/libraries/same_name_libraries_test.go | 31 ++++++++++++- 2 files changed, 66 insertions(+), 12 deletions(-) diff --git a/bundle/libraries/same_name_libraries.go b/bundle/libraries/same_name_libraries.go index 301791060e..88b96ab54b 100644 --- a/bundle/libraries/same_name_libraries.go +++ b/bundle/libraries/same_name_libraries.go @@ -17,9 +17,15 @@ var patterns = []dyn.Pattern{ envDepsPattern.Append(dyn.AnyIndex()), } +type libData struct { + fullPath string + locations []dyn.Location + paths []dyn.Path +} + func (c checkForSameNameLibraries) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { var diags diag.Diagnostics - libBaseNames := make(map[string]bool) + libs := make(map[string]*libData) err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { var err error @@ -31,18 +37,22 @@ func (c checkForSameNameLibraries) Apply(ctx context.Context, b *bundle.Bundle) return lv, nil } - lib := filepath.Base(lv.MustString()) - if libBaseNames[lib] { - diags = append(diags, diag.Diagnostic{ - Severity: diag.Error, - Summary: "Duplicate local library name", - Detail: "Local library names must be unique", - Locations: lv.Locations(), - Paths: []dyn.Path{p}, - }) + libFullPath := lv.MustString() + lib := filepath.Base(libFullPath) + // If the same basename was seen already but full path is different + // then it's a duplicate. Add the location to the location list. + lp, ok := libs[lib] + if !ok { + libs[lib] = &libData{ + fullPath: libFullPath, + locations: []dyn.Location{lv.Location()}, + paths: []dyn.Path{p}, + } + } else if lp.fullPath != libFullPath { + lp.locations = append(lp.locations, lv.Location()) + lp.paths = append(lp.paths, p) } - libBaseNames[lib] = true return lv, nil }) if err != nil { @@ -56,6 +66,21 @@ func (c checkForSameNameLibraries) Apply(ctx context.Context, b *bundle.Bundle) return v, nil }) + + // Iterate over all the libraries and check if there are any duplicates. + // Duplicates will have more than one location. + // If there are duplicates, add a diagnostic. + for lib, lv := range libs { + if len(lv.locations) > 1 { + diags = append(diags, diag.Diagnostic{ + Severity: diag.Error, + Summary: "Duplicate local library name " + lib, + Detail: "Local library names must be unique", + Locations: lv.locations, + Paths: lv.paths, + }) + } + } if err != nil { diags = diags.Extend(diag.FromErr(err)) } diff --git a/bundle/libraries/same_name_libraries_test.go b/bundle/libraries/same_name_libraries_test.go index 98007b5072..42c38773bb 100644 --- a/bundle/libraries/same_name_libraries_test.go +++ b/bundle/libraries/same_name_libraries_test.go @@ -7,7 +7,9 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/internal/bundletest" "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/stretchr/testify/require" @@ -43,10 +45,30 @@ func TestSameNameLibraries(t *testing.T) { }, } + bundletest.SetLocation(b, "resources.jobs.test.tasks[0]", []dyn.Location{ + {File: "databricks.yml", Line: 10, Column: 1}, + }) + bundletest.SetLocation(b, "resources.jobs.test.tasks[1]", []dyn.Location{ + {File: "databricks.yml", Line: 20, Column: 1}, + }) + diags := bundle.Apply(context.Background(), b, CheckForSameNameLibraries()) require.Len(t, diags, 1) require.Equal(t, diag.Error, diags[0].Severity) - require.Equal(t, "Duplicate local library name", diags[0].Summary) + require.Equal(t, "Duplicate local library name test.whl", diags[0].Summary) + require.Equal(t, []dyn.Location{ + {File: "databricks.yml", Line: 10, Column: 1}, + {File: "databricks.yml", Line: 20, Column: 1}, + }, diags[0].Locations) + + paths := make([]string, 0) + for _, p := range diags[0].Paths { + paths = append(paths, p.String()) + } + require.Equal(t, []string{ + "resources.jobs.test.tasks[0].libraries[0].whl", + "resources.jobs.test.tasks[1].libraries[0].whl", + }, paths) } func TestSameNameLibrariesWithUniqueLibraries(t *testing.T) { @@ -79,6 +101,13 @@ func TestSameNameLibrariesWithUniqueLibraries(t *testing.T) { }, }, }, + { + Libraries: []compute.Library{ + { + Whl: "full/path/test-0.1.1.whl", // Use the same library as the first task + }, + }, + }, }, }, }, From 59108187fe98138c99059f33d11633c891accd7b Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Fri, 7 Feb 2025 17:36:15 +0000 Subject: [PATCH 04/11] added acc test --- .../same_name_libraries/databricks.yml | 50 +++++++++++++++++++ .../artifacts/same_name_libraries/output.txt | 14 ++++++ .../artifacts/same_name_libraries/script | 2 + .../artifacts/same_name_libraries/test.toml | 0 .../same_name_libraries/whl1/setup.py | 36 +++++++++++++ .../whl1/src/my_default_python/__init__.py | 1 + .../whl1/src/my_default_python/main.py | 1 + .../same_name_libraries/whl2/setup.py | 36 +++++++++++++ .../whl2/src/my_default_python/__init__.py | 1 + .../whl2/src/my_default_python/main.py | 1 + bundle/libraries/expand_glob_references.go | 2 +- 11 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 acceptance/bundle/artifacts/same_name_libraries/databricks.yml create mode 100644 acceptance/bundle/artifacts/same_name_libraries/output.txt create mode 100644 acceptance/bundle/artifacts/same_name_libraries/script create mode 100644 acceptance/bundle/artifacts/same_name_libraries/test.toml create mode 100644 acceptance/bundle/artifacts/same_name_libraries/whl1/setup.py create mode 100644 acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/__init__.py create mode 100644 acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/main.py create mode 100644 acceptance/bundle/artifacts/same_name_libraries/whl2/setup.py create mode 100644 acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/__init__.py create mode 100644 acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/main.py diff --git a/acceptance/bundle/artifacts/same_name_libraries/databricks.yml b/acceptance/bundle/artifacts/same_name_libraries/databricks.yml new file mode 100644 index 0000000000..a065bae768 --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/databricks.yml @@ -0,0 +1,50 @@ +bundle: + name: same_name_libraries + +variables: + cluster: + default: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + data_security_mode: SINGLE_USER + num_workers: 0 + spark_conf: + spark.master: "local[*, 4]" + spark.databricks.cluster.profile: singleNode + custom_tags: + ResourceClass: SingleNode + +artifacts: + whl1: + type: whl + path: ./whl1 + whl2: + type: whl + path: ./whl2 + +resources: + jobs: + test: + name: "test" + tasks: + - task_key: task1 + new_cluster: ${var.cluster} + python_wheel_task: + entry_point: main + package_name: my_default_python + libraries: + - whl: ./whl1/dist/*.whl + - task_key: task2 + new_cluster: ${var.cluster} + python_wheel_task: + entry_point: main + package_name: my_default_python + libraries: + - whl: ./whl2/dist/*.whl + - task_key: task3 + new_cluster: ${var.cluster} + python_wheel_task: + entry_point: main + package_name: my_default_python + libraries: + - whl: ./whl1/dist/*.whl diff --git a/acceptance/bundle/artifacts/same_name_libraries/output.txt b/acceptance/bundle/artifacts/same_name_libraries/output.txt new file mode 100644 index 0000000000..38cdd43c4e --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/output.txt @@ -0,0 +1,14 @@ + +>>> errcode [CLI] bundle deploy +Building whl1... +Building whl2... +Error: Duplicate local library name my_default_python-0.0.1-py3-none-any.whl + at resources.jobs.test.tasks[0].libraries[0].whl + resources.jobs.test.tasks[1].libraries[0].whl + in databricks.yml:36:15 + databricks.yml:43:15 + +Local library names must be unique + + +Exit code: 1 diff --git a/acceptance/bundle/artifacts/same_name_libraries/script b/acceptance/bundle/artifacts/same_name_libraries/script new file mode 100644 index 0000000000..6c899df07d --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/script @@ -0,0 +1,2 @@ +trace errcode $CLI bundle deploy +rm -rf whl1 whl2 diff --git a/acceptance/bundle/artifacts/same_name_libraries/test.toml b/acceptance/bundle/artifacts/same_name_libraries/test.toml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl1/setup.py b/acceptance/bundle/artifacts/same_name_libraries/whl1/setup.py new file mode 100644 index 0000000000..1afaf3a4f2 --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/whl1/setup.py @@ -0,0 +1,36 @@ +""" +setup.py configuration script describing how to build and package this project. + +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the my_default_python project. +""" + +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import my_default_python + +setup( + name="my_default_python", + version=my_default_python.__version__, + url="https://databricks.com", + author="[USERNAME]", + description="wheel file based on my_default_python/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_default_python.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. + # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/__init__.py b/acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/__init__.py new file mode 100644 index 0000000000..f102a9cadf --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/main.py b/acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/main.py new file mode 100644 index 0000000000..11b15b1a45 --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/main.py @@ -0,0 +1 @@ +print("hello") diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl2/setup.py b/acceptance/bundle/artifacts/same_name_libraries/whl2/setup.py new file mode 100644 index 0000000000..1afaf3a4f2 --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/whl2/setup.py @@ -0,0 +1,36 @@ +""" +setup.py configuration script describing how to build and package this project. + +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the my_default_python project. +""" + +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import my_default_python + +setup( + name="my_default_python", + version=my_default_python.__version__, + url="https://databricks.com", + author="[USERNAME]", + description="wheel file based on my_default_python/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_default_python.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. + # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/__init__.py b/acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/__init__.py new file mode 100644 index 0000000000..f102a9cadf --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/main.py b/acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/main.py new file mode 100644 index 0000000000..11b15b1a45 --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/main.py @@ -0,0 +1 @@ +print("hello") diff --git a/bundle/libraries/expand_glob_references.go b/bundle/libraries/expand_glob_references.go index bb19050452..7a808f6270 100644 --- a/bundle/libraries/expand_glob_references.go +++ b/bundle/libraries/expand_glob_references.go @@ -92,7 +92,7 @@ func expandLibraries(b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diagnostic for _, match := range matches { output = append(output, dyn.NewValue(map[string]dyn.Value{ - libType: dyn.V(match), + libType: dyn.NewValue(match, lib.Locations()), }, lib.Locations())) } } From a379fb3cd3a1d6029de1249fec480a4c68b33b9c Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 26 Feb 2025 12:34:25 +0100 Subject: [PATCH 05/11] fixes --- .../bundle/artifacts/same_name_libraries/databricks.yml | 4 ++++ acceptance/bundle/artifacts/same_name_libraries/output.txt | 2 +- bundle/libraries/same_name_libraries.go | 6 +++++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/acceptance/bundle/artifacts/same_name_libraries/databricks.yml b/acceptance/bundle/artifacts/same_name_libraries/databricks.yml index a065bae768..d58674a64c 100644 --- a/acceptance/bundle/artifacts/same_name_libraries/databricks.yml +++ b/acceptance/bundle/artifacts/same_name_libraries/databricks.yml @@ -34,6 +34,8 @@ resources: package_name: my_default_python libraries: - whl: ./whl1/dist/*.whl + - pypi: + package: test_package - task_key: task2 new_cluster: ${var.cluster} python_wheel_task: @@ -41,6 +43,8 @@ resources: package_name: my_default_python libraries: - whl: ./whl2/dist/*.whl + - maven: + coordinates: org.apache.spark:spark-sql_2.12:3.1.1 - task_key: task3 new_cluster: ${var.cluster} python_wheel_task: diff --git a/acceptance/bundle/artifacts/same_name_libraries/output.txt b/acceptance/bundle/artifacts/same_name_libraries/output.txt index 38cdd43c4e..1253d96809 100644 --- a/acceptance/bundle/artifacts/same_name_libraries/output.txt +++ b/acceptance/bundle/artifacts/same_name_libraries/output.txt @@ -6,7 +6,7 @@ Error: Duplicate local library name my_default_python-0.0.1-py3-none-any.whl at resources.jobs.test.tasks[0].libraries[0].whl resources.jobs.test.tasks[1].libraries[0].whl in databricks.yml:36:15 - databricks.yml:43:15 + databricks.yml:45:15 Local library names must be unique diff --git a/bundle/libraries/same_name_libraries.go b/bundle/libraries/same_name_libraries.go index 88b96ab54b..2c029de365 100644 --- a/bundle/libraries/same_name_libraries.go +++ b/bundle/libraries/same_name_libraries.go @@ -31,7 +31,11 @@ func (c checkForSameNameLibraries) Apply(ctx context.Context, b *bundle.Bundle) var err error for _, pattern := range patterns { v, err = dyn.MapByPattern(v, pattern, func(p dyn.Path, lv dyn.Value) (dyn.Value, error) { - libPath := lv.MustString() + libPath, ok := lv.AsString() + if !ok { + return lv, nil + } + // If not local library, skip the check if !IsLibraryLocal(libPath) { return lv, nil From d933227eaf0411f0441b1548f039bb8dfefcde72 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 27 Feb 2025 13:17:43 +0100 Subject: [PATCH 06/11] added acc test + cleanup --- .../same_name_libraries/whl1/setup.py | 8 --- .../same_name_libraries/whl2/setup.py | 8 --- .../unique_name_libraries/databricks.yml | 56 +++++++++++++++++++ .../unique_name_libraries/output.txt | 10 ++++ .../artifacts/unique_name_libraries/script | 2 + .../artifacts/unique_name_libraries/test.toml | 6 ++ .../unique_name_libraries/whl1/setup.py | 28 ++++++++++ .../whl1/src/my_package/__init__.py | 1 + .../whl1/src/my_package/main.py | 1 + .../unique_name_libraries/whl2/setup.py | 28 ++++++++++ .../whl2/src/my_other_package/__init__.py | 1 + .../whl2/src/my_other_package/main.py | 1 + bundle/libraries/same_name_libraries.go | 25 ++++----- 13 files changed, 146 insertions(+), 29 deletions(-) create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/databricks.yml create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/output.txt create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/script create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/test.toml create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/whl1/setup.py create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/__init__.py create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/main.py create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/whl2/setup.py create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/__init__.py create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/main.py diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl1/setup.py b/acceptance/bundle/artifacts/same_name_libraries/whl1/setup.py index 1afaf3a4f2..ddd81295ed 100644 --- a/acceptance/bundle/artifacts/same_name_libraries/whl1/setup.py +++ b/acceptance/bundle/artifacts/same_name_libraries/whl1/setup.py @@ -1,11 +1,3 @@ -""" -setup.py configuration script describing how to build and package this project. - -This file is primarily used by the setuptools library and typically should not -be executed directly. See README.md for how to deploy, test, and run -the my_default_python project. -""" - from setuptools import setup, find_packages import sys diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl2/setup.py b/acceptance/bundle/artifacts/same_name_libraries/whl2/setup.py index 1afaf3a4f2..ddd81295ed 100644 --- a/acceptance/bundle/artifacts/same_name_libraries/whl2/setup.py +++ b/acceptance/bundle/artifacts/same_name_libraries/whl2/setup.py @@ -1,11 +1,3 @@ -""" -setup.py configuration script describing how to build and package this project. - -This file is primarily used by the setuptools library and typically should not -be executed directly. See README.md for how to deploy, test, and run -the my_default_python project. -""" - from setuptools import setup, find_packages import sys diff --git a/acceptance/bundle/artifacts/unique_name_libraries/databricks.yml b/acceptance/bundle/artifacts/unique_name_libraries/databricks.yml new file mode 100644 index 0000000000..dd13c1918a --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/databricks.yml @@ -0,0 +1,56 @@ +bundle: + name: unique_name_libraries + +variables: + cluster: + default: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + data_security_mode: SINGLE_USER + num_workers: 0 + spark_conf: + spark.master: "local[*, 4]" + spark.databricks.cluster.profile: singleNode + custom_tags: + ResourceClass: SingleNode + +artifacts: + whl1: + type: whl + path: ./whl1 + whl2: + type: whl + path: ./whl2 + +resources: + jobs: + test: + name: "test" + tasks: + - task_key: task1 + new_cluster: ${var.cluster} + python_wheel_task: + entry_point: main + package_name: my_package + libraries: + - whl: ./whl1/dist/*.whl + - whl: cowsay + - pypi: + package: test_package + - task_key: task2 + new_cluster: ${var.cluster} + python_wheel_task: + entry_point: main + package_name: my_other_package + libraries: + - whl: ./whl2/dist/*.whl + - whl: cowsay + - maven: + coordinates: org.apache.spark:spark-sql_2.12:3.1.1 + - task_key: task3 + new_cluster: ${var.cluster} + python_wheel_task: + entry_point: main + package_name: my_default_python + libraries: + - whl: ./whl1/dist/*.whl diff --git a/acceptance/bundle/artifacts/unique_name_libraries/output.txt b/acceptance/bundle/artifacts/unique_name_libraries/output.txt new file mode 100644 index 0000000000..ecc7bf57ba --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/output.txt @@ -0,0 +1,10 @@ + +>>> errcode [CLI] bundle deploy +Building whl1... +Building whl2... +Uploading [package name] +Uploading [package name] +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/unique_name_libraries/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! diff --git a/acceptance/bundle/artifacts/unique_name_libraries/script b/acceptance/bundle/artifacts/unique_name_libraries/script new file mode 100644 index 0000000000..6c899df07d --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/script @@ -0,0 +1,2 @@ +trace errcode $CLI bundle deploy +rm -rf whl1 whl2 diff --git a/acceptance/bundle/artifacts/unique_name_libraries/test.toml b/acceptance/bundle/artifacts/unique_name_libraries/test.toml new file mode 100644 index 0000000000..2f36f7415a --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/test.toml @@ -0,0 +1,6 @@ +LocalOnly=true + +# The order in which files are uploaded can be different, so we just replace the name +[[Repls]] +Old="Uploading .*-0.0.1-py3-none-any.whl..." +New="Uploading [package name]" diff --git a/acceptance/bundle/artifacts/unique_name_libraries/whl1/setup.py b/acceptance/bundle/artifacts/unique_name_libraries/whl1/setup.py new file mode 100644 index 0000000000..ca85def329 --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/whl1/setup.py @@ -0,0 +1,28 @@ +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import my_package + +setup( + name="my_package", + version=my_package.__version__, + url="https://databricks.com", + author="[USERNAME]", + description="wheel file based on my_package/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_package.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. + # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/__init__.py b/acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/__init__.py new file mode 100644 index 0000000000..f102a9cadf --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/main.py b/acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/main.py new file mode 100644 index 0000000000..11b15b1a45 --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/main.py @@ -0,0 +1 @@ +print("hello") diff --git a/acceptance/bundle/artifacts/unique_name_libraries/whl2/setup.py b/acceptance/bundle/artifacts/unique_name_libraries/whl2/setup.py new file mode 100644 index 0000000000..5e5b34788b --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/whl2/setup.py @@ -0,0 +1,28 @@ +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import my_other_package + +setup( + name="my_other_package", + version=my_other_package.__version__, + url="https://databricks.com", + author="[USERNAME]", + description="wheel file based on my_other_package/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_other_package.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. + # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/__init__.py b/acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/__init__.py new file mode 100644 index 0000000000..f102a9cadf --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/main.py b/acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/main.py new file mode 100644 index 0000000000..11b15b1a45 --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/main.py @@ -0,0 +1 @@ +print("hello") diff --git a/bundle/libraries/same_name_libraries.go b/bundle/libraries/same_name_libraries.go index 2c029de365..6ad63346c5 100644 --- a/bundle/libraries/same_name_libraries.go +++ b/bundle/libraries/same_name_libraries.go @@ -27,37 +27,36 @@ func (c checkForSameNameLibraries) Apply(ctx context.Context, b *bundle.Bundle) var diags diag.Diagnostics libs := make(map[string]*libData) - err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + err := b.Config.Mutate(func(rootConfig dyn.Value) (dyn.Value, error) { var err error for _, pattern := range patterns { - v, err = dyn.MapByPattern(v, pattern, func(p dyn.Path, lv dyn.Value) (dyn.Value, error) { - libPath, ok := lv.AsString() + rootConfig, err = dyn.MapByPattern(rootConfig, pattern, func(p dyn.Path, libraryValue dyn.Value) (dyn.Value, error) { + libPath, ok := libraryValue.AsString() if !ok { - return lv, nil + return libraryValue, nil } // If not local library, skip the check if !IsLibraryLocal(libPath) { - return lv, nil + return libraryValue, nil } - libFullPath := lv.MustString() - lib := filepath.Base(libFullPath) + lib := filepath.Base(libPath) // If the same basename was seen already but full path is different // then it's a duplicate. Add the location to the location list. lp, ok := libs[lib] if !ok { libs[lib] = &libData{ - fullPath: libFullPath, - locations: []dyn.Location{lv.Location()}, + fullPath: libPath, + locations: []dyn.Location{libraryValue.Location()}, paths: []dyn.Path{p}, } - } else if lp.fullPath != libFullPath { - lp.locations = append(lp.locations, lv.Location()) + } else if lp.fullPath != libPath { + lp.locations = append(lp.locations, libraryValue.Location()) lp.paths = append(lp.paths, p) } - return lv, nil + return libraryValue, nil }) if err != nil { return dyn.InvalidValue, err @@ -68,7 +67,7 @@ func (c checkForSameNameLibraries) Apply(ctx context.Context, b *bundle.Bundle) return dyn.InvalidValue, err } - return v, nil + return rootConfig, nil }) // Iterate over all the libraries and check if there are any duplicates. From 3360d78b90431b7cd31eb47eb84aaee0c3f9e8ca Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 27 Feb 2025 15:26:24 +0100 Subject: [PATCH 07/11] added full paths --- .../artifacts/same_name_libraries/output.txt | 4 +- .../artifacts/unique_name_libraries/test.toml | 2 +- bundle/libraries/same_name_libraries.go | 20 +-- bundle/libraries/same_name_libraries_test.go | 121 ------------------ bundle/phases/deploy.go | 2 +- 5 files changed, 16 insertions(+), 133 deletions(-) delete mode 100644 bundle/libraries/same_name_libraries_test.go diff --git a/acceptance/bundle/artifacts/same_name_libraries/output.txt b/acceptance/bundle/artifacts/same_name_libraries/output.txt index 1253d96809..ee6c9d5667 100644 --- a/acceptance/bundle/artifacts/same_name_libraries/output.txt +++ b/acceptance/bundle/artifacts/same_name_libraries/output.txt @@ -2,13 +2,13 @@ >>> errcode [CLI] bundle deploy Building whl1... Building whl2... -Error: Duplicate local library name my_default_python-0.0.1-py3-none-any.whl +Error: Duplicate local library names: my_default_python-0.0.1-py3-none-any.whl at resources.jobs.test.tasks[0].libraries[0].whl resources.jobs.test.tasks[1].libraries[0].whl in databricks.yml:36:15 databricks.yml:45:15 -Local library names must be unique +Local library names must be unique but found libraries with the same name: whl1/dist/my_default_python-0.0.1-py3-none-any.whl, whl2/dist/my_default_python-0.0.1-py3-none-any.whl Exit code: 1 diff --git a/acceptance/bundle/artifacts/unique_name_libraries/test.toml b/acceptance/bundle/artifacts/unique_name_libraries/test.toml index 2f36f7415a..280338bd6a 100644 --- a/acceptance/bundle/artifacts/unique_name_libraries/test.toml +++ b/acceptance/bundle/artifacts/unique_name_libraries/test.toml @@ -1,4 +1,4 @@ -LocalOnly=true +Cloud = false # The order in which files are uploaded can be different, so we just replace the name [[Repls]] diff --git a/bundle/libraries/same_name_libraries.go b/bundle/libraries/same_name_libraries.go index 6ad63346c5..ab869d3d2b 100644 --- a/bundle/libraries/same_name_libraries.go +++ b/bundle/libraries/same_name_libraries.go @@ -3,6 +3,7 @@ package libraries import ( "context" "path/filepath" + "strings" "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/diag" @@ -18,9 +19,10 @@ var patterns = []dyn.Pattern{ } type libData struct { - fullPath string - locations []dyn.Location - paths []dyn.Path + fullPath string + locations []dyn.Location + paths []dyn.Path + otherPaths []string } func (c checkForSameNameLibraries) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { @@ -47,13 +49,15 @@ func (c checkForSameNameLibraries) Apply(ctx context.Context, b *bundle.Bundle) lp, ok := libs[lib] if !ok { libs[lib] = &libData{ - fullPath: libPath, - locations: []dyn.Location{libraryValue.Location()}, - paths: []dyn.Path{p}, + fullPath: libPath, + locations: []dyn.Location{libraryValue.Location()}, + paths: []dyn.Path{p}, + otherPaths: []string{}, } } else if lp.fullPath != libPath { lp.locations = append(lp.locations, libraryValue.Location()) lp.paths = append(lp.paths, p) + lp.otherPaths = append(lp.otherPaths, libPath) } return libraryValue, nil @@ -77,8 +81,8 @@ func (c checkForSameNameLibraries) Apply(ctx context.Context, b *bundle.Bundle) if len(lv.locations) > 1 { diags = append(diags, diag.Diagnostic{ Severity: diag.Error, - Summary: "Duplicate local library name " + lib, - Detail: "Local library names must be unique", + Summary: "Duplicate local library names: " + lib, + Detail: "Local library names must be unique but found libraries with the same name: " + lv.fullPath + ", " + strings.Join(lv.otherPaths, ", "), Locations: lv.locations, Paths: lv.paths, }) diff --git a/bundle/libraries/same_name_libraries_test.go b/bundle/libraries/same_name_libraries_test.go deleted file mode 100644 index 42c38773bb..0000000000 --- a/bundle/libraries/same_name_libraries_test.go +++ /dev/null @@ -1,121 +0,0 @@ -package libraries - -import ( - "context" - "testing" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/cli/bundle/internal/bundletest" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/dyn" - "github.com/databricks/databricks-sdk-go/service/compute" - "github.com/databricks/databricks-sdk-go/service/jobs" - "github.com/stretchr/testify/require" -) - -func TestSameNameLibraries(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "test": { - JobSettings: &jobs.JobSettings{ - Tasks: []jobs.Task{ - { - Libraries: []compute.Library{ - { - Whl: "full/path/test.whl", - }, - }, - }, - { - Libraries: []compute.Library{ - { - Whl: "other/path/test.whl", - }, - }, - }, - }, - }, - }, - }, - }, - }, - } - - bundletest.SetLocation(b, "resources.jobs.test.tasks[0]", []dyn.Location{ - {File: "databricks.yml", Line: 10, Column: 1}, - }) - bundletest.SetLocation(b, "resources.jobs.test.tasks[1]", []dyn.Location{ - {File: "databricks.yml", Line: 20, Column: 1}, - }) - - diags := bundle.Apply(context.Background(), b, CheckForSameNameLibraries()) - require.Len(t, diags, 1) - require.Equal(t, diag.Error, diags[0].Severity) - require.Equal(t, "Duplicate local library name test.whl", diags[0].Summary) - require.Equal(t, []dyn.Location{ - {File: "databricks.yml", Line: 10, Column: 1}, - {File: "databricks.yml", Line: 20, Column: 1}, - }, diags[0].Locations) - - paths := make([]string, 0) - for _, p := range diags[0].Paths { - paths = append(paths, p.String()) - } - require.Equal(t, []string{ - "resources.jobs.test.tasks[0].libraries[0].whl", - "resources.jobs.test.tasks[1].libraries[0].whl", - }, paths) -} - -func TestSameNameLibrariesWithUniqueLibraries(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "test": { - JobSettings: &jobs.JobSettings{ - Tasks: []jobs.Task{ - { - Libraries: []compute.Library{ - { - Whl: "full/path/test-0.1.1.whl", - }, - - { - Whl: "cowsay", - }, - }, - }, - { - Libraries: []compute.Library{ - { - Whl: "other/path/test-0.1.0.whl", - }, - - { - Whl: "cowsay", - }, - }, - }, - { - Libraries: []compute.Library{ - { - Whl: "full/path/test-0.1.1.whl", // Use the same library as the first task - }, - }, - }, - }, - }, - }, - }, - }, - }, - } - - diags := bundle.Apply(context.Background(), b, CheckForSameNameLibraries()) - require.Empty(t, diags) -} diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 542e11da5e..02e0e9bd84 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -181,8 +181,8 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand // know what are the actual library paths. // libraries.ExpandGlobReferences() has to be run after the libraries are built and thus this // mutator is part of the deploy step rather than validate. - libraries.CheckForSameNameLibraries(), libraries.ExpandGlobReferences(), + libraries.CheckForSameNameLibraries(), libraries.Upload(), trampoline.TransformWheelTask(), files.Upload(outputHandler), From 3224b007527aae142e5b5a2ac1f8f1a29aa87958 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 27 Feb 2025 16:12:51 +0100 Subject: [PATCH 08/11] added repls for Windows slashes --- acceptance/bundle/artifacts/unique_name_libraries/test.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/acceptance/bundle/artifacts/unique_name_libraries/test.toml b/acceptance/bundle/artifacts/unique_name_libraries/test.toml index 280338bd6a..47a082f663 100644 --- a/acceptance/bundle/artifacts/unique_name_libraries/test.toml +++ b/acceptance/bundle/artifacts/unique_name_libraries/test.toml @@ -4,3 +4,7 @@ Cloud = false [[Repls]] Old="Uploading .*-0.0.1-py3-none-any.whl..." New="Uploading [package name]" + +[[Repls]] +Old = '\\' +New = '/' From 6cd16707b5fb66720d8dc54b89506b177e4225da Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 27 Feb 2025 16:31:40 +0100 Subject: [PATCH 09/11] fix typo --- acceptance/bundle/artifacts/unique_name_libraries/test.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acceptance/bundle/artifacts/unique_name_libraries/test.toml b/acceptance/bundle/artifacts/unique_name_libraries/test.toml index 47a082f663..6c47205233 100644 --- a/acceptance/bundle/artifacts/unique_name_libraries/test.toml +++ b/acceptance/bundle/artifacts/unique_name_libraries/test.toml @@ -6,5 +6,5 @@ Old="Uploading .*-0.0.1-py3-none-any.whl..." New="Uploading [package name]" [[Repls]] -Old = '\\' +Old = '\' New = '/' From 085b74b496b43cc1faa409e486bee8e24b14e0cd Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 27 Feb 2025 16:45:17 +0100 Subject: [PATCH 10/11] fix --- acceptance/bundle/artifacts/unique_name_libraries/test.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/acceptance/bundle/artifacts/unique_name_libraries/test.toml b/acceptance/bundle/artifacts/unique_name_libraries/test.toml index 6c47205233..c85f2065e5 100644 --- a/acceptance/bundle/artifacts/unique_name_libraries/test.toml +++ b/acceptance/bundle/artifacts/unique_name_libraries/test.toml @@ -6,5 +6,5 @@ Old="Uploading .*-0.0.1-py3-none-any.whl..." New="Uploading [package name]" [[Repls]] -Old = '\' -New = '/' +Old = "\\" +New = "/" From b73c78ac96e0c2889c7e2fd14cd31a7fc940e8be Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 27 Feb 2025 17:09:50 +0100 Subject: [PATCH 11/11] repl in a correct file --- acceptance/bundle/artifacts/same_name_libraries/test.toml | 3 +++ acceptance/bundle/artifacts/unique_name_libraries/test.toml | 4 ---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/acceptance/bundle/artifacts/same_name_libraries/test.toml b/acceptance/bundle/artifacts/same_name_libraries/test.toml index e69de29bb2..a298217f21 100644 --- a/acceptance/bundle/artifacts/same_name_libraries/test.toml +++ b/acceptance/bundle/artifacts/same_name_libraries/test.toml @@ -0,0 +1,3 @@ +[[Repls]] +Old = '\\' +New = '/' diff --git a/acceptance/bundle/artifacts/unique_name_libraries/test.toml b/acceptance/bundle/artifacts/unique_name_libraries/test.toml index c85f2065e5..280338bd6a 100644 --- a/acceptance/bundle/artifacts/unique_name_libraries/test.toml +++ b/acceptance/bundle/artifacts/unique_name_libraries/test.toml @@ -4,7 +4,3 @@ Cloud = false [[Repls]] Old="Uploading .*-0.0.1-py3-none-any.whl..." New="Uploading [package name]" - -[[Repls]] -Old = "\\" -New = "/"