diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 0020ab739b..246418a3a1 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -6,4 +6,6 @@ ### Bundles +* Processing 'artifacts' section is now done in "bundle validate" (adding defaults, inferring "build", asserting required fields) ([#2526](https://github.com/databricks/cli/pull/2526)) + ### API Changes diff --git a/acceptance/bundle/artifacts/build_and_files/databricks.yml b/acceptance/bundle/artifacts/build_and_files/databricks.yml new file mode 100644 index 0000000000..159d806fb5 --- /dev/null +++ b/acceptance/bundle/artifacts/build_and_files/databricks.yml @@ -0,0 +1,9 @@ +bundle: + name: build_and_files + +artifacts: + custom: + build: touch built.txt + files: + # this file does not exist yet, will be generated by build command + - source: built.txt diff --git a/acceptance/bundle/artifacts/build_and_files/output.txt b/acceptance/bundle/artifacts/build_and_files/output.txt new file mode 100644 index 0000000000..ef4cb6002b --- /dev/null +++ b/acceptance/bundle/artifacts/build_and_files/output.txt @@ -0,0 +1,11 @@ +{ + "custom": { + "build": "touch built.txt", + "files": [ + { + "source": "[TMPDIR]/built.txt" + } + ], + "path": "[TMPDIR]" + } +} diff --git a/acceptance/bundle/artifacts/build_and_files/script b/acceptance/bundle/artifacts/build_and_files/script new file mode 100644 index 0000000000..c3ba9bd25f --- /dev/null +++ b/acceptance/bundle/artifacts/build_and_files/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .artifacts diff --git a/acceptance/bundle/artifacts/build_and_files/test.toml b/acceptance/bundle/artifacts/build_and_files/test.toml new file mode 100644 index 0000000000..93ee159927 --- /dev/null +++ b/acceptance/bundle/artifacts/build_and_files/test.toml @@ -0,0 +1,5 @@ +RecordRequests = false + +[[Repls]] +Old = '\\\\' +New = '/' diff --git a/acceptance/bundle/artifacts/globs_in_files/databricks.yml b/acceptance/bundle/artifacts/globs_in_files/databricks.yml new file mode 
100644 index 0000000000..c486ba5f70 --- /dev/null +++ b/acceptance/bundle/artifacts/globs_in_files/databricks.yml @@ -0,0 +1,8 @@ +bundle: + name: expand_globs + +artifacts: + test: + files: + - source: a*.txt + - source: subdir/*.txt diff --git a/acceptance/bundle/artifacts/globs_in_files/output.txt b/acceptance/bundle/artifacts/globs_in_files/output.txt new file mode 100644 index 0000000000..5d5b39de48 --- /dev/null +++ b/acceptance/bundle/artifacts/globs_in_files/output.txt @@ -0,0 +1,61 @@ + +>>> [CLI] bundle validate -o json +Error: a*.txt: no matching files + at artifacts.test.files[0].source + in databricks.yml:7:17 + +Error: subdir/*.txt: no matching files + at artifacts.test.files[1].source + in databricks.yml:8:17 + + +Exit code: 1 +{ + "test": { + "files": [ + { + "source": "a*.txt" + }, + { + "source": "subdir/*.txt" + } + ] + } +} + +>>> [CLI] bundle validate -o json +Error: subdir/*.txt: no matching files + at artifacts.test.files[1].source + in databricks.yml:8:17 + + +Exit code: 1 +{ + "test": { + "files": [ + { + "source": "a*.txt" + }, + { + "source": "subdir/*.txt" + } + ] + } +} + +>>> [CLI] bundle validate -o json +{ + "test": { + "files": [ + { + "source": "a1.txt" + }, + { + "source": "a2.txt" + }, + { + "source": "subdir/hello.txt" + } + ] + } +} diff --git a/acceptance/bundle/artifacts/globs_in_files/script b/acceptance/bundle/artifacts/globs_in_files/script new file mode 100644 index 0000000000..2bf0e30c05 --- /dev/null +++ b/acceptance/bundle/artifacts/globs_in_files/script @@ -0,0 +1,12 @@ +errcode trace $CLI bundle validate -o json | jq .artifacts + +touch a1.txt +touch a2.txt + +errcode trace $CLI bundle validate -o json | jq .artifacts + +mkdir -p subdir +touch subdir/hello.txt +errcode trace $CLI bundle validate -o json | jq .artifacts + +rm -fr a1.txt a2.txt subdir diff --git a/acceptance/bundle/artifacts/globs_in_files/test.toml b/acceptance/bundle/artifacts/globs_in_files/test.toml new file mode 100644 index 0000000000..93ee159927 
--- /dev/null +++ b/acceptance/bundle/artifacts/globs_in_files/test.toml @@ -0,0 +1,5 @@ +RecordRequests = false + +[[Repls]] +Old = '\\\\' +New = '/' diff --git a/acceptance/bundle/artifacts/globs_invalid/databricks.yml b/acceptance/bundle/artifacts/globs_invalid/databricks.yml new file mode 100644 index 0000000000..9ceec6ded2 --- /dev/null +++ b/acceptance/bundle/artifacts/globs_invalid/databricks.yml @@ -0,0 +1,10 @@ +bundle: + name: expand_globs + +artifacts: + test: + files: + - source: "a[.txt" + - source: "./a[.txt" + - source: "../a[.txt" + - source: "subdir/a[.txt" diff --git a/acceptance/bundle/artifacts/globs_invalid/output.txt b/acceptance/bundle/artifacts/globs_invalid/output.txt new file mode 100644 index 0000000000..178a9dddbd --- /dev/null +++ b/acceptance/bundle/artifacts/globs_invalid/output.txt @@ -0,0 +1,38 @@ + +>>> [CLI] bundle validate -o json +Error: a[.txt: syntax error in pattern + at artifacts.test.files[0].source + in databricks.yml:7:17 + +Error: ./a[.txt: syntax error in pattern + at artifacts.test.files[1].source + in databricks.yml:8:17 + +Error: ../a[.txt: syntax error in pattern + at artifacts.test.files[2].source + in databricks.yml:9:17 + +Error: subdir/a[.txt: syntax error in pattern + at artifacts.test.files[3].source + in databricks.yml:10:17 + + +Exit code: 1 +{ + "test": { + "files": [ + { + "source": "a[.txt" + }, + { + "source": "./a[.txt" + }, + { + "source": "../a[.txt" + }, + { + "source": "subdir/a[.txt" + } + ] + } +} diff --git a/acceptance/bundle/artifacts/globs_invalid/script b/acceptance/bundle/artifacts/globs_invalid/script new file mode 100644 index 0000000000..aab886a3d5 --- /dev/null +++ b/acceptance/bundle/artifacts/globs_invalid/script @@ -0,0 +1 @@ +errcode trace $CLI bundle validate -o json | jq .artifacts diff --git a/acceptance/bundle/artifacts/globs_invalid/test.toml b/acceptance/bundle/artifacts/globs_invalid/test.toml new file mode 100644 index 0000000000..a030353d57 --- /dev/null +++ 
b/acceptance/bundle/artifacts/globs_invalid/test.toml @@ -0,0 +1 @@ +RecordRequests = false diff --git a/acceptance/bundle/debug/out.stderr.txt b/acceptance/bundle/debug/out.stderr.txt index 631f51990b..dc8f52b79a 100644 --- a/acceptance/bundle/debug/out.stderr.txt +++ b/acceptance/bundle/debug/out.stderr.txt @@ -60,6 +60,8 @@ 10:07:59 Debug: Apply pid=12345 mutator=ConfigureWSFS 10:07:59 Debug: Apply pid=12345 mutator=TranslatePaths 10:07:59 Debug: Apply pid=12345 mutator=PythonWrapperWarning +10:07:59 Debug: Apply pid=12345 mutator=artifacts.Prepare +10:07:59 Info: No local tasks in databricks.yml config, skipping auto detect pid=12345 mutator=artifacts.Prepare 10:07:59 Debug: Apply pid=12345 mutator=apps.Validate 10:07:59 Debug: Apply pid=12345 mutator=ValidateSharedRootPermissions 10:07:59 Debug: Apply pid=12345 mutator=ApplyBundlePermissions diff --git a/bundle/artifacts/all.go b/bundle/artifacts/all.go deleted file mode 100644 index b78e7c100a..0000000000 --- a/bundle/artifacts/all.go +++ /dev/null @@ -1,42 +0,0 @@ -package artifacts - -import ( - "context" - "fmt" - "slices" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/libs/diag" - "golang.org/x/exp/maps" -) - -// all is an internal proxy for producing a list of mutators for all artifacts. -// It is used to produce the [BuildAll] and [UploadAll] mutators. -type all struct { - name string - fn func(name string) (bundle.Mutator, error) -} - -func (m *all) Name() string { - return fmt.Sprintf("artifacts.%sAll", m.name) -} - -func (m *all) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - var out []bundle.Mutator - - // Iterate with stable ordering. - keys := maps.Keys(b.Config.Artifacts) - slices.Sort(keys) - - for _, name := range keys { - m, err := m.fn(name) - if err != nil { - return diag.FromErr(err) - } - if m != nil { - out = append(out, m) - } - } - - return bundle.ApplySeq(ctx, b, out...) 
-} diff --git a/bundle/artifacts/artifacts.go b/bundle/artifacts/artifacts.go deleted file mode 100644 index e5e55a14d5..0000000000 --- a/bundle/artifacts/artifacts.go +++ /dev/null @@ -1,74 +0,0 @@ -package artifacts - -import ( - "context" - "fmt" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/artifacts/whl" - "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/bundle/config/mutator" - "github.com/databricks/cli/libs/cmdio" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/log" -) - -type mutatorFactory = func(name string) bundle.Mutator - -var buildMutators map[config.ArtifactType]mutatorFactory = map[config.ArtifactType]mutatorFactory{ - config.ArtifactPythonWheel: whl.Build, -} - -var prepareMutators map[config.ArtifactType]mutatorFactory = map[config.ArtifactType]mutatorFactory{ - config.ArtifactPythonWheel: whl.Prepare, -} - -func getBuildMutator(t config.ArtifactType, name string) bundle.Mutator { - mutatorFactory, ok := buildMutators[t] - if !ok { - mutatorFactory = BasicBuild - } - - return mutatorFactory(name) -} - -func getPrepareMutator(t config.ArtifactType, name string) bundle.Mutator { - mutatorFactory, ok := prepareMutators[t] - if !ok { - mutatorFactory = func(_ string) bundle.Mutator { - return mutator.NoOp() - } - } - - return mutatorFactory(name) -} - -// Basic Build defines a general build mutator which builds artifact based on artifact.BuildCommand -type basicBuild struct { - name string -} - -func BasicBuild(name string) bundle.Mutator { - return &basicBuild{name: name} -} - -func (m *basicBuild) Name() string { - return fmt.Sprintf("artifacts.Build(%s)", m.name) -} - -func (m *basicBuild) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - artifact, ok := b.Config.Artifacts[m.name] - if !ok { - return diag.Errorf("artifact doesn't exist: %s", m.name) - } - - cmdio.LogString(ctx, fmt.Sprintf("Building %s...", m.name)) - - out, err := artifact.Build(ctx) - if 
err != nil { - return diag.Errorf("build for %s failed, error: %v, output: %s", m.name, err, out) - } - log.Infof(ctx, "Build succeeded") - - return nil -} diff --git a/bundle/artifacts/build.go b/bundle/artifacts/build.go index 94880bc2cb..62529b7ab6 100644 --- a/bundle/artifacts/build.go +++ b/bundle/artifacts/build.go @@ -3,55 +3,85 @@ package artifacts import ( "context" "fmt" + "path/filepath" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/exec" + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/python" ) -func BuildAll() bundle.Mutator { - return &all{ - name: "Build", - fn: buildArtifactByName, - } -} - -type build struct { - name string +func Build() bundle.Mutator { + return &build{} } -func buildArtifactByName(name string) (bundle.Mutator, error) { - return &build{name}, nil -} +type build struct{} func (m *build) Name() string { - return fmt.Sprintf("artifacts.Build(%s)", m.name) + return "artifacts.Build" } func (m *build) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - artifact, ok := b.Config.Artifacts[m.name] - if !ok { - return diag.Errorf("artifact doesn't exist: %s", m.name) - } + var diags diag.Diagnostics + + for _, artifactName := range sortedKeys(b.Config.Artifacts) { + a := b.Config.Artifacts[artifactName] + + if a.BuildCommand == "" { + continue + } - var mutators []bundle.Mutator + cmdio.LogString(ctx, fmt.Sprintf("Building %s...", artifactName)) - // Skip building if build command is not specified or infered - if artifact.BuildCommand == "" { - // If no build command was specified or infered and there is no - // artifact output files specified, artifact is misconfigured - if len(artifact.Files) == 0 { - return diag.Errorf("misconfigured artifact: please specify 'build' or 'files' property") + var executor *exec.Executor + var err error + if a.Executable != 
"" { + executor, err = exec.NewCommandExecutorWithExecutable(a.Path, a.Executable) + } else { + executor, err = exec.NewCommandExecutor(a.Path) + a.Executable = executor.ShellType() } - // We can skip calling build mutator if there is no build command - // But we still need to expand glob references in files source path. - } else { - mutators = append(mutators, getBuildMutator(artifact.Type, m.name)) + if err != nil { + diags = diags.Extend(diag.FromErr(err)) + if diags.HasError() { + break + } + } + + out, err := executor.Exec(ctx, a.BuildCommand) + if err != nil { + return diag.Errorf("build failed %s, error: %v, output: %s", artifactName, err, out) + } + + if a.Type == "whl" { + dir := a.Path + distPath := filepath.Join(a.Path, "dist") + wheels := python.FindFilesWithSuffixInPath(distPath, ".whl") + if len(wheels) == 0 { + return diag.Errorf("cannot find built wheel in %s for package %s", dir, artifactName) + } + for _, wheel := range wheels { + a.Files = append(a.Files, config.ArtifactFile{ + Source: wheel, + }) + } + log.Infof(ctx, "%s: Build succeeded, found %s", artifactName, wheels) + } else { + log.Infof(ctx, "%s: Build succeeded", artifactName) + } + + // We need to expand glob reference after build mutator is applied because + // if we do it before, any files that are generated by build command will + // not be included into artifact.Files and thus will not be uploaded. + diags = diags.Extend(expandGlobs(ctx, b, artifactName)) + if diags.HasError() { + break + } } - // We need to expand glob reference after build mutator is applied because - // if we do it before, any files that are generated by build command will - // not be included into artifact.Files and thus will not be uploaded. - mutators = append(mutators, &expandGlobs{name: m.name}) - return bundle.ApplySeq(ctx, b, mutators...) 
+ return diags } diff --git a/bundle/artifacts/expand_globs.go b/bundle/artifacts/expand_globs.go index c0af7c69e7..889a62e268 100644 --- a/bundle/artifacts/expand_globs.go +++ b/bundle/artifacts/expand_globs.go @@ -10,14 +10,6 @@ import ( "github.com/databricks/cli/libs/dyn" ) -type expandGlobs struct { - name string -} - -func (m *expandGlobs) Name() string { - return fmt.Sprintf("artifacts.ExpandGlobs(%s)", m.name) -} - func createGlobError(v dyn.Value, p dyn.Path, message string) diag.Diagnostic { // The pattern contained in v is an absolute path. // Make it relative to the value's location to make it more readable. @@ -37,12 +29,12 @@ func createGlobError(v dyn.Value, p dyn.Path, message string) diag.Diagnostic { } } -func (m *expandGlobs) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { +func expandGlobs(ctx context.Context, b *bundle.Bundle, name string) diag.Diagnostics { // Base path for this mutator. // This path is set with the list of expanded globs when done. 
base := dyn.NewPath( dyn.Key("artifacts"), - dyn.Key(m.name), + dyn.Key(name), dyn.Key("files"), ) diff --git a/bundle/artifacts/expand_globs_test.go b/bundle/artifacts/expand_globs_test.go deleted file mode 100644 index 7bf8863306..0000000000 --- a/bundle/artifacts/expand_globs_test.go +++ /dev/null @@ -1,156 +0,0 @@ -package artifacts - -import ( - "context" - "path/filepath" - "testing" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/bundle/internal/bundletest" - "github.com/databricks/cli/internal/testutil" - "github.com/databricks/cli/libs/dyn" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestExpandGlobs_Nominal(t *testing.T) { - tmpDir := t.TempDir() - - testutil.Touch(t, tmpDir, "aa1.txt") - testutil.Touch(t, tmpDir, "aa2.txt") - testutil.Touch(t, tmpDir, "bb.txt") - testutil.Touch(t, tmpDir, "bc.txt") - - b := &bundle.Bundle{ - BundleRootPath: tmpDir, - Config: config.Root{ - Artifacts: config.Artifacts{ - "test": { - Files: []config.ArtifactFile{ - {Source: "./aa*.txt"}, - {Source: "./b[bc].txt"}, - }, - }, - }, - }, - } - - bundletest.SetLocation(b, "artifacts", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}}) - - ctx := context.Background() - diags := bundle.ApplySeq(ctx, b, - // Run prepare first to make paths absolute. - &prepare{"test"}, - &expandGlobs{"test"}, - ) - require.NoError(t, diags.Error()) - - // Assert that the expanded paths are correct. 
- a, ok := b.Config.Artifacts["test"] - if !assert.True(t, ok) { - return - } - assert.Len(t, a.Files, 4) - assert.Equal(t, filepath.Join(tmpDir, "aa1.txt"), a.Files[0].Source) - assert.Equal(t, filepath.Join(tmpDir, "aa2.txt"), a.Files[1].Source) - assert.Equal(t, filepath.Join(tmpDir, "bb.txt"), a.Files[2].Source) - assert.Equal(t, filepath.Join(tmpDir, "bc.txt"), a.Files[3].Source) -} - -func TestExpandGlobs_InvalidPattern(t *testing.T) { - tmpDir := t.TempDir() - - b := &bundle.Bundle{ - BundleRootPath: tmpDir, - Config: config.Root{ - Artifacts: config.Artifacts{ - "test": { - Files: []config.ArtifactFile{ - {Source: "a[.txt"}, - {Source: "./a[.txt"}, - {Source: "../a[.txt"}, - {Source: "subdir/a[.txt"}, - }, - }, - }, - }, - } - - bundletest.SetLocation(b, "artifacts", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}}) - - ctx := context.Background() - diags := bundle.ApplySeq(ctx, b, - // Run prepare first to make paths absolute. - &prepare{"test"}, - &expandGlobs{"test"}, - ) - - assert.Len(t, diags, 4) - assert.Equal(t, filepath.Clean("a[.txt")+": syntax error in pattern", diags[0].Summary) - assert.Equal(t, filepath.Join(tmpDir, "databricks.yml"), diags[0].Locations[0].File) - assert.Equal(t, "artifacts.test.files[0].source", diags[0].Paths[0].String()) - assert.Equal(t, filepath.Clean("a[.txt")+": syntax error in pattern", diags[1].Summary) - assert.Equal(t, filepath.Join(tmpDir, "databricks.yml"), diags[1].Locations[0].File) - assert.Equal(t, "artifacts.test.files[1].source", diags[1].Paths[0].String()) - assert.Equal(t, filepath.Clean("../a[.txt")+": syntax error in pattern", diags[2].Summary) - assert.Equal(t, filepath.Join(tmpDir, "databricks.yml"), diags[2].Locations[0].File) - assert.Equal(t, "artifacts.test.files[2].source", diags[2].Paths[0].String()) - assert.Equal(t, filepath.Clean("subdir/a[.txt")+": syntax error in pattern", diags[3].Summary) - assert.Equal(t, filepath.Join(tmpDir, "databricks.yml"), diags[3].Locations[0].File) 
- assert.Equal(t, "artifacts.test.files[3].source", diags[3].Paths[0].String()) -} - -func TestExpandGlobs_NoMatches(t *testing.T) { - tmpDir := t.TempDir() - - testutil.Touch(t, tmpDir, "a1.txt") - testutil.Touch(t, tmpDir, "a2.txt") - testutil.Touch(t, tmpDir, "b1.txt") - testutil.Touch(t, tmpDir, "b2.txt") - - b := &bundle.Bundle{ - BundleRootPath: tmpDir, - Config: config.Root{ - Artifacts: config.Artifacts{ - "test": { - Files: []config.ArtifactFile{ - {Source: "a*.txt"}, - {Source: "b*.txt"}, - {Source: "c*.txt"}, - {Source: "d*.txt"}, - }, - }, - }, - }, - } - - bundletest.SetLocation(b, "artifacts", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}}) - - ctx := context.Background() - diags := bundle.ApplySeq(ctx, b, - // Run prepare first to make paths absolute. - &prepare{"test"}, - &expandGlobs{"test"}, - ) - - assert.Len(t, diags, 2) - assert.Equal(t, "c*.txt: no matching files", diags[0].Summary) - assert.Equal(t, filepath.Join(tmpDir, "databricks.yml"), diags[0].Locations[0].File) - assert.Equal(t, "artifacts.test.files[2].source", diags[0].Paths[0].String()) - assert.Equal(t, "d*.txt: no matching files", diags[1].Summary) - assert.Equal(t, filepath.Join(tmpDir, "databricks.yml"), diags[1].Locations[0].File) - assert.Equal(t, "artifacts.test.files[3].source", diags[1].Paths[0].String()) - - // Assert that the original paths are unchanged. 
- a, ok := b.Config.Artifacts["test"] - if !assert.True(t, ok) { - return - } - - assert.Len(t, a.Files, 4) - assert.Equal(t, "a*.txt", filepath.Base(a.Files[0].Source)) - assert.Equal(t, "b*.txt", filepath.Base(a.Files[1].Source)) - assert.Equal(t, "c*.txt", filepath.Base(a.Files[2].Source)) - assert.Equal(t, "d*.txt", filepath.Base(a.Files[3].Source)) -} diff --git a/bundle/artifacts/infer.go b/bundle/artifacts/infer.go deleted file mode 100644 index abc5091070..0000000000 --- a/bundle/artifacts/infer.go +++ /dev/null @@ -1,65 +0,0 @@ -package artifacts - -import ( - "context" - "fmt" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/artifacts/whl" - "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/libs/diag" -) - -var inferMutators map[config.ArtifactType]mutatorFactory = map[config.ArtifactType]mutatorFactory{ - config.ArtifactPythonWheel: whl.InferBuildCommand, -} - -func getInferMutator(t config.ArtifactType, name string) bundle.Mutator { - mutatorFactory, ok := inferMutators[t] - if !ok { - return nil - } - - return mutatorFactory(name) -} - -func InferMissingProperties() bundle.Mutator { - return &all{ - name: "infer", - fn: inferArtifactByName, - } -} - -func inferArtifactByName(name string) (bundle.Mutator, error) { - return &infer{name}, nil -} - -type infer struct { - name string -} - -func (m *infer) Name() string { - return fmt.Sprintf("artifacts.Infer(%s)", m.name) -} - -func (m *infer) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - artifact, ok := b.Config.Artifacts[m.name] - if !ok { - return diag.Errorf("artifact doesn't exist: %s", m.name) - } - - // only try to infer command if it's not already defined - // and there is no explicitly files defined which means - // that the package is built outside of bundle cycles - // manually by customer - if artifact.BuildCommand != "" || len(artifact.Files) > 0 { - return nil - } - - inferMutator := getInferMutator(artifact.Type, m.name) - if 
inferMutator != nil { - return bundle.Apply(ctx, b, inferMutator) - } - - return nil -} diff --git a/bundle/artifacts/prepare.go b/bundle/artifacts/prepare.go index 91e0bd0916..fb3b718ab9 100644 --- a/bundle/artifacts/prepare.go +++ b/bundle/artifacts/prepare.go @@ -2,57 +2,147 @@ package artifacts import ( "context" - "fmt" + "os" "path/filepath" + "sort" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/libraries" "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/python" ) -func PrepareAll() bundle.Mutator { - return &all{ - name: "Prepare", - fn: prepareArtifactByName, - } +func Prepare() bundle.Mutator { + return &prepare{} } -type prepare struct { - name string -} - -func prepareArtifactByName(name string) (bundle.Mutator, error) { - return &prepare{name}, nil -} +type prepare struct{} func (m *prepare) Name() string { - return fmt.Sprintf("artifacts.Prepare(%s)", m.name) + return "artifacts.Prepare" } func (m *prepare) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - artifact, ok := b.Config.Artifacts[m.name] - if !ok { - return diag.Errorf("artifact doesn't exist: %s", m.name) + var diags diag.Diagnostics + + err := InsertPythonArtifact(ctx, b) + if err != nil { + return diag.FromErr(err) + } + + removeFolders := make(map[string]bool, len(b.Config.Artifacts)) + cleanupWheelFolders := make(map[string]bool, len(b.Config.Artifacts)) + + for _, artifactName := range sortedKeys(b.Config.Artifacts) { + artifact := b.Config.Artifacts[artifactName] + + if artifact.Type == "whl" { + if artifact.BuildCommand == "" && len(artifact.Files) == 0 { + artifact.BuildCommand = python.GetExecutable() + " setup.py bdist_wheel" + } + } + + l := b.Config.GetLocation("artifacts." 
+ artifactName) + dirPath := filepath.Dir(l.File) + + // Check if source paths are absolute, if not, make them absolute + for k := range artifact.Files { + f := &artifact.Files[k] + if !filepath.IsAbs(f.Source) { + f.Source = filepath.Join(dirPath, f.Source) + } + } + + if artifact.Path == "" { + artifact.Path = b.BundleRootPath + } + + if !filepath.IsAbs(artifact.Path) { + artifact.Path = filepath.Join(dirPath, artifact.Path) + } + + if artifact.Type == "whl" && artifact.BuildCommand != "" { + dir := artifact.Path + removeFolders[filepath.Join(dir, "dist")] = true + cleanupWheelFolders[dir] = true + } + + if artifact.BuildCommand == "" && len(artifact.Files) == 0 { + diags = diags.Extend(diag.Errorf("misconfigured artifact: please specify 'build' or 'files' property")) + } + + if len(artifact.Files) > 0 && artifact.BuildCommand == "" { + diags = diags.Extend(expandGlobs(ctx, b, artifactName)) + } + + if diags.HasError() { + break + } } - l := b.Config.GetLocation("artifacts." + m.name) - dirPath := filepath.Dir(l.File) + if diags.HasError() { + return diags + } - // Check if source paths are absolute, if not, make them absolute - for k := range artifact.Files { - f := &artifact.Files[k] - if !filepath.IsAbs(f.Source) { - f.Source = filepath.Join(dirPath, f.Source) + for _, dir := range sortedKeys(removeFolders) { + err := os.RemoveAll(dir) + if err != nil { + log.Infof(ctx, "Failed to remove %s: %s", dir, err) } } - // If artifact path is not provided, use bundle root dir - if artifact.Path == "" { - artifact.Path = b.BundleRootPath + for _, dir := range sortedKeys(cleanupWheelFolders) { + log.Infof(ctx, "Cleaning up Python build artifacts in %s", dir) + python.CleanupWheelFolder(dir) + } + + return diags +} + +func InsertPythonArtifact(ctx context.Context, b *bundle.Bundle) error { + if b.Config.Artifacts != nil { + log.Debugf(ctx, "artifacts block is defined, skipping auto-detecting") + return nil + } + + tasks := libraries.FindTasksWithLocalLibraries(b) + if 
len(tasks) == 0 { + log.Infof(ctx, "No local tasks in databricks.yml config, skipping auto detect") + return nil + } + + // checking if there is setup.py in the bundle root + setupPy := filepath.Join(b.BundleRootPath, "setup.py") + _, err := os.Stat(setupPy) + if err != nil { + log.Infof(ctx, "No Python wheel project found at bundle root folder") + return nil + } + + log.Infof(ctx, "Found Python wheel project at %s", b.BundleRootPath) + + pkgPath, err := filepath.Abs(b.BundleRootPath) + if err != nil { + return err } - if !filepath.IsAbs(artifact.Path) { - artifact.Path = filepath.Join(dirPath, artifact.Path) + b.Config.Artifacts = make(map[string]*config.Artifact) + b.Config.Artifacts["python_artifact"] = &config.Artifact{ + Path: pkgPath, + Type: config.ArtifactPythonWheel, + // BuildCommand will be auto set by later code } - return bundle.Apply(ctx, b, getPrepareMutator(artifact.Type, m.name)) + return nil +} + +func sortedKeys[T any](m map[string]T) []string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + sort.Strings(keys) + return keys } diff --git a/bundle/artifacts/whl/autodetect.go b/bundle/artifacts/whl/autodetect.go deleted file mode 100644 index 9eead83b74..0000000000 --- a/bundle/artifacts/whl/autodetect.go +++ /dev/null @@ -1,62 +0,0 @@ -package whl - -import ( - "context" - "os" - "path/filepath" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/bundle/libraries" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/log" -) - -type detectPkg struct{} - -func DetectPackage() bundle.Mutator { - return &detectPkg{} -} - -func (m *detectPkg) Name() string { - return "artifacts.whl.AutoDetect" -} - -func (m *detectPkg) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - if b.Config.Artifacts != nil { - log.Debugf(ctx, "artifacts block is defined, skipping auto-detecting") - return nil - } - - tasks := 
libraries.FindTasksWithLocalLibraries(b) - if len(tasks) == 0 { - log.Infof(ctx, "No local tasks in databricks.yml config, skipping auto detect") - return nil - } - - log.Infof(ctx, "Detecting Python wheel project...") - - // checking if there is setup.py in the bundle root - setupPy := filepath.Join(b.BundleRootPath, "setup.py") - _, err := os.Stat(setupPy) - if err != nil { - log.Infof(ctx, "No Python wheel project found at bundle root folder") - return nil - } - - log.Infof(ctx, "Found Python wheel project at %s", b.BundleRootPath) - - pkgPath, err := filepath.Abs(b.BundleRootPath) - if err != nil { - return diag.FromErr(err) - } - - b.Config.Artifacts = make(map[string]*config.Artifact) - b.Config.Artifacts["python_artifact"] = &config.Artifact{ - Path: pkgPath, - Type: config.ArtifactPythonWheel, - // BuildCommand will be set by bundle/artifacts/whl/infer.go to "python3 setup.py bdist_wheel" - } - - return nil -} diff --git a/bundle/artifacts/whl/build.go b/bundle/artifacts/whl/build.go deleted file mode 100644 index 18d4b8ede6..0000000000 --- a/bundle/artifacts/whl/build.go +++ /dev/null @@ -1,57 +0,0 @@ -package whl - -import ( - "context" - "fmt" - "path/filepath" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/libs/cmdio" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/log" - "github.com/databricks/cli/libs/python" -) - -type build struct { - name string -} - -func Build(name string) bundle.Mutator { - return &build{ - name: name, - } -} - -func (m *build) Name() string { - return fmt.Sprintf("artifacts.whl.Build(%s)", m.name) -} - -func (m *build) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - artifact, ok := b.Config.Artifacts[m.name] - if !ok { - return diag.Errorf("artifact doesn't exist: %s", m.name) - } - - cmdio.LogString(ctx, fmt.Sprintf("Building %s...", m.name)) - - out, err := artifact.Build(ctx) - if err != nil { - return 
diag.Errorf("build failed %s, error: %v, output: %s", m.name, err, out) - } - log.Infof(ctx, "Build succeeded") - - dir := artifact.Path - distPath := filepath.Join(artifact.Path, "dist") - wheels := python.FindFilesWithSuffixInPath(distPath, ".whl") - if len(wheels) == 0 { - return diag.Errorf("cannot find built wheel in %s for package %s", dir, m.name) - } - for _, wheel := range wheels { - artifact.Files = append(artifact.Files, config.ArtifactFile{ - Source: wheel, - }) - } - - return nil -} diff --git a/bundle/artifacts/whl/infer.go b/bundle/artifacts/whl/infer.go deleted file mode 100644 index 9c40360bea..0000000000 --- a/bundle/artifacts/whl/infer.go +++ /dev/null @@ -1,48 +0,0 @@ -package whl - -import ( - "context" - "fmt" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/python" -) - -type infer struct { - name string -} - -func (m *infer) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - artifact := b.Config.Artifacts[m.name] - - // Note: using --build-number (build tag) flag does not help with re-installing - // libraries on all-purpose clusters. The reason is that `pip` ignoring build tag - // when upgrading the library and only look at wheel version. - // Build tag is only used for sorting the versions and the one with higher build tag takes priority when installed. - // It only works if no library is installed - // See https://github.com/pypa/pip/blob/a15dd75d98884c94a77d349b800c7c755d8c34e4/src/pip/_internal/index/package_finder.py#L522-L556 - // https://github.com/pypa/pip/issues/4781 - // - // Thus, the only way to reinstall the library on all-purpose cluster is to increase wheel version manually or - // use automatic version generation, f.e. - // setup( - // version=datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S"), - // ... 
- //) - - py := python.GetExecutable() - artifact.BuildCommand = py + " setup.py bdist_wheel" - - return nil -} - -func (m *infer) Name() string { - return fmt.Sprintf("artifacts.whl.Infer(%s)", m.name) -} - -func InferBuildCommand(name string) bundle.Mutator { - return &infer{ - name: name, - } -} diff --git a/bundle/artifacts/whl/prepare.go b/bundle/artifacts/whl/prepare.go deleted file mode 100644 index 0fbb2080af..0000000000 --- a/bundle/artifacts/whl/prepare.go +++ /dev/null @@ -1,53 +0,0 @@ -package whl - -import ( - "context" - "fmt" - "os" - "path/filepath" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/log" - "github.com/databricks/cli/libs/python" -) - -type prepare struct { - name string -} - -func Prepare(name string) bundle.Mutator { - return &prepare{ - name: name, - } -} - -func (m *prepare) Name() string { - return fmt.Sprintf("artifacts.whl.Prepare(%s)", m.name) -} - -func (m *prepare) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - artifact, ok := b.Config.Artifacts[m.name] - if !ok { - return diag.Errorf("artifact doesn't exist: %s", m.name) - } - - // If there is no build command for the artifact, we don't need to cleanup the dist folder before - if artifact.BuildCommand == "" { - return nil - } - - dir := artifact.Path - - distPath := filepath.Join(dir, "dist") - - // If we have multiple artifacts con figured, prepare will be called multiple times - // The first time we will remove the folders, other times will be no-op. 
- err := os.RemoveAll(distPath) - if err != nil { - log.Infof(ctx, "Failed to remove dist folder: %v", err) - } - python.CleanupWheelFolder(dir) - - return nil -} diff --git a/bundle/artifacts/whl/testdata/setup.py b/bundle/artifacts/whl/testdata/setup.py deleted file mode 100644 index 7a1317b2f5..0000000000 --- a/bundle/artifacts/whl/testdata/setup.py +++ /dev/null @@ -1,15 +0,0 @@ -from setuptools import setup, find_packages - -import my_test_code - -setup( - name="my_test_code", - version=my_test_code.__version__, - author=my_test_code.__author__, - url="https://databricks.com", - author_email="john.doe@databricks.com", - description="my test wheel", - packages=find_packages(include=["my_test_code"]), - entry_points={"group_1": "run=my_test_code.__main__:main"}, - install_requires=["setuptools"], -) diff --git a/bundle/artifacts/whl/testdata/setup_incorrect.py b/bundle/artifacts/whl/testdata/setup_incorrect.py deleted file mode 100644 index c6aa17b2d8..0000000000 --- a/bundle/artifacts/whl/testdata/setup_incorrect.py +++ /dev/null @@ -1,14 +0,0 @@ -from setuptools import setup, find_packages - -import my_test_code - -setup( - version=my_test_code.__version__, - author=my_test_code.__author__, - url="https://databricks.com", - author_email="john.doe@databricks.com", - description="my test wheel", - packages=find_packages(include=["my_test_code"]), - entry_points={"group_1": "run=my_test_code.__main__:main"}, - install_requires=["setuptools"], -) diff --git a/bundle/artifacts/whl/testdata/setup_minimal.py b/bundle/artifacts/whl/testdata/setup_minimal.py deleted file mode 100644 index 3e81e72173..0000000000 --- a/bundle/artifacts/whl/testdata/setup_minimal.py +++ /dev/null @@ -1,3 +0,0 @@ -from setuptools import setup - -setup(name="my_test_code") diff --git a/bundle/config/artifact.go b/bundle/config/artifact.go index 177799e119..93c220136f 100644 --- a/bundle/config/artifact.go +++ b/bundle/config/artifact.go @@ -1,9 +1,6 @@ package config import ( - "context" - 
"errors" - "github.com/databricks/cli/libs/exec" ) @@ -34,22 +31,3 @@ type Artifact struct { Executable exec.ExecutableType `json:"executable,omitempty"` } - -func (a *Artifact) Build(ctx context.Context) ([]byte, error) { - if a.BuildCommand == "" { - return nil, errors.New("no build property defined") - } - - var e *exec.Executor - var err error - if a.Executable != "" { - e, err = exec.NewCommandExecutorWithExecutable(a.Path, a.Executable) - } else { - e, err = exec.NewCommandExecutor(a.Path) - a.Executable = e.ShellType() - } - if err != nil { - return nil, err - } - return e.Exec(ctx, a.BuildCommand) -} diff --git a/bundle/config/artifacts_test.go b/bundle/config/artifacts_test.go deleted file mode 100644 index 5fa159fdf3..0000000000 --- a/bundle/config/artifacts_test.go +++ /dev/null @@ -1,18 +0,0 @@ -package config - -import ( - "context" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestArtifactBuild(t *testing.T) { - artifact := Artifact{ - BuildCommand: "echo 'Hello from build command'", - } - res, err := artifact.Build(context.Background()) - assert.NoError(t, err) - assert.NotNil(t, res) - assert.Equal(t, "Hello from build command\n", string(res)) -} diff --git a/bundle/phases/build.go b/bundle/phases/build.go index 0170ed51cf..7d98dfff52 100644 --- a/bundle/phases/build.go +++ b/bundle/phases/build.go @@ -5,7 +5,6 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/artifacts" - "github.com/databricks/cli/bundle/artifacts/whl" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/scripts" @@ -19,10 +18,7 @@ func Build(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { return bundle.ApplySeq(ctx, b, scripts.Execute(config.ScriptPreBuild), - whl.DetectPackage(), - artifacts.InferMissingProperties(), - artifacts.PrepareAll(), - artifacts.BuildAll(), + artifacts.Build(), scripts.Execute(config.ScriptPostBuild), 
mutator.ResolveVariableReferences( "artifacts", diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 1da5b61f49..b3d21be9f4 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -5,6 +5,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/apps" + "github.com/databricks/cli/bundle/artifacts" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" pythonmutator "github.com/databricks/cli/bundle/config/mutator/python" @@ -96,6 +97,8 @@ func Initialize(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { mutator.TranslatePaths(), trampoline.WrapperWarning(), + artifacts.Prepare(), + apps.Validate(), permissions.ValidateSharedRootPermissions(), diff --git a/integration/bundle/init_default_python_test.go b/integration/bundle/init_default_python_test.go index 9ac8f47bdf..917e06f1e4 100644 --- a/integration/bundle/init_default_python_test.go +++ b/integration/bundle/init_default_python_test.go @@ -126,6 +126,8 @@ func testDefaultPython(t *testing.T, pythonVersion string) { []string{"bundle", "summary", "--output", "json"}, testutil.TestData("testdata/default_python/bundle_summary.txt"), []string{ + "/artifacts/python_artifact/build", + "/artifacts/python_artifact/path", "/bundle/terraform/exec_path", "/resources/jobs/project_name_$UNIQUE_PRJ_job/email_notifications", "/resources/jobs/project_name_$UNIQUE_PRJ_job/job_clusters/0/new_cluster/node_type_id", diff --git a/integration/bundle/testdata/default_python/bundle_summary.txt b/integration/bundle/testdata/default_python/bundle_summary.txt index 0b4cbb1814..6f2eb698f0 100644 --- a/integration/bundle/testdata/default_python/bundle_summary.txt +++ b/integration/bundle/testdata/default_python/bundle_summary.txt @@ -59,6 +59,13 @@ "artifact_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_$UNIQUE_PRJ/dev/artifacts", "state_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_$UNIQUE_PRJ/dev/state" 
}, + "artifacts": { + "python_artifact": { + "type": "whl", + "path": "/tmp/.../project_name_$UNIQUE_PRJ", + "build": "python3 setup.py bdist_wheel" + } + }, "resources": { "jobs": { "project_name_$UNIQUE_PRJ_job": {