Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions bundle/bundle_read_only.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package bundle

import (
"context"

"github.com/databricks/cli/bundle/config"
"github.com/databricks/databricks-sdk-go"
)

type ReadOnlyBundle struct {
b *Bundle
}

func ReadOnly(b *Bundle) ReadOnlyBundle {
return ReadOnlyBundle{b: b}
}

func (r ReadOnlyBundle) Config() config.Root {
return r.b.Config
}

func (r ReadOnlyBundle) RootPath() string {
return r.b.RootPath
}

func (r ReadOnlyBundle) WorkspaceClient() *databricks.WorkspaceClient {
return r.b.WorkspaceClient()
}

func (r ReadOnlyBundle) CacheDir(ctx context.Context, paths ...string) (string, error) {
return r.b.CacheDir(ctx, paths...)
}

func (r ReadOnlyBundle) GetSyncIncludePatterns(ctx context.Context) ([]string, error) {
return r.b.GetSyncIncludePatterns(ctx)
}
2 changes: 1 addition & 1 deletion bundle/config/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,7 @@ func validateVariableOverrides(root, target dyn.Value) (err error) {
// Best effort to get the location of configuration value at the specified path.
// This function is useful to annotate error messages with the location, because
// we don't want to fail with a different error message if we cannot retrieve the location.
func (r *Root) GetLocation(path string) dyn.Location {
func (r Root) GetLocation(path string) dyn.Location {
v, err := dyn.Get(r.value, path)
if err != nil {
return dyn.Location{}
Expand Down
54 changes: 54 additions & 0 deletions bundle/config/validate/files_to_sync.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package validate

import (
"context"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/deploy/files"
"github.com/databricks/cli/libs/diag"
)

func FilesToSync() bundle.ReadOnlyMutator {
return &filesToSync{}
}

type filesToSync struct {
}

func (v *filesToSync) Name() string {
return "validate:files_to_sync"
}

func (v *filesToSync) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics {
sync, err := files.GetSync(ctx, rb)
if err != nil {
return diag.FromErr(err)
}

fl, err := sync.GetFileList(ctx)
if err != nil {
return diag.FromErr(err)
}

if len(fl) != 0 {
return nil
}

diags := diag.Diagnostics{}
if len(rb.Config().Sync.Exclude) == 0 {
diags = diags.Append(diag.Diagnostic{
Severity: diag.Warning,
Summary: "There are no files to sync, please check your .gitignore",
})
} else {
loc := location{path: "sync.exclude", rb: rb}
diags = diags.Append(diag.Diagnostic{
Severity: diag.Warning,
Summary: "There are no files to sync, please check your .gitignore and sync.exclude configuration",
Location: loc.Location(),
Path: loc.Path(),
})
}

return diags
}
53 changes: 53 additions & 0 deletions bundle/config/validate/job_cluster_key_defined.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package validate

import (
"context"
"fmt"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/diag"
)

func JobClusterKeyDefined() bundle.ReadOnlyMutator {
return &jobClusterKeyDefined{}
}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this one! Question, no immediate action needed.

So we have a few more cases of references throughout our APIs. I suspect the ones that customers would hit most often are job_cluster_key, then task_key, and then some small long tails including the new environment_key. Makes me wonder how far we should go with these checks? And maybe whether it's worthwhile making this pattern more generic?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we are able to define to which fields attributes like task_key, job_cluster_key and etc are referencing to in some general way (like key value map of config path and etc.) we can make these generic. But I like the very explicit nature of it and would rather prefer add separate explicit mutator for each type of these checks


type jobClusterKeyDefined struct {
}

func (v *jobClusterKeyDefined) Name() string {
return "validate:job_cluster_key_defined"
}

func (v *jobClusterKeyDefined) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics {
diags := diag.Diagnostics{}

for k, job := range rb.Config().Resources.Jobs {
jobClusterKeys := make(map[string]bool)
for _, cluster := range job.JobClusters {
if cluster.JobClusterKey != "" {
jobClusterKeys[cluster.JobClusterKey] = true
}
}

for index, task := range job.Tasks {
if task.JobClusterKey != "" {
if _, ok := jobClusterKeys[task.JobClusterKey]; !ok {
loc := location{
path: fmt.Sprintf("resources.jobs.%s.tasks[%d].job_cluster_key", k, index),
rb: rb,
}

diags = diags.Append(diag.Diagnostic{
Severity: diag.Warning,
Summary: fmt.Sprintf("job_cluster_key %s is not defined", task.JobClusterKey),
Location: loc.Location(),
Path: loc.Path(),
})
}
}
}
}

return diags
}
97 changes: 97 additions & 0 deletions bundle/config/validate/job_cluster_key_defined_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package validate

import (
"context"
"testing"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/stretchr/testify/require"
)

func TestJobClusterKeyDefined(t *testing.T) {
b := &bundle.Bundle{
Config: config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job1": {
JobSettings: &jobs.JobSettings{
Name: "job1",
JobClusters: []jobs.JobCluster{
{JobClusterKey: "do-not-exist"},
},
Tasks: []jobs.Task{
{JobClusterKey: "do-not-exist"},
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does exist here :D

},
},
},
},
},
},
}

diags := bundle.ApplyReadOnly(context.Background(), bundle.ReadOnly(b), JobClusterKeyDefined())
require.Len(t, diags, 0)
require.NoError(t, diags.Error())
}

func TestJobClusterKeyNotDefined(t *testing.T) {
b := &bundle.Bundle{
Config: config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job1": {
JobSettings: &jobs.JobSettings{
Name: "job1",
Tasks: []jobs.Task{
{JobClusterKey: "do-not-exist"},
},
},
},
},
},
},
}

diags := bundle.ApplyReadOnly(context.Background(), bundle.ReadOnly(b), JobClusterKeyDefined())
require.Len(t, diags, 1)
require.NoError(t, diags.Error())
require.Equal(t, diags[0].Severity, diag.Warning)
require.Equal(t, diags[0].Summary, "job_cluster_key do-not-exist is not defined")
}

func TestJobClusterKeyDefinedInDifferentJob(t *testing.T) {
b := &bundle.Bundle{
Config: config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job1": {
JobSettings: &jobs.JobSettings{
Name: "job1",
Tasks: []jobs.Task{
{JobClusterKey: "do-not-exist"},
},
},
},
"job2": {
JobSettings: &jobs.JobSettings{
Name: "job2",
JobClusters: []jobs.JobCluster{
{JobClusterKey: "do-not-exist"},
},
},
},
},
},
},
}

diags := bundle.ApplyReadOnly(context.Background(), bundle.ReadOnly(b), JobClusterKeyDefined())
require.Len(t, diags, 1)
require.NoError(t, diags.Error())
require.Equal(t, diags[0].Severity, diag.Warning)
require.Equal(t, diags[0].Summary, "job_cluster_key do-not-exist is not defined")
}
43 changes: 43 additions & 0 deletions bundle/config/validate/validate.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package validate

import (
"context"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
)

type validate struct {
}

type location struct {
path string
rb bundle.ReadOnlyBundle
}

func (l location) Location() dyn.Location {
return l.rb.Config().GetLocation(l.path)
}

func (l location) Path() dyn.Path {
return dyn.MustPathFromString(l.path)
}

// Apply implements bundle.Mutator.
func (v *validate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
return bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), bundle.Parallel(
JobClusterKeyDefined(),
FilesToSync(),
ValidateSyncPatterns(),
))
}

// Name implements bundle.Mutator.
func (v *validate) Name() string {
return "validate"
}

func Validate() bundle.Mutator {
return &validate{}
}
79 changes: 79 additions & 0 deletions bundle/config/validate/validate_sync_patterns.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package validate

import (
"context"
"fmt"
"sync"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/fileset"
"golang.org/x/sync/errgroup"
)

func ValidateSyncPatterns() bundle.ReadOnlyMutator {
return &validateSyncPatterns{}
}

type validateSyncPatterns struct {
}

func (v *validateSyncPatterns) Name() string {
return "validate:validate_sync_patterns"
}

func (v *validateSyncPatterns) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics {
s := rb.Config().Sync
if len(s.Exclude) == 0 && len(s.Include) == 0 {
return nil
}

diags, err := checkPatterns(s.Exclude, "sync.exclude", rb)
if err != nil {
return diag.FromErr(err)
}

includeDiags, err := checkPatterns(s.Include, "sync.include", rb)
if err != nil {
return diag.FromErr(err)
}

return diags.Extend(includeDiags)
}

func checkPatterns(patterns []string, path string, rb bundle.ReadOnlyBundle) (diag.Diagnostics, error) {
var mu sync.Mutex
var errs errgroup.Group
var diags diag.Diagnostics

for i, pattern := range patterns {
index := i
p := pattern
errs.Go(func() error {
fs, err := fileset.NewGlobSet(rb.RootPath(), []string{p})
if err != nil {
return err
}

all, err := fs.All()
if err != nil {
return err
}

if len(all) == 0 {
loc := location{path: fmt.Sprintf("%s[%d]", path, index), rb: rb}
mu.Lock()
diags = diags.Append(diag.Diagnostic{
Severity: diag.Warning,
Summary: fmt.Sprintf("Pattern %s does not match any files", p),
Location: loc.Location(),
Path: loc.Path(),
})
mu.Unlock()
}
return nil
})
}

return diags, errs.Wait()
}
2 changes: 1 addition & 1 deletion bundle/deploy/files/delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func (m *delete) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
}

// Clean up sync snapshot file
sync, err := GetSync(ctx, b)
sync, err := GetSync(ctx, bundle.ReadOnly(b))
if err != nil {
return diag.FromErr(err)
}
Expand Down
Loading