Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions bundle/config/resources.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package config

import (
"fmt"

"github.com/databricks/bricks/bundle/config/resources"
)

Expand All @@ -13,6 +15,87 @@ type Resources struct {
Experiments map[string]*resources.MlflowExperiment `json:"experiments,omitempty"`
}

// UniqueResourceIdTracker remembers which resource names have been seen.
//
// Both maps are keyed by resource name: Type holds the resource type label
// (for example "job" or "pipeline") and ConfigPath holds the path of the
// configuration file in which that resource was defined.
type UniqueResourceIdTracker struct {
	Type, ConfigPath map[string]string
}

// VerifySafeMerge checks that other can be merged into r without two resources
// ending up under the same name.
//
// Each side is first validated on its own with VerifyUniqueResourceIdentifiers;
// an error from either validation is returned unchanged. The names of both
// sides are then compared, and a name present on both sides is reported as an
// error listing the resource type and configuration file path of each
// definition. It returns nil when no name is shared.
//
// When several names collide, the lexicographically smallest one is reported.
// Map iteration order is unspecified in Go, so returning the first hit would
// make the error message vary from run to run.
func (r *Resources) VerifySafeMerge(other *Resources) error {
	rootTracker, err := r.VerifyUniqueResourceIdentifiers()
	if err != nil {
		return err
	}
	otherTracker, err := other.VerifyUniqueResourceIdentifiers()
	if err != nil {
		return err
	}

	// Scan every name instead of stopping at the first hit so the reported
	// conflict does not depend on map iteration order.
	conflict, found := "", false
	for name := range otherTracker.Type {
		if _, taken := rootTracker.Type[name]; !taken {
			continue
		}
		if !found || name < conflict {
			conflict, found = name, true
		}
	}
	if !found {
		return nil
	}

	return fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)",
		conflict,
		rootTracker.Type[conflict],
		rootTracker.ConfigPath[conflict],
		otherTracker.Type[conflict],
		otherTracker.ConfigPath[conflict],
	)
}

// VerifyUniqueResourceIdentifiers checks that no name is shared between the
// jobs, pipelines, models and experiments held by r.
//
// It returns a tracker that maps every registered resource name to its resource
// type and to the path of the configuration file that defined it. Resource
// types are processed in the order job, pipeline, mlflow_model,
// mlflow_experiment; the first type that reuses a name registered by an earlier
// type produces an error naming both definitions. The tracker returned next to
// an error holds only the types that were processed before the conflict.
//
// When one resource type reuses several names, the lexicographically smallest
// one is reported. Map iteration order is unspecified in Go, so reporting the
// first hit would make the error message vary from run to run.
func (r *Resources) VerifyUniqueResourceIdentifiers() (*UniqueResourceIdTracker, error) {
	tracker := &UniqueResourceIdTracker{
		Type:       make(map[string]string),
		ConfigPath: make(map[string]string),
	}

	// register adds every name in paths (resource name -> config file path) to
	// the tracker under the given resource type. Nothing is added when one of
	// the names is already taken; the conflict is returned as an error instead.
	register := func(kind string, paths map[string]string) error {
		dup, found := "", false
		for name := range paths {
			if _, taken := tracker.Type[name]; taken && (!found || name < dup) {
				dup, found = name, true
			}
		}
		if found {
			return fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)",
				dup,
				tracker.Type[dup],
				tracker.ConfigPath[dup],
				kind,
				paths[dup],
			)
		}
		for name, path := range paths {
			tracker.Type[name] = kind
			tracker.ConfigPath[name] = path
		}
		return nil
	}

	// Each resource type is collected and registered before the next one is
	// touched, so later types are never inspected once a conflict is found.
	jobs := make(map[string]string, len(r.Jobs))
	for name, job := range r.Jobs {
		jobs[name] = job.ConfigFilePath
	}
	if err := register("job", jobs); err != nil {
		return tracker, err
	}

	pipelines := make(map[string]string, len(r.Pipelines))
	for name, pipeline := range r.Pipelines {
		pipelines[name] = pipeline.ConfigFilePath
	}
	if err := register("pipeline", pipelines); err != nil {
		return tracker, err
	}

	models := make(map[string]string, len(r.Models))
	for name, model := range r.Models {
		models[name] = model.ConfigFilePath
	}
	if err := register("mlflow_model", models); err != nil {
		return tracker, err
	}

	experiments := make(map[string]string, len(r.Experiments))
	for name, experiment := range r.Experiments {
		experiments[name] = experiment.ConfigFilePath
	}
	if err := register("mlflow_experiment", experiments); err != nil {
		return tracker, err
	}

	return tracker, nil
}

// SetConfigFilePath sets the specified path for all resources contained in this instance.
// This property is used to correctly resolve paths relative to the path
// of the configuration file they were defined in.
Expand Down
96 changes: 96 additions & 0 deletions bundle/config/resources_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package config

import (
"testing"

"github.com/databricks/bricks/bundle/config/resources"
"github.com/stretchr/testify/assert"
)

func TestVerifyUniqueResourceIdentifiers(t *testing.T) {
r := Resources{
Jobs: map[string]*resources.Job{
"foo": {
Paths: resources.Paths{
ConfigFilePath: "foo.yml",
},
},
},
Models: map[string]*resources.MlflowModel{
"bar": {
Paths: resources.Paths{
ConfigFilePath: "bar.yml",
},
},
},
Experiments: map[string]*resources.MlflowExperiment{
"foo": {
Paths: resources.Paths{
ConfigFilePath: "foo2.yml",
},
},
},
}
_, err := r.VerifyUniqueResourceIdentifiers()
assert.ErrorContains(t, err, "multiple resources named foo (job at foo.yml, mlflow_experiment at foo2.yml)")
}

// TestVerifySafeMerge checks that merging is rejected when the incoming
// Resources defines a pipeline named "foo" while the target already has a job
// with that name.
func TestVerifySafeMerge(t *testing.T) {
	target := Resources{
		Jobs: map[string]*resources.Job{
			"foo": {Paths: resources.Paths{ConfigFilePath: "foo.yml"}},
		},
		Models: map[string]*resources.MlflowModel{
			"bar": {Paths: resources.Paths{ConfigFilePath: "bar.yml"}},
		},
	}
	incoming := Resources{
		Pipelines: map[string]*resources.Pipeline{
			"foo": {Paths: resources.Paths{ConfigFilePath: "foo2.yml"}},
		},
	}

	mergeErr := target.VerifySafeMerge(&incoming)
	assert.ErrorContains(t, mergeErr, "multiple resources named foo (job at foo.yml, pipeline at foo2.yml)")
}

// TestVerifySafeMergeForSameResourceType checks that merging is rejected when
// both sides define a job named "foo", i.e. the clash is within one resource
// type rather than across types.
func TestVerifySafeMergeForSameResourceType(t *testing.T) {
	target := Resources{
		Jobs: map[string]*resources.Job{
			"foo": {Paths: resources.Paths{ConfigFilePath: "foo.yml"}},
		},
		Models: map[string]*resources.MlflowModel{
			"bar": {Paths: resources.Paths{ConfigFilePath: "bar.yml"}},
		},
	}
	incoming := Resources{
		Jobs: map[string]*resources.Job{
			"foo": {Paths: resources.Paths{ConfigFilePath: "foo2.yml"}},
		},
	}

	mergeErr := target.VerifySafeMerge(&incoming)
	assert.ErrorContains(t, mergeErr, "multiple resources named foo (job at foo.yml, job at foo2.yml)")
}
11 changes: 10 additions & 1 deletion bundle/config/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,24 @@ func (r *Root) Load(path string) error {
if err != nil {
return err
}

r.Path = filepath.Dir(path)
r.SetConfigFilePath(path)
return nil

_, err = r.Resources.VerifyUniqueResourceIdentifiers()
return err
}

// Merge merges other into r.
//
// It first clears other.Path, then asks VerifySafeMerge whether the resources
// of both roots can be combined without duplicate resource identifiers, and
// finally delegates to mergo.MergeWithOverwrite. other.Path is cleared even
// when the safety check fails and the error is returned.
func (r *Root) Merge(other *Root) error {
	// TODO: when hooking into merge semantics, disallow setting path on the target instance.
	other.Path = ""

	// Refuse to merge when both sides define a resource under the same name.
	if err := r.Resources.VerifySafeMerge(&other.Resources); err != nil {
		return err
	}

	// TODO: define and test semantics for merging.
	return mergo.MergeWithOverwrite(r, other)
}
Expand Down
19 changes: 19 additions & 0 deletions bundle/config/root_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,22 @@ func TestRootMergeMap(t *testing.T) {
assert.NoError(t, root.Merge(other))
assert.Equal(t, &Workspace{Host: "bar", Profile: "profile"}, root.Environments["development"].Workspace)
}

// TestDuplicateIdOnLoadReturnsError loads a bundle.yml that defines both a job
// and a pipeline named "foo" and expects Load to report the duplicate.
func TestDuplicateIdOnLoadReturnsError(t *testing.T) {
	var cfg Root
	loadErr := cfg.Load("./testdata/duplicate_resource_names_in_root/bundle.yml")
	assert.ErrorContains(t, loadErr, "multiple resources named foo (job at ./testdata/duplicate_resource_names_in_root/bundle.yml, pipeline at ./testdata/duplicate_resource_names_in_root/bundle.yml)")
}

// TestDuplicateIdOnMergeReturnsError loads bundle.yml and resources.yml from
// the same testdata directory as two separate roots, each of which must load
// cleanly, and expects Merge to report the name "foo" used by both.
func TestDuplicateIdOnMergeReturnsError(t *testing.T) {
	var base, sub Root

	require.NoError(t, base.Load("./testdata/duplicate_resource_name_in_subconfiguration/bundle.yml"))
	require.NoError(t, sub.Load("./testdata/duplicate_resource_name_in_subconfiguration/resources.yml"))

	mergeErr := base.Merge(&sub)
	assert.ErrorContains(t, mergeErr, "multiple resources named foo (job at ./testdata/duplicate_resource_name_in_subconfiguration/bundle.yml, pipeline at ./testdata/duplicate_resource_name_in_subconfiguration/resources.yml)")
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
bundle:
name: test

workspace:
profile: test

resources:
jobs:
foo:
name: job foo
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
resources:
pipelines:
foo:
name: pipeline foo
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
bundle:
name: test

workspace:
profile: test

resources:
jobs:
foo:
name: job foo
pipelines:
foo:
name: pipeline foo
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
bundle:
name: test

workspace:
profile: test

resources:
jobs:
foo:
name: job foo
pipelines:
foo:
name: pipeline foo
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
bundle:
name: test

workspace:
profile: test

resources:
jobs:
foo:
name: job foo
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
resources:
pipelines:
foo:
name: pipeline foo
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
bundle:
name: test

workspace:
profile: test
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
resources:
jobs:
foo:
name: job foo
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
resources:
pipelines:
foo:
name: pipeline foo
37 changes: 37 additions & 0 deletions bundle/tests/conflicting_resource_ids_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package config_tests

import (
"context"
"fmt"
"path/filepath"
"testing"

"github.com/databricks/bricks/bundle"
"github.com/databricks/bricks/bundle/config/mutator"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestConflictingResourceIdsNoSubconfig expects bundle.Load itself to fail when
// a single bundle.yml defines a job and a pipeline that are both named "foo".
func TestConflictingResourceIdsNoSubconfig(t *testing.T) {
	// The reported path uses the separator of the host operating system.
	configPath := filepath.FromSlash("conflicting_resource_ids/no_subconfigurations/bundle.yml")
	want := fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", configPath, configPath)

	_, loadErr := bundle.Load("./conflicting_resource_ids/no_subconfigurations")
	assert.ErrorContains(t, loadErr, want)
}

// TestConflictingResourceIdsOneSubconfig loads a bundle whose root load must
// succeed, then applies the default mutators and expects the error for the
// name "foo" being defined in both bundle.yml and resources.yml.
func TestConflictingResourceIdsOneSubconfig(t *testing.T) {
	loaded, err := bundle.Load("./conflicting_resource_ids/one_subconfiguration")
	require.NoError(t, err)

	// The reported paths use the separator of the host operating system.
	rootPath := filepath.FromSlash("conflicting_resource_ids/one_subconfiguration/bundle.yml")
	subPath := filepath.FromSlash("conflicting_resource_ids/one_subconfiguration/resources.yml")
	want := fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", rootPath, subPath)

	applyErr := bundle.Apply(context.Background(), loaded, mutator.DefaultMutators())
	assert.ErrorContains(t, applyErr, want)
}

// TestConflictingResourceIdsTwoSubconfigs loads a bundle whose root load must
// succeed, then applies the default mutators and expects the error for the
// name "foo" being defined in both resources1.yml and resources2.yml.
func TestConflictingResourceIdsTwoSubconfigs(t *testing.T) {
	loaded, err := bundle.Load("./conflicting_resource_ids/two_subconfigurations")
	require.NoError(t, err)

	// The reported paths use the separator of the host operating system.
	firstPath := filepath.FromSlash("conflicting_resource_ids/two_subconfigurations/resources1.yml")
	secondPath := filepath.FromSlash("conflicting_resource_ids/two_subconfigurations/resources2.yml")
	want := fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", firstPath, secondPath)

	applyErr := bundle.Apply(context.Background(), loaded, mutator.DefaultMutators())
	assert.ErrorContains(t, applyErr, want)
}