From dd0995d9b2a0a60d240b8bf8858f5487066b8df3 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 12 Jul 2023 00:42:45 +0200 Subject: [PATCH 1/4] Disallow notebooks in paths where files are expected --- bundle/config/mutator/translate_paths.go | 23 ++- bundle/config/mutator/translate_paths_test.go | 141 ++++++++++++++++++ 2 files changed, 161 insertions(+), 3 deletions(-) diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index a7ccb3e9e2..79682d9c58 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -2,6 +2,7 @@ package mutator import ( "context" + "errors" "fmt" "os" "path" @@ -14,6 +15,14 @@ import ( "github.com/databricks/databricks-sdk-go/service/pipelines" ) +type ErrIsNotebook struct { + path string +} + +func (err ErrIsNotebook) Error() string { + return fmt.Sprintf("file %s is a notebook", err.path) +} + type translatePaths struct { seen map[string]string } @@ -94,14 +103,16 @@ func (m *translatePaths) translateNotebookPath(literal, localPath, remotePath st } func (m *translatePaths) translateFilePath(literal, localPath, remotePath string) (string, error) { - _, err := os.Stat(localPath) + nb, _, err := notebook.Detect(localPath) if os.IsNotExist(err) { return "", fmt.Errorf("file %s not found", literal) } if err != nil { - return "", fmt.Errorf("unable to access %s: %w", localPath, err) + return "", fmt.Errorf("unable to determine if %s is not a notebook: %w", localPath, err) + } + if nb { + return "", ErrIsNotebook{localPath} } - return remotePath, nil } @@ -117,6 +128,9 @@ func (m *translatePaths) translateJobTask(dir string, b *bundle.Bundle, task *jo if task.SparkPythonTask != nil { err = m.rewritePath(dir, b, &task.SparkPythonTask.PythonFile, m.translateFilePath) + if target := (&ErrIsNotebook{}); errors.As(err, target) { + return fmt.Errorf("please use notebook task type for notebooks. %s", target) + } if err != nil { return err } @@ -137,6 +151,9 @@ func (m *translatePaths) translatePipelineLibrary(dir string, b *bundle.Bundle, if library.File != nil { err = m.rewritePath(dir, b, &library.File.Path, m.translateFilePath) + if target := (&ErrIsNotebook{}); errors.As(err, target) { + return fmt.Errorf("please use libraries.notebook.path for notebooks. %s", target) + } if err != nil { return err } diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index 1bcb8b1b2d..3c200a8876 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -4,6 +4,7 @@ import ( "context" "os" "path/filepath" + "regexp" "testing" "github.com/databricks/cli/bundle" @@ -455,3 +456,143 @@ func TestPipelineFileDoesNotExistError(t *testing.T) { err := mutator.TranslatePaths().Apply(context.Background(), bundle) assert.EqualError(t, err, "file ./doesnt_exist.py not found") } + +func TestSparkPythonTaskJobWithNotebookSourceError(t *testing.T) { + dir := t.TempDir() + touchNotebookFile(t, filepath.Join(dir, "my_notebook.py")) + + bundle := &bundle.Bundle{ + Config: config.Root{ + Path: dir, + Workspace: config.Workspace{ + FilesPath: "/bundle", + }, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job": { + Paths: resources.Paths{ + ConfigFilePath: filepath.Join(dir, "resource.yml"), + }, + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + { + SparkPythonTask: &jobs.SparkPythonTask{ + PythonFile: "./my_notebook.py", + }, + }, + }, + }, + }, + }, + }, + }, + } + + err := mutator.TranslatePaths().Apply(context.Background(), bundle) + assert.ErrorContains(t, err, "please use notebook task type for notebooks") +} + +func TestNotebookTaskJobWithFileSourceError(t *testing.T) { + dir := t.TempDir() + touchEmptyFile(t, filepath.Join(dir, "my_file.py")) + + bundle := &bundle.Bundle{ + Config: config.Root{ + Path: dir, + Workspace: config.Workspace{ + FilesPath: "/bundle", + }, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job": { + Paths: resources.Paths{ + ConfigFilePath: filepath.Join(dir, "resource.yml"), + }, + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + { + NotebookTask: &jobs.NotebookTask{ + NotebookPath: "./my_file.py", + }, + }, + }, + }, + }, + }, + }, + }, + } + + err := mutator.TranslatePaths().Apply(context.Background(), bundle) + assert.Regexp(t, regexp.MustCompile("file at .* is not a notebook"), err.Error()) +} + +func TestNotebookLibraryPipelineWithFileSourceError(t *testing.T) { + dir := t.TempDir() + touchEmptyFile(t, filepath.Join(dir, "my_file.py")) + + bundle := &bundle.Bundle{ + Config: config.Root{ + Path: dir, + Workspace: config.Workspace{ + FilesPath: "/bundle", + }, + Resources: config.Resources{ + Pipelines: map[string]*resources.Pipeline{ + "pipeline": { + Paths: resources.Paths{ + ConfigFilePath: filepath.Join(dir, "resource.yml"), + }, + PipelineSpec: &pipelines.PipelineSpec{ + Libraries: []pipelines.PipelineLibrary{ + { + Notebook: &pipelines.NotebookLibrary{ + Path: "./my_file.py", + }, + }, + }, + }, + }, + }, + }, + }, + } + + err := mutator.TranslatePaths().Apply(context.Background(), bundle) + assert.Regexp(t, regexp.MustCompile("file at .* is not a notebook"), err.Error()) +} + +func TestFileLibraryPipelineWithNotebookSourceError(t *testing.T) { + dir := t.TempDir() + touchNotebookFile(t, filepath.Join(dir, "my_notebook.py")) + + bundle := &bundle.Bundle{ + Config: config.Root{ + Path: dir, + Workspace: config.Workspace{ + FilesPath: "/bundle", + }, + Resources: config.Resources{ + Pipelines: map[string]*resources.Pipeline{ + "pipeline": { + Paths: resources.Paths{ + ConfigFilePath: filepath.Join(dir, "resource.yml"), + }, + PipelineSpec: &pipelines.PipelineSpec{ + Libraries: []pipelines.PipelineLibrary{ + { + File: &pipelines.FileLibrary{ + Path: "./my_notebook.py", + }, + }, + }, + }, + }, + }, + }, + }, + } + + err := mutator.TranslatePaths().Apply(context.Background(), bundle) + assert.ErrorContains(t, err, "please use libraries.notebook.path for notebooks") +} From 5fdddd6289b8c96babdd6caa6261285392b4682f Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 12 Jul 2023 00:58:49 +0200 Subject: [PATCH 2/4] - --- bundle/config/mutator/translate_paths.go | 2 +- bundle/config/mutator/translate_paths_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index 79682d9c58..6dbf632a01 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -152,7 +152,7 @@ func (m *translatePaths) translatePipelineLibrary(dir string, b *bundle.Bundle, if library.File != nil { err = m.rewritePath(dir, b, &library.File.Path, m.translateFilePath) if target := (&ErrIsNotebook{}); errors.As(err, target) { - return fmt.Errorf("please use libraries.notebook.path for notebooks. %s", target) + return fmt.Errorf("please specify notebooks as notebook libraries (use libraries.notebook.path). %s", target) } if err != nil { return err diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index 3c200a8876..ee0e3fc792 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -594,5 +594,5 @@ func TestFileLibraryPipelineWithNotebookSourceError(t *testing.T) { } err := mutator.TranslatePaths().Apply(context.Background(), bundle) - assert.ErrorContains(t, err, "please use libraries.notebook.path for notebooks") + assert.ErrorContains(t, err, "please specify notebooks as notebook libraries (use libraries.notebook.path)") } From 52d27685a1da6dc4ae6cc2efc9b3d69a98c9cc1c Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 12 Jul 2023 00:59:58 +0200 Subject: [PATCH 3/4] - --- bundle/config/mutator/translate_paths.go | 2 +- bundle/config/mutator/translate_paths_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index 6dbf632a01..daed7a83ba 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -152,7 +152,7 @@ func (m *translatePaths) translatePipelineLibrary(dir string, b *bundle.Bundle, if library.File != nil { err = m.rewritePath(dir, b, &library.File.Path, m.translateFilePath) if target := (&ErrIsNotebook{}); errors.As(err, target) { - return fmt.Errorf("please specify notebooks as notebook libraries (use libraries.notebook.path). %s", target) + return fmt.Errorf("please specify notebooks as notebook libraries. %s", target) } if err != nil { return err diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index ee0e3fc792..2c87ca9b76 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -594,5 +594,5 @@ func TestFileLibraryPipelineWithNotebookSourceError(t *testing.T) { } err := mutator.TranslatePaths().Apply(context.Background(), bundle) - assert.ErrorContains(t, err, "please specify notebooks as notebook libraries (use libraries.notebook.path)") + assert.ErrorContains(t, err, "please specify notebooks as notebook libraries") } From 885effaf6142e0ae8429dca719aba9efd8a11768 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 12 Jul 2023 13:41:59 +0200 Subject: [PATCH 4/4] added tests --- bundle/config/mutator/translate_paths.go | 22 +++++++++++++++---- bundle/config/mutator/translate_paths_test.go | 17 +++++++------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index daed7a83ba..08f8398613 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -20,7 +20,15 @@ type ErrIsNotebook struct { } func (err ErrIsNotebook) Error() string { - return fmt.Sprintf("file %s is a notebook", err.path) + return fmt.Sprintf("file at %s is a notebook", err.path) +} + +type ErrIsNotNotebook struct { + path string +} + +func (err ErrIsNotNotebook) Error() string { + return fmt.Sprintf("file at %s is not a notebook", err.path) } type translatePaths struct { @@ -95,7 +103,7 @@ func (m *translatePaths) translateNotebookPath(literal, localPath, remotePath st return "", fmt.Errorf("unable to determine if %s is a notebook: %w", localPath, err) } if !nb { - return "", fmt.Errorf("file at %s is not a notebook", localPath) + return "", ErrIsNotNotebook{localPath} } // Upon import, notebooks are stripped of their extension. @@ -121,6 +129,9 @@ func (m *translatePaths) translateJobTask(dir string, b *bundle.Bundle, task *jo if task.NotebookTask != nil { err = m.rewritePath(dir, b, &task.NotebookTask.NotebookPath, m.translateNotebookPath) + if target := (&ErrIsNotNotebook{}); errors.As(err, target) { + return fmt.Errorf(`expected a notebook for "tasks.notebook_task.notebook_path" but got a file: %w`, target) + } if err != nil { return err } @@ -129,7 +140,7 @@ func (m *translatePaths) translateJobTask(dir string, b *bundle.Bundle, task *jo if task.SparkPythonTask != nil { err = m.rewritePath(dir, b, &task.SparkPythonTask.PythonFile, m.translateFilePath) if target := (&ErrIsNotebook{}); errors.As(err, target) { - return fmt.Errorf("please use notebook task type for notebooks. %s", target) + return fmt.Errorf(`expected a file for "tasks.spark_python_task.python_file" but got a notebook: %w`, target) } if err != nil { return err @@ -144,6 +155,9 @@ func (m *translatePaths) translatePipelineLibrary(dir string, b *bundle.Bundle, if library.Notebook != nil { err = m.rewritePath(dir, b, &library.Notebook.Path, m.translateNotebookPath) + if target := (&ErrIsNotNotebook{}); errors.As(err, target) { + return fmt.Errorf(`expected a notebook for "libraries.notebook.path" but got a file: %w`, target) + } if err != nil { return err } @@ -152,7 +166,7 @@ func (m *translatePaths) translatePipelineLibrary(dir string, b *bundle.Bundle, if library.File != nil { err = m.rewritePath(dir, b, &library.File.Path, m.translateFilePath) if target := (&ErrIsNotebook{}); errors.As(err, target) { - return fmt.Errorf("please specify notebooks as notebook libraries. %s", target) + return fmt.Errorf(`expected a file for "libraries.file.path" but got a notebook: %w`, target) } if err != nil { return err diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index 2c87ca9b76..b87f4f6768 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -4,7 +4,6 @@ import ( "context" "os" "path/filepath" - "regexp" "testing" "github.com/databricks/cli/bundle" @@ -457,7 +456,7 @@ func TestPipelineFileDoesNotExistError(t *testing.T) { assert.EqualError(t, err, "file ./doesnt_exist.py not found") } -func TestSparkPythonTaskJobWithNotebookSourceError(t *testing.T) { +func TestJobSparkPythonTaskWithNotebookSourceError(t *testing.T) { dir := t.TempDir() touchNotebookFile(t, filepath.Join(dir, "my_notebook.py")) @@ -489,10 +488,10 @@ func TestSparkPythonTaskJobWithNotebookSourceError(t *testing.T) { } err := mutator.TranslatePaths().Apply(context.Background(), bundle) - assert.ErrorContains(t, err, "please use notebook task type for notebooks") + assert.ErrorContains(t, err, `expected a file for "tasks.spark_python_task.python_file" but got a notebook`) } -func TestNotebookTaskJobWithFileSourceError(t *testing.T) { +func TestJobNotebookTaskWithFileSourceError(t *testing.T) { dir := t.TempDir() touchEmptyFile(t, filepath.Join(dir, "my_file.py")) @@ -524,10 +523,10 @@ func TestNotebookTaskJobWithFileSourceError(t *testing.T) { } err := mutator.TranslatePaths().Apply(context.Background(), bundle) - assert.Regexp(t, regexp.MustCompile("file at .* is not a notebook"), err.Error()) + assert.ErrorContains(t, err, `expected a notebook for "tasks.notebook_task.notebook_path" but got a file`) } -func TestNotebookLibraryPipelineWithFileSourceError(t *testing.T) { +func TestPipelineNotebookLibraryWithFileSourceError(t *testing.T) { dir := t.TempDir() touchEmptyFile(t, filepath.Join(dir, "my_file.py")) @@ -559,10 +558,10 @@ func TestNotebookLibraryPipelineWithFileSourceError(t *testing.T) { } err := mutator.TranslatePaths().Apply(context.Background(), bundle) - assert.Regexp(t, regexp.MustCompile("file at .* is not a notebook"), err.Error()) + assert.ErrorContains(t, err, `expected a notebook for "libraries.notebook.path" but got a file`) } -func TestFileLibraryPipelineWithNotebookSourceError(t *testing.T) { +func TestPipelineFileLibraryWithNotebookSourceError(t *testing.T) { dir := t.TempDir() touchNotebookFile(t, filepath.Join(dir, "my_notebook.py")) @@ -594,5 +593,5 @@ func TestFileLibraryPipelineWithNotebookSourceError(t *testing.T) { } err := mutator.TranslatePaths().Apply(context.Background(), bundle) - assert.ErrorContains(t, err, "please specify notebooks as notebook libraries") + assert.ErrorContains(t, err, `expected a file for "libraries.file.path" but got a notebook`) }