From f6232f5aaec40812893bef5b04fa34b83327ced6 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 11 Oct 2024 16:40:15 +0200 Subject: [PATCH 01/16] [WIP] Add support for non python ipynb notebooks --- internal/filer_test.go | 31 +++++---- internal/helpers.go | 9 ++- .../workspace_files_extensions_client.go | 15 +++- .../workspace_files_extensions_client_test.go | 69 ++++++++++++++++--- 4 files changed, 95 insertions(+), 29 deletions(-) diff --git a/internal/filer_test.go b/internal/filer_test.go index bc4c94808e..04db494a96 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -360,7 +360,7 @@ func TestAccFilerReadDir(t *testing.T) { } } -var jupyterNotebookContent1 = ` +var pythonJupyterNotebookContent1 = ` { "cells": [ { @@ -384,7 +384,10 @@ var jupyterNotebookContent1 = ` } ` -var jupyterNotebookContent2 = ` +// TODO: Does detect notebook work with notebooks exported from databricks? +// They typically do not have the top level "language" key set. + +var pythonJupyterNotebookContent2 = ` { "cells": [ { @@ -408,6 +411,10 @@ var jupyterNotebookContent2 = ` } ` +// TODO: Continue adding tests for other types of notebooks than python here. +// Exporting from a workspace makes this work easier. + + func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { t.Parallel() @@ -424,7 +431,7 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { require.NoError(t, err) err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"first upload\"))")) require.NoError(t, err) - err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent1)) + err = f.Write(ctx, "pythonJupyterNb.ipynb", strings.NewReader(pythonJupyterNotebookContent1)) require.NoError(t, err) // Assert contents after initial upload @@ -432,7 +439,7 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('first upload'))") filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"first upload\"") filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"first upload\"))") - filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 1\")") + filerTest{t, f}.assertContents(ctx, "pythonJupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 1\")") // Assert uploading a second time fails due to overwrite mode missing err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('second upload'))")) @@ -451,9 +458,9 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { assert.ErrorIs(t, err, fs.ErrExist) assert.Regexp(t, regexp.MustCompile(`file already exists: .*/scalaNb$`), err.Error()) - err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent2)) + err = f.Write(ctx, "pythonJupyterNb.ipynb", strings.NewReader(pythonJupyterNotebookContent2)) assert.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/jupyterNb$`), err.Error()) + assert.Regexp(t, regexp.MustCompile(`file already exists: .*/pythonJupyterNb$`), err.Error()) } func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) { @@ -472,7 +479,7 @@ func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) { require.NoError(t, err) err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"first upload\"))")) require.NoError(t, err) - err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent1)) + err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(pythonJupyterNotebookContent1)) require.NoError(t, err) // Assert contents after initial upload @@ -491,7 +498,7 @@ func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) { require.NoError(t, err) err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"second upload\"))"), filer.OverwriteIfExists) require.NoError(t, err) - err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent2), filer.OverwriteIfExists) + err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(pythonJupyterNotebookContent2), filer.OverwriteIfExists) require.NoError(t, err) // Assert contents have been overwritten @@ -515,8 +522,8 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { {"foo.r", "print('foo')"}, {"foo.scala", "println('foo')"}, {"foo.sql", "SELECT 'foo'"}, - {"jupyterNb.ipynb", jupyterNotebookContent1}, - {"jupyterNb2.ipynb", jupyterNotebookContent2}, + {"jupyterNb.ipynb", pythonJupyterNotebookContent1}, + {"jupyterNb2.ipynb", pythonJupyterNotebookContent2}, {"pyNb.py", "# Databricks notebook source\nprint('first upload'))"}, {"rNb.r", "# Databricks notebook source\nprint('first upload'))"}, {"scalaNb.scala", "// Databricks notebook source\n println(\"first upload\"))"}, @@ -582,7 +589,7 @@ func setupFilerWithExtensionsTest(t *testing.T) filer.Filer { }{ {"foo.py", "# Databricks notebook source\nprint('first upload'))"}, {"bar.py", "print('foo')"}, - {"jupyter.ipynb", jupyterNotebookContent1}, + {"jupyter.ipynb", pythonJupyterNotebookContent1}, {"pretender", "not a notebook"}, {"dir/file.txt", "file content"}, {"scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')"}, @@ -756,7 +763,7 @@ func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) { assert.ErrorIs(t, err, fs.ErrNotExist) // Case 2: Jupyter Notebook - err = wf.Write(ctx, "bar.ipynb", strings.NewReader(jupyterNotebookContent1)) + err = wf.Write(ctx, "bar.ipynb", strings.NewReader(pythonJupyterNotebookContent1)) require.NoError(t, err) // The Jupyter notebook should exist but not the source notebook diff --git a/internal/helpers.go b/internal/helpers.go index 9387706bb3..e99c951dfe 100644 --- a/internal/helpers.go +++ b/internal/helpers.go @@ -20,6 +20,7 @@ import ( "time" "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/internal/acc" "github.com/databricks/cli/libs/flags" "github.com/databricks/cli/cmd" @@ -561,12 +562,10 @@ func setupLocalFiler(t *testing.T) (filer.Filer, string) { } func setupWsfsFiler(t *testing.T) (filer.Filer, string) { - t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + ctx, w := acc.WorkspaceTest(t) - ctx := context.Background() - w := databricks.Must(databricks.NewWorkspaceClient()) - tmpdir := TemporaryWorkspaceDir(t, w) - f, err := filer.NewWorkspaceFilesClient(w, tmpdir) + tmpdir := TemporaryWorkspaceDir(t, w.W) + f, err := filer.NewWorkspaceFilesClient(w.W, tmpdir) require.NoError(t, err) // Check if we can use this API here, skip test if we cannot. diff --git a/libs/filer/workspace_files_extensions_client.go b/libs/filer/workspace_files_extensions_client.go index b24ecf7eef..509fe89310 100644 --- a/libs/filer/workspace_files_extensions_client.go +++ b/libs/filer/workspace_files_extensions_client.go @@ -28,6 +28,9 @@ var extensionsToLanguages = map[string]workspace.Language{ ".r": workspace.LanguageR, ".scala": workspace.LanguageScala, ".sql": workspace.LanguageSql, + + // The platform supports all languages (Python, R, Scala, and SQL) for Jupyter notebooks. + // Thus, we do not need to check the language for .ipynb files. ".ipynb": workspace.LanguagePython, } @@ -47,6 +50,10 @@ func (w *workspaceFilesExtensionsClient) stat(ctx context.Context, name string) return info.(wsfsFileInfo), err } +// TODO: Add end to end tests that the filer works for all .ipynb cases. +// TODO: Also fix the sync issues. OR add tests that sync works fine with non +// python notebooks. Is this needed in the first place? + // This function returns the stat for the provided notebook. The stat object itself contains the path // with the extension since it is meant to be used in the context of a fs.FileInfo. func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx context.Context, name string) (*workspaceFileStatus, error) { @@ -75,8 +82,9 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex return nil, nil } - // Not the correct language. Return early. - if stat.Language != extensionsToLanguages[ext] { + // Not the correct language. Return early. Note: All languages are supported + // for Jupyter notebooks. + if ext != ".ipynb" && stat.Language != extensionsToLanguages[ext] { log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not of the correct language. Expected %s but found %s.", name, path.Join(w.root, nameWithoutExt), extensionsToLanguages[ext], stat.Language) return nil, nil } @@ -120,7 +128,8 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithoutExt(ctx con ext := notebook.GetExtensionByLanguage(&stat.ObjectInfo) // If the notebook was exported as a Jupyter notebook, the extension should be .ipynb. - if stat.Language == workspace.LanguagePython && stat.ReposExportFormat == workspace.ExportFormatJupyter { + // TODO: Test this. + if stat.ReposExportFormat == workspace.ExportFormatJupyter { ext = ".ipynb" } diff --git a/libs/filer/workspace_files_extensions_client_test.go b/libs/filer/workspace_files_extensions_client_test.go index 321c437128..1dd8a242db 100644 --- a/libs/filer/workspace_files_extensions_client_test.go +++ b/libs/filer/workspace_files_extensions_client_test.go @@ -44,15 +44,6 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { filePath: "/dir/foo.py", expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.py resolve to the same name /foo.py. Changing the name of one of these objects will resolve this issue", }, - { - name: "python jupyter notebook and file", - language: workspace.LanguagePython, - notebookExportFormat: workspace.ExportFormatJupyter, - notebookPath: "/dir/foo", - filePath: "/dir/foo.py", - // Jupyter notebooks would correspond to foo.ipynb so an error is not expected. - expectedError: "", - }, { name: "scala source notebook and file", language: workspace.LanguageScala, @@ -82,6 +73,66 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { language: workspace.LanguagePython, notebookExportFormat: workspace.ExportFormatJupyter, notebookPath: "/dir/foo", + filePath: "/dir/foo.py", + // Jupyter notebooks would correspond to foo.ipynb so an error is not expected. + expectedError: "", + }, + { + name: "scala jupyter notebook and file", + language: workspace.LanguageScala, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.scala", + // Jupyter notebooks would correspond to foo.ipynb so an error is not expected. + expectedError: "", + }, + { + name: "sql jupyter notebook and file", + language: workspace.LanguageSql, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.sql", + // Jupyter notebooks would correspond to foo.ipynb so an error is not expected. + expectedError: "", + }, + { + name: "r jupyter notebook and file", + language: workspace.LanguageR, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.sql", + // Jupyter notebooks would correspond to foo.ipynb so an error is not expected. + expectedError: "", + }, + { + name: "python jupyter notebook and file", + language: workspace.LanguagePython, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.ipynb", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", + }, + { + name: "scala jupyter notebook and file", + language: workspace.LanguageScala, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.ipynb", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", + }, + { + name: "r jupyter notebook and file", + language: workspace.LanguageR, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.ipynb", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", + }, + { + name: "sql jupyter notebook and file", + language: workspace.LanguageSql, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", filePath: "/dir/foo.ipynb", expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", }, From 2eb6ea5d91a376ec96f6d9120d91f86924910f5e Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 21 Oct 2024 20:10:02 +0200 Subject: [PATCH 02/16] add cases for r notebooks --- internal/filer_test.go | 400 +++++++++++++++----------- internal/helpers.go | 20 +- internal/testdata/notebooks/py1.ipynb | 27 ++ internal/testdata/notebooks/py2.ipynb | 27 ++ internal/testdata/notebooks/r1.ipynb | 25 ++ internal/testdata/notebooks/r2.ipynb | 25 ++ 6 files changed, 357 insertions(+), 167 deletions(-) create mode 100644 internal/testdata/notebooks/py1.ipynb create mode 100644 internal/testdata/notebooks/py2.ipynb create mode 100644 internal/testdata/notebooks/r1.ipynb create mode 100644 internal/testdata/notebooks/r2.ipynb diff --git a/internal/filer_test.go b/internal/filer_test.go index 04db494a96..b120c3e747 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -39,7 +39,7 @@ func (f filerTest) assertContents(ctx context.Context, name string, contents str assert.Equal(f, contents, body.String()) } -func (f filerTest) assertContentsJupyter(ctx context.Context, name string) { +func (f filerTest) assertContentsJupyter(ctx context.Context, name string, language string) { reader, err := f.Read(ctx, name) if !assert.NoError(f, err) { return @@ -62,6 +62,7 @@ func (f filerTest) assertContentsJupyter(ctx context.Context, name string) { // Since a roundtrip to the workspace changes a Jupyter notebook's payload, // the best we can do is assert that the nbformat is correct. assert.EqualValues(f, 4, actual["nbformat"]) + assert.Equal(f, language, actual["metadata"].(map[string]any)["language_info"].(map[string]any)["name"]) } func (f filerTest) assertNotExists(ctx context.Context, name string) { @@ -360,61 +361,9 @@ func TestAccFilerReadDir(t *testing.T) { } } -var pythonJupyterNotebookContent1 = ` -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Jupyter Notebook Version 1\")" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 - } -` - -// TODO: Does detect notebook work with notebooks exported from databricks? -// They typically do not have the top level "language" key set. - -var pythonJupyterNotebookContent2 = ` -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Jupyter Notebook Version 2\")" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 - } -` - // TODO: Continue adding tests for other types of notebooks than python here. // Exporting from a workspace makes this work easier. - func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { t.Parallel() @@ -422,45 +371,71 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { ctx := context.Background() var err error - // Upload the notebooks - err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('first upload'))")) - require.NoError(t, err) - err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('first upload'))")) - require.NoError(t, err) - err = f.Write(ctx, "sqlNb.sql", strings.NewReader("-- Databricks notebook source\n SELECT \"first upload\"")) - require.NoError(t, err) - err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"first upload\"))")) - require.NoError(t, err) - err = f.Write(ctx, "pythonJupyterNb.ipynb", strings.NewReader(pythonJupyterNotebookContent1)) - require.NoError(t, err) - - // Assert contents after initial upload - filerTest{t, f}.assertContents(ctx, "pyNb", "# Databricks notebook source\nprint('first upload'))") - filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('first upload'))") - filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"first upload\"") - filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"first upload\"))") - filerTest{t, f}.assertContents(ctx, "pythonJupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 1\")") - - // Assert uploading a second time fails due to overwrite mode missing - err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('second upload'))")) - assert.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/pyNb$`), err.Error()) - - err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('second upload'))")) - assert.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/rNb$`), err.Error()) + for _, tcases := range []struct { + name string + nameWithoutExt string + content1 string + expected1 string + content2 string + }{ + { + name: "pyNb.py", + nameWithoutExt: "pyNb", + content1: "# Databricks notebook source\nprint('first upload'))", + expected1: "# Databricks notebook source\nprint('first upload'))", + content2: "# Databricks notebook source\nprint('second upload'))", + }, + { + name: "rNb.r", + nameWithoutExt: "rNb", + content1: "# Databricks notebook source\nprint('first upload'))", + expected1: "# Databricks notebook source\nprint('first upload'))", + content2: "# Databricks notebook source\nprint('second upload'))", + }, + { + name: "sqlNb.sql", + nameWithoutExt: "sqlNb", + content1: "-- Databricks notebook source\n SELECT \"first upload\"", + expected1: "-- Databricks notebook source\n SELECT \"first upload\"", + content2: "-- Databricks notebook source\n SELECT \"second upload\"", + }, + { + name: "scalaNb.scala", + nameWithoutExt: "scalaNb", + content1: "// Databricks notebook source\n println(\"first upload\"))", + expected1: "// Databricks notebook source\n println(\"first upload\"))", + content2: "// Databricks notebook source\n println(\"second upload\"))", + }, + { + name: "pythonJupyterNb.ipynb", + nameWithoutExt: "pythonJupyterNb", + content1: readFile(t, "testdata/notebooks/py1.ipynb"), + expected1: "# Databricks notebook source\nprint(1)", + content2: readFile(t, "testdata/notebooks/py2.ipynb"), + }, + { + name: "rJupyterNb.ipynb", + nameWithoutExt: "rJupyterNb", + content1: readFile(t, "testdata/notebooks/r1.ipynb"), + expected1: "# Databricks notebook source\nprint(1)", + content2: readFile(t, "testdata/notebooks/r2.ipynb"), + }, + } { + t.Run(tcases.name, func(t *testing.T) { + // Upload the notebook + err = f.Write(ctx, tcases.name, strings.NewReader(tcases.content1)) + require.NoError(t, err) - err = f.Write(ctx, "sqlNb.sql", strings.NewReader("# Databricks notebook source\n SELECT \"second upload\")")) - assert.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/sqlNb$`), err.Error()) + // Assert contents after initial upload + filerTest{t, f}.assertContents(ctx, tcases.nameWithoutExt, tcases.expected1) - err = f.Write(ctx, "scalaNb.scala", strings.NewReader("# Databricks notebook source\n println(\"second upload\"))")) - assert.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/scalaNb$`), err.Error()) + // Assert uploading a second time fails due to overwrite mode missing + err = f.Write(ctx, tcases.name, strings.NewReader(tcases.content2)) + assert.ErrorIs(t, err, fs.ErrExist) + assert.Regexp(t, regexp.MustCompile(`file already exists: .*/`+tcases.nameWithoutExt+`$`), err.Error()) - err = f.Write(ctx, "pythonJupyterNb.ipynb", strings.NewReader(pythonJupyterNotebookContent2)) - assert.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/pythonJupyterNb$`), err.Error()) + }) + } } func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) { @@ -470,45 +445,83 @@ func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) { ctx := context.Background() var err error - // Upload notebooks - err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('first upload'))")) - require.NoError(t, err) - err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('first upload'))")) - require.NoError(t, err) - err = f.Write(ctx, "sqlNb.sql", strings.NewReader("-- Databricks notebook source\n SELECT \"first upload\"")) - require.NoError(t, err) - err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"first upload\"))")) - require.NoError(t, err) - err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(pythonJupyterNotebookContent1)) - require.NoError(t, err) + for _, tcases := range []struct { + name string + nameWithoutExt string + content1 string + expected1 string + content2 string + expected2 string + }{ + { + name: "pyNb.py", + nameWithoutExt: "pyNb", + content1: "# Databricks notebook source\nprint('first upload'))", + expected1: "# Databricks notebook source\nprint('first upload'))", + content2: "# Databricks notebook source\nprint('second upload'))", + expected2: "# Databricks notebook source\nprint('second upload'))", + }, + { + name: "rNb.r", + nameWithoutExt: "rNb", + content1: "# Databricks notebook source\nprint('first upload'))", + expected1: "# Databricks notebook source\nprint('first upload'))", + content2: "# Databricks notebook source\nprint('second upload'))", + expected2: "# Databricks notebook source\nprint('second upload'))", + }, + { + name: "sqlNb.sql", + nameWithoutExt: "sqlNb", + content1: "-- Databricks notebook source\n SELECT \"first upload\"", + expected1: "-- Databricks notebook source\n SELECT \"first upload\"", + content2: "-- Databricks notebook source\n SELECT \"second upload\"", + expected2: "-- Databricks notebook source\n SELECT \"second upload\"", + }, + { + name: "scalaNb.scala", + nameWithoutExt: "scalaNb", + content1: "// Databricks notebook source\n println(\"first upload\"))", + expected1: "// Databricks notebook source\n println(\"first upload\"))", + content2: "// Databricks notebook source\n println(\"second upload\"))", + expected2: "// Databricks notebook source\n println(\"second upload\"))", + }, + { + name: "pythonJupyterNb.ipynb", + nameWithoutExt: "pythonJupyterNb", + content1: readFile(t, "testdata/notebooks/py1.ipynb"), + expected1: "# Databricks notebook source\nprint(1))", + content2: readFile(t, "testdata/notebooks/py2.ipynb"), + expected2: "# Databricks notebook source\nprint(2))", + }, + { + name: "rJupyterNb.ipynb", + nameWithoutExt: "rJupyterNb", + content1: readFile(t, "testdata/notebooks/r1.ipynb"), + expected1: "# Databricks notebook source\nprint(1))", + content2: readFile(t, "testdata/notebooks/r2.ipynb"), + expected2: "# Databricks notebook source\nprint(2))", + }, + } { + t.Run(tcases.name, func(t *testing.T) { + // Upload the notebook + err = f.Write(ctx, tcases.name, strings.NewReader(tcases.content1)) + require.NoError(t, err) - // Assert contents after initial upload - filerTest{t, f}.assertContents(ctx, "pyNb", "# Databricks notebook source\nprint('first upload'))") - filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('first upload'))") - filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"first upload\"") - filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"first upload\"))") - filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 1\")") + // Assert contents after initial upload + filerTest{t, f}.assertContents(ctx, tcases.nameWithoutExt, tcases.expected1) - // Upload notebooks a second time, overwriting the initial uplaods - err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('second upload'))"), filer.OverwriteIfExists) - require.NoError(t, err) - err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('second upload'))"), filer.OverwriteIfExists) - require.NoError(t, err) - err = f.Write(ctx, "sqlNb.sql", strings.NewReader("-- Databricks notebook source\n SELECT \"second upload\""), filer.OverwriteIfExists) - require.NoError(t, err) - err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"second upload\"))"), filer.OverwriteIfExists) - require.NoError(t, err) - err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(pythonJupyterNotebookContent2), filer.OverwriteIfExists) - require.NoError(t, err) + // Upload the notebook a second time with overwrite flag + err = f.Write(ctx, tcases.name, strings.NewReader(tcases.content2), filer.OverwriteIfExists) + require.NoError(t, err) - // Assert contents have been overwritten - filerTest{t, f}.assertContents(ctx, "pyNb", "# Databricks notebook source\nprint('second upload'))") - filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('second upload'))") - filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"second upload\"") - filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"second upload\"))") - filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 2\")") + // Assert contents after second upload + filerTest{t, f}.assertContents(ctx, tcases.nameWithoutExt, tcases.expected2) + }) + } } +// TODO: Add a test that the exported file has the right extension / language_info set? +// Required for DABs in the workspace. func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { t.Parallel() @@ -522,8 +535,9 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { {"foo.r", "print('foo')"}, {"foo.scala", "println('foo')"}, {"foo.sql", "SELECT 'foo'"}, - {"jupyterNb.ipynb", pythonJupyterNotebookContent1}, - {"jupyterNb2.ipynb", pythonJupyterNotebookContent2}, + {"py1.ipynb", readFile(t, "testdata/notebooks/py1.ipynb")}, + {"py2.ipynb", readFile(t, "testdata/notebooks/py2.ipynb")}, + {"r1.ipynb", readFile(t, "testdata/notebooks/r1.ipynb")}, {"pyNb.py", "# Databricks notebook source\nprint('first upload'))"}, {"rNb.r", "# Databricks notebook source\nprint('first upload'))"}, {"scalaNb.scala", "// Databricks notebook source\n println(\"first upload\"))"}, @@ -561,9 +575,10 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { "foo.r", "foo.scala", "foo.sql", - "jupyterNb.ipynb", - "jupyterNb2.ipynb", + "py1.ipynb", + "py2.ipynb", "pyNb.py", + "r1.ipynb", "rNb.r", "scalaNb.scala", "sqlNb.sql", @@ -589,7 +604,8 @@ func setupFilerWithExtensionsTest(t *testing.T) filer.Filer { }{ {"foo.py", "# Databricks notebook source\nprint('first upload'))"}, {"bar.py", "print('foo')"}, - {"jupyter.ipynb", pythonJupyterNotebookContent1}, + {"p1.ipynb", readFile(t, "testdata/notebooks/py1.ipynb")}, + {"r1.ipynb", readFile(t, "testdata/notebooks/r1.ipynb")}, {"pretender", "not a notebook"}, {"dir/file.txt", "file content"}, {"scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')"}, @@ -615,7 +631,8 @@ func TestAccFilerWorkspaceFilesExtensionsRead(t *testing.T) { // Read contents of test fixtures as a sanity check. filerTest{t, wf}.assertContents(ctx, "foo.py", "# Databricks notebook source\nprint('first upload'))") filerTest{t, wf}.assertContents(ctx, "bar.py", "print('foo')") - filerTest{t, wf}.assertContentsJupyter(ctx, "jupyter.ipynb") + filerTest{t, wf}.assertContentsJupyter(ctx, "p1.ipynb", "python") + filerTest{t, wf}.assertContentsJupyter(ctx, "r1.ipynb", "R") filerTest{t, wf}.assertContents(ctx, "dir/file.txt", "file content") filerTest{t, wf}.assertContents(ctx, "scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')") filerTest{t, wf}.assertContents(ctx, "pretender", "not a notebook") @@ -655,10 +672,15 @@ func TestAccFilerWorkspaceFilesExtensionsDelete(t *testing.T) { require.NoError(t, err) filerTest{t, wf}.assertNotExists(ctx, "bar.py") - // Delete jupyter notebook - err = wf.Delete(ctx, "jupyter.ipynb") + // Delete python jupyter notebook + err = wf.Delete(ctx, "p1.ipynb") require.NoError(t, err) - filerTest{t, wf}.assertNotExists(ctx, "jupyter.ipynb") + filerTest{t, wf}.assertNotExists(ctx, "p1.ipynb") + + // Delete r jupyter notebook + err = wf.Delete(ctx, "r1.ipynb") + require.NoError(t, err) + filerTest{t, wf}.assertNotExists(ctx, "r1.ipynb") // Delete non-existent file err = wf.Delete(ctx, "non-existent.py") @@ -700,10 +722,16 @@ func TestAccFilerWorkspaceFilesExtensionsStat(t *testing.T) { assert.Equal(t, "bar.py", info.Name()) assert.False(t, info.IsDir()) - // Stat on a Jupyter notebook - info, err = wf.Stat(ctx, "jupyter.ipynb") + // Stat on a python jupyter notebook + info, err = wf.Stat(ctx, "p1.ipynb") + require.NoError(t, err) + assert.Equal(t, "p1.ipynb", info.Name()) + assert.False(t, info.IsDir()) + + // Stat on an R jupyter notebook + info, err = wf.Stat(ctx, "r1.ipynb") require.NoError(t, err) - assert.Equal(t, "jupyter.ipynb", info.Name()) + assert.Equal(t, "r1.ipynb", info.Name()) assert.False(t, info.IsDir()) // Stat on a directory @@ -746,32 +774,84 @@ func TestAccWorkspaceFilesExtensionsDirectoriesAreNotNotebooks(t *testing.T) { func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) { t.Parallel() - ctx := context.Background() - wf, _ := setupWsfsExtensionsFiler(t) - - // Case 1: Source Notebook - err := wf.Write(ctx, "foo.py", strings.NewReader("# Databricks notebook source\nprint('foo')")) - require.NoError(t, err) + // Case 1: Writing source notebooks. + for _, tc := range []struct { + language string + sourceName string + sourceContent string + jupyterName string + jupyterContent string + }{ + { + language: "python", + sourceName: "foo.py", + sourceContent: "# Databricks notebook source\nprint('foo')", + jupyterName: "foo.ipynb", + }, + { + language: "r", + sourceName: "foo.r", + sourceContent: "# Databricks notebook source\nprint('foo')", + jupyterName: "foo.ipynb", + }, + } { + t.Run(tc.language, func(t *testing.T) { + t.Parallel() - // The source notebook should exist but not the Jupyter notebook - filerTest{t, wf}.assertContents(ctx, "foo.py", "# Databricks notebook source\nprint('foo')") - _, err = wf.Stat(ctx, "foo.ipynb") - assert.ErrorIs(t, err, fs.ErrNotExist) - _, err = wf.Read(ctx, "foo.ipynb") - assert.ErrorIs(t, err, fs.ErrNotExist) - err = wf.Delete(ctx, "foo.ipynb") - assert.ErrorIs(t, err, fs.ErrNotExist) + ctx := context.Background() + wf, _ := setupWsfsExtensionsFiler(t) + + err := wf.Write(ctx, tc.sourceName, strings.NewReader(tc.sourceContent)) + require.NoError(t, err) + + // The source notebook should exist but not the Jupyter notebook + filerTest{t, wf}.assertContents(ctx, tc.sourceName, tc.sourceContent) + _, err = wf.Stat(ctx, tc.jupyterName) + assert.ErrorIs(t, err, fs.ErrNotExist) + _, err = wf.Read(ctx, tc.jupyterName) + assert.ErrorIs(t, err, fs.ErrNotExist) + err = wf.Delete(ctx, tc.jupyterName) + assert.ErrorIs(t, err, fs.ErrNotExist) + }) + } - // Case 2: Jupyter Notebook - err = wf.Write(ctx, "bar.ipynb", strings.NewReader(pythonJupyterNotebookContent1)) - require.NoError(t, err) + // Case 2: Writing Jupyter notebooks. + for _, tc := range []struct { + language string + sourceName string + jupyterName string + jupyterContent string + }{ + { + language: "python", + sourceName: "bar.py", + jupyterName: "foo.ipynb", + jupyterContent: readFile(t, "testdata/notebooks/py1.ipynb"), + }, + { + language: "R", + sourceName: "bar.r", + jupyterName: "foo.ipynb", + jupyterContent: readFile(t, "testdata/notebooks/r1.ipynb"), + }, + } { + t.Run(tc.language, func(t *testing.T) { + t.Parallel() - // The Jupyter notebook should exist but not the source notebook - filerTest{t, wf}.assertContentsJupyter(ctx, "bar.ipynb") - _, err = wf.Stat(ctx, "bar.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - _, err = wf.Read(ctx, "bar.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - err = wf.Delete(ctx, "bar.py") - assert.ErrorIs(t, err, fs.ErrNotExist) + ctx := context.Background() + wf, _ := setupWsfsExtensionsFiler(t) + + err := wf.Write(ctx, tc.jupyterName, strings.NewReader(tc.jupyterContent)) + require.NoError(t, err) + + // The Jupyter notebook should exist but not the source notebook + filerTest{t, wf}.assertContentsJupyter(ctx, tc.jupyterName, tc.language) + _, err = wf.Stat(ctx, tc.sourceName) + assert.ErrorIs(t, err, fs.ErrNotExist) + _, err = wf.Read(ctx, tc.sourceName) + assert.ErrorIs(t, err, fs.ErrNotExist) + err = wf.Delete(ctx, tc.sourceName) + assert.ErrorIs(t, err, fs.ErrNotExist) + }) + } } diff --git a/internal/helpers.go b/internal/helpers.go index e99c951dfe..ee68708bc4 100644 --- a/internal/helpers.go +++ b/internal/helpers.go @@ -352,6 +352,13 @@ func RequireErrorRun(t *testing.T, args ...string) (bytes.Buffer, bytes.Buffer, return stdout, stderr, err } +func readFile(t *testing.T, name string) string { + b, err := os.ReadFile(name) + require.NoError(t, err) + + return string(b) +} + func writeFile(t *testing.T, name string, body string) string { f, err := os.Create(filepath.Join(t.TempDir(), name)) require.NoError(t, err) @@ -562,10 +569,10 @@ func setupLocalFiler(t *testing.T) (filer.Filer, string) { } func setupWsfsFiler(t *testing.T) (filer.Filer, string) { - ctx, w := acc.WorkspaceTest(t) + ctx, wt := acc.WorkspaceTest(t) - tmpdir := TemporaryWorkspaceDir(t, w.W) - f, err := filer.NewWorkspaceFilesClient(w.W, tmpdir) + tmpdir := TemporaryWorkspaceDir(t, wt.W) + f, err := filer.NewWorkspaceFilesClient(wt.W, tmpdir) require.NoError(t, err) // Check if we can use this API here, skip test if we cannot. @@ -579,11 +586,10 @@ func setupWsfsFiler(t *testing.T) (filer.Filer, string) { } func setupWsfsExtensionsFiler(t *testing.T) (filer.Filer, string) { - t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + _, wt := acc.WorkspaceTest(t) - w := databricks.Must(databricks.NewWorkspaceClient()) - tmpdir := TemporaryWorkspaceDir(t, w) - f, err := filer.NewWorkspaceFilesExtensionsClient(w, tmpdir) + tmpdir := TemporaryWorkspaceDir(t, wt.W) + f, err := filer.NewWorkspaceFilesExtensionsClient(wt.W, tmpdir) require.NoError(t, err) return f, tmpdir diff --git a/internal/testdata/notebooks/py1.ipynb b/internal/testdata/notebooks/py1.ipynb new file mode 100644 index 0000000000..0a44ce0eeb --- /dev/null +++ b/internal/testdata/notebooks/py1.ipynb @@ -0,0 +1,27 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.8.13" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/py2.ipynb b/internal/testdata/notebooks/py2.ipynb new file mode 100644 index 0000000000..8b2ccde1f2 --- /dev/null +++ b/internal/testdata/notebooks/py2.ipynb @@ -0,0 +1,27 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(2)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.8.13" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/r1.ipynb b/internal/testdata/notebooks/r1.ipynb new file mode 100644 index 0000000000..6280426a37 --- /dev/null +++ b/internal/testdata/notebooks/r1.ipynb @@ -0,0 +1,25 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "name": "R" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/r2.ipynb b/internal/testdata/notebooks/r2.ipynb new file mode 100644 index 0000000000..6280426a37 --- /dev/null +++ b/internal/testdata/notebooks/r2.ipynb @@ -0,0 +1,25 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "name": "R" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From cd8cc2c531faf8fd9838ca19056463785583a143 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 21 Oct 2024 21:02:55 +0200 Subject: [PATCH 03/16] add tests for scala notebooks --- internal/filer_test.go | 112 +++++++++++++++-------- internal/testdata/notebooks/r2.ipynb | 8 +- internal/testdata/notebooks/scala1.ipynb | 38 ++++++++ internal/testdata/notebooks/scala2.ipynb | 38 ++++++++ 4 files changed, 157 insertions(+), 39 deletions(-) create mode 100644 internal/testdata/notebooks/scala1.ipynb create mode 100644 internal/testdata/notebooks/scala2.ipynb diff --git a/internal/filer_test.go b/internal/filer_test.go index b120c3e747..7e0fb1ba60 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -371,7 +371,7 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { ctx := context.Background() var err error - for _, tcases := range []struct { + for _, tc := range []struct { name string nameWithoutExt string content1 string @@ -381,16 +381,16 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { { name: "pyNb.py", nameWithoutExt: "pyNb", - content1: "# Databricks notebook source\nprint('first upload'))", - expected1: "# Databricks notebook source\nprint('first upload'))", - content2: "# Databricks notebook source\nprint('second upload'))", + content1: "# Databricks notebook source\nprint('first upload')", + expected1: "# Databricks notebook source\nprint('first upload')", + content2: "# Databricks notebook source\nprint('second upload')", }, { name: "rNb.r", nameWithoutExt: "rNb", - content1: "# Databricks notebook source\nprint('first upload'))", - expected1: "# Databricks notebook source\nprint('first upload'))", - content2: "# Databricks notebook source\nprint('second upload'))", + content1: "# Databricks notebook source\nprint('first upload')", + expected1: "# Databricks notebook source\nprint('first upload')", + content2: "# Databricks notebook source\nprint('second upload')", }, { name: "sqlNb.sql", @@ -402,9 +402,9 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { { name: "scalaNb.scala", nameWithoutExt: "scalaNb", - content1: "// Databricks notebook source\n println(\"first upload\"))", - expected1: "// Databricks notebook source\n println(\"first upload\"))", - content2: "// Databricks notebook source\n println(\"second upload\"))", + content1: "// Databricks notebook source\n println(\"first upload\")", + expected1: "// Databricks notebook source\n println(\"first upload\")", + content2: "// Databricks notebook source\n println(\"second upload\")", }, { name: "pythonJupyterNb.ipynb", @@ -420,19 +420,26 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { expected1: "# Databricks notebook source\nprint(1)", content2: readFile(t, "testdata/notebooks/r2.ipynb"), }, + { + name: "scalaJupyterNb.ipynb", + nameWithoutExt: "scalaJupyterNb", + content1: readFile(t, "testdata/notebooks/scala1.ipynb"), + expected1: "// Databricks notebook source\nprintln(1)", + content2: readFile(t, "testdata/notebooks/scala2.ipynb"), + }, } { - t.Run(tcases.name, func(t *testing.T) { + t.Run(tc.name, func(t *testing.T) { // Upload the notebook - err = f.Write(ctx, tcases.name, strings.NewReader(tcases.content1)) + err = f.Write(ctx, tc.name, strings.NewReader(tc.content1)) require.NoError(t, err) // Assert contents after initial upload - filerTest{t, f}.assertContents(ctx, tcases.nameWithoutExt, tcases.expected1) + filerTest{t, f}.assertContents(ctx, tc.nameWithoutExt, tc.expected1) // Assert uploading a second time fails due to overwrite mode missing - err = f.Write(ctx, tcases.name, strings.NewReader(tcases.content2)) + err = f.Write(ctx, tc.name, strings.NewReader(tc.content2)) assert.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/`+tcases.nameWithoutExt+`$`), err.Error()) + assert.Regexp(t, regexp.MustCompile(`file already exists: .*/`+tc.nameWithoutExt+`$`), err.Error()) }) } @@ -456,18 +463,18 @@ func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) { { name: "pyNb.py", nameWithoutExt: "pyNb", - content1: "# Databricks notebook source\nprint('first upload'))", - expected1: "# Databricks notebook source\nprint('first upload'))", - content2: "# Databricks notebook source\nprint('second upload'))", - expected2: "# Databricks notebook source\nprint('second upload'))", + content1: "# Databricks notebook source\nprint('first upload')", + expected1: "# Databricks notebook source\nprint('first upload')", + content2: "# Databricks notebook source\nprint('second upload')", + expected2: "# Databricks notebook source\nprint('second upload')", }, { name: "rNb.r", nameWithoutExt: "rNb", - content1: "# Databricks notebook source\nprint('first upload'))", - expected1: "# Databricks notebook source\nprint('first upload'))", - content2: "# Databricks notebook source\nprint('second upload'))", - expected2: "# Databricks notebook source\nprint('second upload'))", + content1: "# Databricks notebook source\nprint('first upload')", + expected1: "# Databricks notebook source\nprint('first upload')", + content2: "# Databricks notebook source\nprint('second upload')", + expected2: "# Databricks notebook source\nprint('second upload')", }, { name: "sqlNb.sql", @@ -480,26 +487,34 @@ func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) { { name: "scalaNb.scala", nameWithoutExt: "scalaNb", - content1: "// Databricks notebook source\n println(\"first upload\"))", - expected1: "// Databricks notebook source\n println(\"first upload\"))", - content2: "// Databricks notebook source\n println(\"second upload\"))", - expected2: "// Databricks notebook source\n println(\"second upload\"))", + content1: "// Databricks notebook source\n println(\"first upload\")", + expected1: "// Databricks notebook source\n println(\"first upload\")", + content2: "// Databricks notebook source\n println(\"second upload\")", + expected2: "// Databricks notebook source\n println(\"second upload\")", }, { name: "pythonJupyterNb.ipynb", nameWithoutExt: "pythonJupyterNb", content1: readFile(t, "testdata/notebooks/py1.ipynb"), - expected1: "# Databricks notebook source\nprint(1))", + expected1: "# Databricks notebook source\nprint(1)", content2: readFile(t, "testdata/notebooks/py2.ipynb"), - expected2: "# Databricks notebook source\nprint(2))", + expected2: "# Databricks notebook source\nprint(2)", }, { name: "rJupyterNb.ipynb", nameWithoutExt: "rJupyterNb", content1: readFile(t, "testdata/notebooks/r1.ipynb"), - expected1: "# Databricks notebook source\nprint(1))", + expected1: "# Databricks notebook source\nprint(1)", content2: readFile(t, "testdata/notebooks/r2.ipynb"), - expected2: "# Databricks notebook source\nprint(2))", + expected2: "# Databricks notebook source\nprint(2)", + }, + { + name: "scalaJupyterNb.ipynb", + nameWithoutExt: "scalaJupyterNb", + content1: readFile(t, "testdata/notebooks/scala1.ipynb"), + expected1: "// Databricks notebook source\nprintln(1)", + content2: readFile(t, "testdata/notebooks/scala2.ipynb"), + expected2: "// Databricks notebook source\nprintln(2)", }, } { t.Run(tcases.name, func(t *testing.T) { @@ -540,6 +555,7 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { {"r1.ipynb", readFile(t, "testdata/notebooks/r1.ipynb")}, {"pyNb.py", "# Databricks notebook source\nprint('first upload'))"}, {"rNb.r", "# Databricks notebook source\nprint('first upload'))"}, + {"scala1.ipynb", readFile(t, "testdata/notebooks/scala1.ipynb")}, {"scalaNb.scala", "// Databricks notebook source\n println(\"first upload\"))"}, {"sqlNb.sql", "-- Databricks notebook source\n SELECT \"first upload\""}, } @@ -580,6 +596,7 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { "pyNb.py", "r1.ipynb", "rNb.r", + "scala1.ipynb", "scalaNb.scala", "sqlNb.sql", }, names) @@ -606,6 +623,7 @@ func setupFilerWithExtensionsTest(t *testing.T) filer.Filer { {"bar.py", "print('foo')"}, {"p1.ipynb", readFile(t, "testdata/notebooks/py1.ipynb")}, {"r1.ipynb", readFile(t, "testdata/notebooks/r1.ipynb")}, + {"scala1.ipynb", readFile(t, "testdata/notebooks/scala1.ipynb")}, {"pretender", "not a notebook"}, {"dir/file.txt", "file content"}, {"scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')"}, @@ -633,6 +651,7 @@ func TestAccFilerWorkspaceFilesExtensionsRead(t *testing.T) { filerTest{t, wf}.assertContents(ctx, "bar.py", "print('foo')") filerTest{t, wf}.assertContentsJupyter(ctx, "p1.ipynb", "python") filerTest{t, wf}.assertContentsJupyter(ctx, "r1.ipynb", "R") + filerTest{t, wf}.assertContentsJupyter(ctx, "scala1.ipynb", "scala") filerTest{t, wf}.assertContents(ctx, "dir/file.txt", "file content") filerTest{t, wf}.assertContents(ctx, "scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')") filerTest{t, wf}.assertContents(ctx, "pretender", "not a notebook") @@ -682,6 +701,11 @@ func TestAccFilerWorkspaceFilesExtensionsDelete(t *testing.T) { require.NoError(t, err) filerTest{t, wf}.assertNotExists(ctx, "r1.ipynb") + // Delete scala jupyter notebook + err = wf.Delete(ctx, "scala1.ipynb") + require.NoError(t, err) + filerTest{t, wf}.assertNotExists(ctx, "scala1.ipynb") + // Delete non-existent file err = wf.Delete(ctx, "non-existent.py") assert.ErrorIs(t, err, fs.ErrNotExist) @@ -734,6 +758,12 @@ func TestAccFilerWorkspaceFilesExtensionsStat(t *testing.T) { assert.Equal(t, "r1.ipynb", info.Name()) assert.False(t, info.IsDir()) + // Stat on a Scala jupyter notebook + info, err = wf.Stat(ctx, "scala1.ipynb") + require.NoError(t, err) + assert.Equal(t, "scala1.ipynb", info.Name()) + assert.False(t, info.IsDir()) + // Stat on a directory info, err = wf.Stat(ctx, "dir") require.NoError(t, err) @@ -794,10 +824,14 @@ func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) { sourceContent: "# Databricks notebook source\nprint('foo')", jupyterName: "foo.ipynb", }, + { + language: "scala", + sourceName: "foo.scala", + sourceContent: "// Databricks notebook source\nprintln('foo')", + jupyterName: "foo.ipynb", + }, } { - t.Run(tc.language, func(t *testing.T) { - t.Parallel() - + t.Run("source_"+tc.language, func(t *testing.T) { ctx := context.Background() wf, _ := setupWsfsExtensionsFiler(t) @@ -834,10 +868,14 @@ func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) { jupyterName: "foo.ipynb", jupyterContent: readFile(t, "testdata/notebooks/r1.ipynb"), }, + { + language: "scala", + sourceName: "bar.scala", + jupyterName: "foo.ipynb", + jupyterContent: readFile(t, "testdata/notebooks/scala1.ipynb"), + }, } { - t.Run(tc.language, func(t *testing.T) { - t.Parallel() - + t.Run("jupyter_"+tc.language, func(t *testing.T) { ctx := context.Background() wf, _ := setupWsfsExtensionsFiler(t) diff --git a/internal/testdata/notebooks/r2.ipynb b/internal/testdata/notebooks/r2.ipynb index 6280426a37..f2ff413d25 100644 --- a/internal/testdata/notebooks/r2.ipynb +++ b/internal/testdata/notebooks/r2.ipynb @@ -3,10 +3,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "vscode": { + "languageId": "r" + } + }, "outputs": [], "source": [ - "print(1)" + "print(2)" ] } ], diff --git a/internal/testdata/notebooks/scala1.ipynb b/internal/testdata/notebooks/scala1.ipynb new file mode 100644 index 0000000000..25a5a187b0 --- /dev/null +++ b/internal/testdata/notebooks/scala1.ipynb @@ -0,0 +1,38 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n" + ] + } + ], + "source": [ + "println(1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Scala", + "language": "scala", + "name": "scala" + }, + "language_info": { + "codemirror_mode": "text/x-scala", + "file_extension": ".sc", + "mimetype": "text/x-scala", + "name": "scala", + "nbconvert_exporter": "script", + "version": "2.13.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/scala2.ipynb b/internal/testdata/notebooks/scala2.ipynb new file mode 100644 index 0000000000..353fc29ffd --- /dev/null +++ b/internal/testdata/notebooks/scala2.ipynb @@ -0,0 +1,38 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n" + ] + } + ], + "source": [ + "println(2)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Scala", + "language": "scala", + "name": "scala" + }, + "language_info": { + "codemirror_mode": "text/x-scala", + "file_extension": ".sc", + "mimetype": "text/x-scala", + "name": "scala", + "nbconvert_exporter": "script", + "version": "2.13.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 20a30f7c6f5787f1b2703538db3d73a620213cab Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 21 Oct 2024 21:40:43 +0200 Subject: [PATCH 04/16] add test for sql as well as enum for extensions --- cmd/bundle/generate/utils.go | 2 +- cmd/workspace/workspace/export_dir.go | 2 +- internal/filer_test.go | 136 +++++++++++------- internal/testdata/notebooks/sql1.ipynb | 20 +++ internal/testdata/notebooks/sql2.ipynb | 20 +++ .../workspace_files_extensions_client.go | 40 +++--- libs/notebook/ext.go | 35 ++++- 7 files changed, 172 insertions(+), 83 deletions(-) create mode 100644 internal/testdata/notebooks/sql1.ipynb create mode 100644 internal/testdata/notebooks/sql2.ipynb diff --git a/cmd/bundle/generate/utils.go b/cmd/bundle/generate/utils.go index 65f6924193..0a4c8c197a 100644 --- a/cmd/bundle/generate/utils.go +++ b/cmd/bundle/generate/utils.go @@ -71,7 +71,7 @@ func (n *downloader) markNotebookForDownload(ctx context.Context, notebookPath * ext := notebook.GetExtensionByLanguage(info) - filename := path.Base(*notebookPath) + ext + filename := path.Base(*notebookPath) + string(ext) targetPath := filepath.Join(n.sourceDir, filename) n.files[targetPath] = *notebookPath diff --git a/cmd/workspace/workspace/export_dir.go b/cmd/workspace/workspace/export_dir.go index 0046f46ef9..224264ab59 100644 --- a/cmd/workspace/workspace/export_dir.go +++ b/cmd/workspace/workspace/export_dir.go @@ -48,7 +48,7 @@ func (opts exportDirOptions) callback(ctx context.Context, workspaceFiler filer. return err } objectInfo := info.Sys().(workspace.ObjectInfo) - targetPath += notebook.GetExtensionByLanguage(&objectInfo) + targetPath += string(notebook.GetExtensionByLanguage(&objectInfo)) // Skip file if a file already exists in path. // os.Stat returns a fs.ErrNotExist if a file does not exist at path. diff --git a/internal/filer_test.go b/internal/filer_test.go index 7e0fb1ba60..04ab236425 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -427,8 +427,17 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { expected1: "// Databricks notebook source\nprintln(1)", content2: readFile(t, "testdata/notebooks/scala2.ipynb"), }, + { + name: "sqlJupyterNotebook.ipynb", + nameWithoutExt: "sqlJupyterNotebook", + content1: readFile(t, "testdata/notebooks/sql1.ipynb"), + expected1: "-- Databricks notebook source\nselect 1", + content2: readFile(t, "testdata/notebooks/sql2.ipynb"), + }, } { t.Run(tc.name, func(t *testing.T) { + t.Parallel() + // Upload the notebook err = f.Write(ctx, tc.name, strings.NewReader(tc.content1)) require.NoError(t, err) @@ -516,8 +525,18 @@ func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) { content2: readFile(t, "testdata/notebooks/scala2.ipynb"), expected2: "// Databricks notebook source\nprintln(2)", }, + { + name: "sqlJupyterNotebook.ipynb", + nameWithoutExt: "sqlJupyterNotebook", + content1: readFile(t, "testdata/notebooks/sql1.ipynb"), + expected1: "-- Databricks notebook source\nselect 1", + content2: readFile(t, "testdata/notebooks/sql2.ipynb"), + expected2: "-- Databricks notebook source\nselect 2", + }, } { t.Run(tcases.name, func(t *testing.T) { + t.Parallel() + // Upload the notebook err = f.Write(ctx, tcases.name, strings.NewReader(tcases.content1)) require.NoError(t, err) @@ -535,8 +554,6 @@ func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) { } } -// TODO: Add a test that the exported file has the right extension / language_info set? -// Required for DABs in the workspace. func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { t.Parallel() @@ -557,6 +574,7 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { {"rNb.r", "# Databricks notebook source\nprint('first upload'))"}, {"scala1.ipynb", readFile(t, "testdata/notebooks/scala1.ipynb")}, {"scalaNb.scala", "// Databricks notebook source\n println(\"first upload\"))"}, + {"sql1.ipynb", readFile(t, "testdata/notebooks/sql1.ipynb")}, {"sqlNb.sql", "-- Databricks notebook source\n SELECT \"first upload\""}, } @@ -598,6 +616,7 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { "rNb.r", "scala1.ipynb", "scalaNb.scala", + "sql1.ipynb", "sqlNb.sql", }, names) @@ -624,6 +643,7 @@ func setupFilerWithExtensionsTest(t *testing.T) filer.Filer { {"p1.ipynb", readFile(t, "testdata/notebooks/py1.ipynb")}, {"r1.ipynb", readFile(t, "testdata/notebooks/r1.ipynb")}, {"scala1.ipynb", readFile(t, "testdata/notebooks/scala1.ipynb")}, + {"sql1.ipynb", readFile(t, "testdata/notebooks/sql1.ipynb")}, {"pretender", "not a notebook"}, {"dir/file.txt", "file content"}, {"scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')"}, @@ -649,13 +669,15 @@ func TestAccFilerWorkspaceFilesExtensionsRead(t *testing.T) { // Read contents of test fixtures as a sanity check. filerTest{t, wf}.assertContents(ctx, "foo.py", "# Databricks notebook source\nprint('first upload'))") filerTest{t, wf}.assertContents(ctx, "bar.py", "print('foo')") - filerTest{t, wf}.assertContentsJupyter(ctx, "p1.ipynb", "python") - filerTest{t, wf}.assertContentsJupyter(ctx, "r1.ipynb", "R") - filerTest{t, wf}.assertContentsJupyter(ctx, "scala1.ipynb", "scala") filerTest{t, wf}.assertContents(ctx, "dir/file.txt", "file content") filerTest{t, wf}.assertContents(ctx, "scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')") filerTest{t, wf}.assertContents(ctx, "pretender", "not a notebook") + filerTest{t, wf}.assertContentsJupyter(ctx, "p1.ipynb", "python") + filerTest{t, wf}.assertContentsJupyter(ctx, "r1.ipynb", "R") + filerTest{t, wf}.assertContentsJupyter(ctx, "scala1.ipynb", "scala") + filerTest{t, wf}.assertContentsJupyter(ctx, "sql1.ipynb", "sql") + // Read non-existent file _, err := wf.Read(ctx, "non-existent.py") assert.ErrorIs(t, err, fs.ErrNotExist) @@ -706,6 +728,11 @@ func TestAccFilerWorkspaceFilesExtensionsDelete(t *testing.T) { require.NoError(t, err) filerTest{t, wf}.assertNotExists(ctx, "scala1.ipynb") + // Delete sql jupyter notebook + err = wf.Delete(ctx, "sql1.ipynb") + require.NoError(t, err) + filerTest{t, wf}.assertNotExists(ctx, "sql1.ipynb") + // Delete non-existent file err = wf.Delete(ctx, "non-existent.py") assert.ErrorIs(t, err, fs.ErrNotExist) @@ -734,56 +761,45 @@ func TestAccFilerWorkspaceFilesExtensionsStat(t *testing.T) { ctx := context.Background() wf := setupFilerWithExtensionsTest(t) - // Stat on a notebook - info, err := wf.Stat(ctx, "foo.py") - require.NoError(t, err) - assert.Equal(t, "foo.py", info.Name()) - assert.False(t, info.IsDir()) - - // Stat on a file - info, err = wf.Stat(ctx, "bar.py") - require.NoError(t, err) - assert.Equal(t, "bar.py", info.Name()) - assert.False(t, info.IsDir()) - - // Stat on a python jupyter notebook - info, err = wf.Stat(ctx, "p1.ipynb") - require.NoError(t, err) - assert.Equal(t, "p1.ipynb", info.Name()) - assert.False(t, info.IsDir()) - - // Stat on an R jupyter notebook - info, err = wf.Stat(ctx, "r1.ipynb") - require.NoError(t, err) - assert.Equal(t, "r1.ipynb", info.Name()) - assert.False(t, info.IsDir()) - - // Stat on a Scala jupyter notebook - info, err = wf.Stat(ctx, "scala1.ipynb") - require.NoError(t, err) - assert.Equal(t, "scala1.ipynb", info.Name()) - assert.False(t, info.IsDir()) + for _, fileName := range []string{ + // notebook + "foo.py", + // file + "bar.py", + // python jupyter notebook + "p1.ipynb", + // R jupyter notebook + "r1.ipynb", + // Scala jupyter notebook + "scala1.ipynb", + // SQL jupyter notebook + "sql1.ipynb", + } { + info, err := wf.Stat(ctx, fileName) + require.NoError(t, err) + assert.Equal(t, fileName, info.Name()) + assert.False(t, info.IsDir()) + } // Stat on a directory - info, err = wf.Stat(ctx, "dir") + info, err := wf.Stat(ctx, "dir") require.NoError(t, err) assert.Equal(t, "dir", info.Name()) assert.True(t, info.IsDir()) - // Stat on a non-existent file - _, err = wf.Stat(ctx, "non-existent.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - - // Ensure we do not stat a file as a notebook - _, err = wf.Stat(ctx, "pretender.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - - // Ensure we do not stat a Scala notebook as a Python notebook - _, err = wf.Stat(ctx, "scala-notebook.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - - _, err = wf.Stat(ctx, "pretender.ipynb") - assert.ErrorIs(t, err, fs.ErrNotExist) + for _, fileName := range []string{ + // non-existent file + "non-existent.py", + // do not stat a file as a notebook + "pretender.py", + // do not stat a Scala notebook as a Python notebook + "scala-notebook.py", + // do not read a regular file as a Jupyter notebook + "pretender.ipynb", + } { + _, err := wf.Stat(ctx, fileName) + assert.ErrorIs(t, err, fs.ErrNotExist) + } } func TestAccWorkspaceFilesExtensionsDirectoriesAreNotNotebooks(t *testing.T) { @@ -830,8 +846,16 @@ func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) { sourceContent: "// Databricks notebook source\nprintln('foo')", jupyterName: "foo.ipynb", }, + { + language: "sql", + sourceName: "foo.sql", + sourceContent: "-- Databricks notebook source\nselect 'foo'", + jupyterName: "foo.ipynb", + }, } { t.Run("source_"+tc.language, func(t *testing.T) { + t.Parallel() + ctx := context.Background() wf, _ := setupWsfsExtensionsFiler(t) @@ -858,24 +882,32 @@ func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) { }{ { language: "python", - sourceName: "bar.py", + sourceName: "foo.py", jupyterName: "foo.ipynb", jupyterContent: readFile(t, "testdata/notebooks/py1.ipynb"), }, { language: "R", - sourceName: "bar.r", + sourceName: "foo.r", jupyterName: "foo.ipynb", jupyterContent: readFile(t, "testdata/notebooks/r1.ipynb"), }, { language: "scala", - sourceName: "bar.scala", + sourceName: "foo.scala", jupyterName: "foo.ipynb", jupyterContent: readFile(t, "testdata/notebooks/scala1.ipynb"), }, + { + language: "sql", + sourceName: "foo.sql", + jupyterName: "foo.ipynb", + jupyterContent: readFile(t, "testdata/notebooks/sql1.ipynb"), + }, } { t.Run("jupyter_"+tc.language, func(t *testing.T) { + t.Parallel() + ctx := context.Background() wf, _ := setupWsfsExtensionsFiler(t) diff --git a/internal/testdata/notebooks/sql1.ipynb b/internal/testdata/notebooks/sql1.ipynb new file mode 100644 index 0000000000..7a3562a166 --- /dev/null +++ b/internal/testdata/notebooks/sql1.ipynb @@ -0,0 +1,20 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "select 1" + ] + } + ], + "metadata": { + "language_info": { + "name": "sql" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/sql2.ipynb b/internal/testdata/notebooks/sql2.ipynb new file mode 100644 index 0000000000..7780e1daf7 --- /dev/null +++ b/internal/testdata/notebooks/sql2.ipynb @@ -0,0 +1,20 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "select 2" + ] + } + ], + "metadata": { + "language_info": { + "name": "sql" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/libs/filer/workspace_files_extensions_client.go b/libs/filer/workspace_files_extensions_client.go index 509fe89310..0d49b07f59 100644 --- a/libs/filer/workspace_files_extensions_client.go +++ b/libs/filer/workspace_files_extensions_client.go @@ -7,6 +7,7 @@ import ( "io" "io/fs" "path" + "slices" "strings" "github.com/databricks/cli/libs/log" @@ -23,17 +24,6 @@ type workspaceFilesExtensionsClient struct { readonly bool } -var extensionsToLanguages = map[string]workspace.Language{ - ".py": workspace.LanguagePython, - ".r": workspace.LanguageR, - ".scala": workspace.LanguageScala, - ".sql": workspace.LanguageSql, - - // The platform supports all languages (Python, R, Scala, and SQL) for Jupyter notebooks. - // Thus, we do not need to check the language for .ipynb files. - ".ipynb": workspace.LanguagePython, -} - type workspaceFileStatus struct { wsfsFileInfo @@ -57,11 +47,12 @@ func (w *workspaceFilesExtensionsClient) stat(ctx context.Context, name string) // This function returns the stat for the provided notebook. The stat object itself contains the path // with the extension since it is meant to be used in the context of a fs.FileInfo. func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx context.Context, name string) (*workspaceFileStatus, error) { - ext := path.Ext(name) - nameWithoutExt := strings.TrimSuffix(name, ext) + // TODO: What happens when this type casting is not possible? + ext := notebook.Extension(path.Ext(name)) + nameWithoutExt := strings.TrimSuffix(name, string(ext)) // File name does not have an extension associated with Databricks notebooks, return early. - if _, ok := extensionsToLanguages[ext]; !ok { + if _, ok := notebook.ExtensionToLanguage[ext]; !ok { return nil, nil } @@ -84,28 +75,33 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex // Not the correct language. Return early. Note: All languages are supported // for Jupyter notebooks. - if ext != ".ipynb" && stat.Language != extensionsToLanguages[ext] { - log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not of the correct language. Expected %s but found %s.", name, path.Join(w.root, nameWithoutExt), extensionsToLanguages[ext], stat.Language) + if ext != notebook.ExtensionJupyter && stat.Language != notebook.ExtensionToLanguage[ext] { + log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not of the correct language. Expected %s but found %s.", name, path.Join(w.root, nameWithoutExt), notebook.ExtensionToLanguage[ext], stat.Language) return nil, nil } - // When the extension is .py we expect the export format to be source. + // When the extension is one of .py, .r, .scala or .sql we expect the export format to be source. // If it's not, return early. - if ext == ".py" && stat.ReposExportFormat != workspace.ExportFormatSource { + if slices.Contains([]notebook.Extension{ + notebook.ExtensionPython, + notebook.ExtensionR, + notebook.ExtensionScala, + notebook.ExtensionSql}, ext) && + stat.ReposExportFormat != workspace.ExportFormatSource { log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not exported as a source notebook. Its export format is %s.", name, path.Join(w.root, nameWithoutExt), stat.ReposExportFormat) return nil, nil } // When the extension is .ipynb we expect the export format to be Jupyter. // If it's not, return early. - if ext == ".ipynb" && stat.ReposExportFormat != workspace.ExportFormatJupyter { + if ext == notebook.ExtensionJupyter && stat.ReposExportFormat != workspace.ExportFormatJupyter { log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not exported as a Jupyter notebook. Its export format is %s.", name, path.Join(w.root, nameWithoutExt), stat.ReposExportFormat) return nil, nil } // Modify the stat object path to include the extension. This stat object will be used // to return the fs.FileInfo object in the stat method. - stat.Path = stat.Path + ext + stat.Path = stat.Path + string(ext) return &workspaceFileStatus{ wsfsFileInfo: stat, nameForWorkspaceAPI: nameWithoutExt, @@ -130,12 +126,12 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithoutExt(ctx con // If the notebook was exported as a Jupyter notebook, the extension should be .ipynb. // TODO: Test this. if stat.ReposExportFormat == workspace.ExportFormatJupyter { - ext = ".ipynb" + ext = notebook.ExtensionJupyter } // Modify the stat object path to include the extension. This stat object will be used // to return the fs.DirEntry object in the ReadDir method. - stat.Path = stat.Path + ext + stat.Path = stat.Path + string(ext) return &workspaceFileStatus{ wsfsFileInfo: stat, nameForWorkspaceAPI: name, diff --git a/libs/notebook/ext.go b/libs/notebook/ext.go index 28d08c11ac..f8d5a85030 100644 --- a/libs/notebook/ext.go +++ b/libs/notebook/ext.go @@ -2,22 +2,43 @@ package notebook import "github.com/databricks/databricks-sdk-go/service/workspace" -func GetExtensionByLanguage(objectInfo *workspace.ObjectInfo) string { +type Extension string + +const ( + ExtensionNone Extension = "" + ExtensionPython Extension = ".py" + ExtensionR Extension = ".r" + ExtensionScala Extension = ".scala" + ExtensionSql Extension = ".sql" + ExtensionJupyter Extension = ".ipynb" +) + +var ExtensionToLanguage = map[Extension]workspace.Language{ + ExtensionPython: workspace.LanguagePython, + ExtensionR: workspace.LanguageR, + ExtensionScala: workspace.LanguageScala, + ExtensionSql: workspace.LanguageSql, + + // The platform supports all languages (Python, R, Scala, and SQL) for Jupyter notebooks. + ExtensionJupyter: workspace.LanguageUnknown, +} + +func GetExtensionByLanguage(objectInfo *workspace.ObjectInfo) Extension { if objectInfo.ObjectType != workspace.ObjectTypeNotebook { - return "" + return ExtensionNone } switch objectInfo.Language { case workspace.LanguagePython: - return ".py" + return ExtensionPython case workspace.LanguageR: - return ".r" + return ExtensionR case workspace.LanguageScala: - return ".scala" + return ExtensionScala case workspace.LanguageSql: - return ".sql" + return ExtensionSql default: // Do not add any extension to the file name - return "" + return ExtensionNone } } From 2c7f41a5aae04af7327d6de76822369adfe60a17 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 21 Oct 2024 21:41:35 +0200 Subject: [PATCH 05/16] cleanup todos --- libs/filer/workspace_files_extensions_client.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/libs/filer/workspace_files_extensions_client.go b/libs/filer/workspace_files_extensions_client.go index 0d49b07f59..492280a35f 100644 --- a/libs/filer/workspace_files_extensions_client.go +++ b/libs/filer/workspace_files_extensions_client.go @@ -40,10 +40,6 @@ func (w *workspaceFilesExtensionsClient) stat(ctx context.Context, name string) return info.(wsfsFileInfo), err } -// TODO: Add end to end tests that the filer works for all .ipynb cases. -// TODO: Also fix the sync issues. OR add tests that sync works fine with non -// python notebooks. Is this needed in the first place? - // This function returns the stat for the provided notebook. The stat object itself contains the path // with the extension since it is meant to be used in the context of a fs.FileInfo. func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx context.Context, name string) (*workspaceFileStatus, error) { From 4bce4f1d05b3af07355f31cb19674fe89cb43016 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 21 Oct 2024 21:44:15 +0200 Subject: [PATCH 06/16] cleanup todos --- libs/filer/workspace_files_extensions_client.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/libs/filer/workspace_files_extensions_client.go b/libs/filer/workspace_files_extensions_client.go index 492280a35f..4b10bf7da5 100644 --- a/libs/filer/workspace_files_extensions_client.go +++ b/libs/filer/workspace_files_extensions_client.go @@ -43,7 +43,6 @@ func (w *workspaceFilesExtensionsClient) stat(ctx context.Context, name string) // This function returns the stat for the provided notebook. The stat object itself contains the path // with the extension since it is meant to be used in the context of a fs.FileInfo. func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx context.Context, name string) (*workspaceFileStatus, error) { - // TODO: What happens when this type casting is not possible? ext := notebook.Extension(path.Ext(name)) nameWithoutExt := strings.TrimSuffix(name, string(ext)) @@ -120,7 +119,6 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithoutExt(ctx con ext := notebook.GetExtensionByLanguage(&stat.ObjectInfo) // If the notebook was exported as a Jupyter notebook, the extension should be .ipynb. - // TODO: Test this. if stat.ReposExportFormat == workspace.ExportFormatJupyter { ext = notebook.ExtensionJupyter } From 6ad24b54597acb61e298b4194f648465c944466b Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 21 Oct 2024 21:53:41 +0200 Subject: [PATCH 07/16] better name for test case --- .../workspace_files_extensions_client_test.go | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/libs/filer/workspace_files_extensions_client_test.go b/libs/filer/workspace_files_extensions_client_test.go index 1dd8a242db..974a6a37b5 100644 --- a/libs/filer/workspace_files_extensions_client_test.go +++ b/libs/filer/workspace_files_extensions_client_test.go @@ -37,7 +37,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { expectedError string }{ { - name: "python source notebook and file", + name: "python source notebook and file with source extension", language: workspace.LanguagePython, notebookExportFormat: workspace.ExportFormatSource, notebookPath: "/dir/foo", @@ -45,7 +45,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.py resolve to the same name /foo.py. Changing the name of one of these objects will resolve this issue", }, { - name: "scala source notebook and file", + name: "scala source notebook and file with source extension", language: workspace.LanguageScala, notebookExportFormat: workspace.ExportFormatSource, notebookPath: "/dir/foo", @@ -53,7 +53,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.scala resolve to the same name /foo.scala. Changing the name of one of these objects will resolve this issue", }, { - name: "r source notebook and file", + name: "r source notebook and file with source extension", language: workspace.LanguageR, notebookExportFormat: workspace.ExportFormatSource, notebookPath: "/dir/foo", @@ -61,7 +61,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.r resolve to the same name /foo.r. Changing the name of one of these objects will resolve this issue", }, { - name: "sql source notebook and file", + name: "sql source notebook and file with source extension", language: workspace.LanguageSql, notebookExportFormat: workspace.ExportFormatSource, notebookPath: "/dir/foo", @@ -69,7 +69,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.sql resolve to the same name /foo.sql. Changing the name of one of these objects will resolve this issue", }, { - name: "python jupyter notebook and file", + name: "python jupyter notebook and file with source extension", language: workspace.LanguagePython, notebookExportFormat: workspace.ExportFormatJupyter, notebookPath: "/dir/foo", @@ -78,7 +78,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { expectedError: "", }, { - name: "scala jupyter notebook and file", + name: "scala jupyter notebook and file with source extension", language: workspace.LanguageScala, notebookExportFormat: workspace.ExportFormatJupyter, notebookPath: "/dir/foo", @@ -87,7 +87,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { expectedError: "", }, { - name: "sql jupyter notebook and file", + name: "sql jupyter notebook and file with source extension", language: workspace.LanguageSql, notebookExportFormat: workspace.ExportFormatJupyter, notebookPath: "/dir/foo", @@ -96,7 +96,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { expectedError: "", }, { - name: "r jupyter notebook and file", + name: "r jupyter notebook and file with source extension", language: workspace.LanguageR, notebookExportFormat: workspace.ExportFormatJupyter, notebookPath: "/dir/foo", @@ -105,7 +105,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { expectedError: "", }, { - name: "python jupyter notebook and file", + name: "python jupyter notebook and file with .ipynb extension", language: workspace.LanguagePython, notebookExportFormat: workspace.ExportFormatJupyter, notebookPath: "/dir/foo", @@ -113,7 +113,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", }, { - name: "scala jupyter notebook and file", + name: "scala jupyter notebook and file with .ipynb extension", language: workspace.LanguageScala, notebookExportFormat: workspace.ExportFormatJupyter, notebookPath: "/dir/foo", @@ -121,7 +121,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", }, { - name: "r jupyter notebook and file", + name: "r jupyter notebook and file with .ipynb extension", language: workspace.LanguageR, notebookExportFormat: workspace.ExportFormatJupyter, notebookPath: "/dir/foo", @@ -129,7 +129,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", }, { - name: "sql jupyter notebook and file", + name: "sql jupyter notebook and file with .ipynb extension", language: workspace.LanguageSql, notebookExportFormat: workspace.ExportFormatJupyter, notebookPath: "/dir/foo", From 86831228cfdcff25d854bfbd17fbb6fe9202a3c7 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 21 Oct 2024 22:04:58 +0200 Subject: [PATCH 08/16] improve tests --- internal/filer_test.go | 91 ++++++++++++++++++++---------------------- 1 file changed, 43 insertions(+), 48 deletions(-) diff --git a/internal/filer_test.go b/internal/filer_test.go index 04ab236425..a2387e4872 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -361,9 +361,6 @@ func TestAccFilerReadDir(t *testing.T) { } } -// TODO: Continue adding tests for other types of notebooks than python here. -// Exporting from a workspace makes this work easier. - func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { t.Parallel() @@ -703,50 +700,41 @@ func TestAccFilerWorkspaceFilesExtensionsDelete(t *testing.T) { ctx := context.Background() wf := setupFilerWithExtensionsTest(t) - // Delete notebook - err := wf.Delete(ctx, "foo.py") - require.NoError(t, err) - filerTest{t, wf}.assertNotExists(ctx, "foo.py") - - // Delete file - err = wf.Delete(ctx, "bar.py") - require.NoError(t, err) - filerTest{t, wf}.assertNotExists(ctx, "bar.py") - - // Delete python jupyter notebook - err = wf.Delete(ctx, "p1.ipynb") - require.NoError(t, err) - filerTest{t, wf}.assertNotExists(ctx, "p1.ipynb") - - // Delete r jupyter notebook - err = wf.Delete(ctx, "r1.ipynb") - require.NoError(t, err) - filerTest{t, wf}.assertNotExists(ctx, "r1.ipynb") - - // Delete scala jupyter notebook - err = wf.Delete(ctx, "scala1.ipynb") - require.NoError(t, err) - filerTest{t, wf}.assertNotExists(ctx, "scala1.ipynb") - - // Delete sql jupyter notebook - err = wf.Delete(ctx, "sql1.ipynb") - require.NoError(t, err) - filerTest{t, wf}.assertNotExists(ctx, "sql1.ipynb") - - // Delete non-existent file - err = wf.Delete(ctx, "non-existent.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - - // Ensure we do not delete a file as a notebook - err = wf.Delete(ctx, "pretender.py") - assert.ErrorIs(t, err, fs.ErrNotExist) + for _, fileName := range []string{ + // notebook + "foo.py", + // file + "bar.py", + // python jupyter notebook + "p1.ipynb", + // R jupyter notebook + "r1.ipynb", + // Scala jupyter notebook + "scala1.ipynb", + // SQL jupyter notebook + "sql1.ipynb", + } { + err := wf.Delete(ctx, fileName) + require.NoError(t, err) + filerTest{t, wf}.assertNotExists(ctx, fileName) + } - // Ensure we do not delete a Scala notebook as a Python notebook - _, err = wf.Read(ctx, "scala-notebook.py") - assert.ErrorIs(t, err, fs.ErrNotExist) + for _, fileName := range []string{ + // do not delete non-existent file + "non-existent.py", + // do not delete a file assuming it is a notebook and stripping the extension + "pretender.py", + // do not delete a Scala notebook as a Python notebook + "scala-notebook.py", + // do not delete a file assuming it is a Jupyter notebook and stripping the extension + "pretender.ipynb", + } { + err := wf.Delete(ctx, fileName) + assert.ErrorIs(t, err, fs.ErrNotExist) + } // Delete directory - err = wf.Delete(ctx, "dir") + err := wf.Delete(ctx, "dir") assert.ErrorIs(t, err, fs.ErrInvalid) // Delete directory recursively @@ -790,11 +778,11 @@ func TestAccFilerWorkspaceFilesExtensionsStat(t *testing.T) { for _, fileName := range []string{ // non-existent file "non-existent.py", - // do not stat a file as a notebook + // do not stat a file assuming it is a notebook and stripping the extension "pretender.py", // do not stat a Scala notebook as a Python notebook "scala-notebook.py", - // do not read a regular file as a Jupyter notebook + // do not read a regular file assuming it is a Jupyter notebook and stripping the extension "pretender.ipynb", } { _, err := wf.Stat(ctx, fileName) @@ -862,8 +850,11 @@ func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) { err := wf.Write(ctx, tc.sourceName, strings.NewReader(tc.sourceContent)) require.NoError(t, err) - // The source notebook should exist but not the Jupyter notebook + // Assert on the content of the source notebook that's been written. filerTest{t, wf}.assertContents(ctx, tc.sourceName, tc.sourceContent) + + // Ensure that the source notebook is not read when the name contains + // the .ipynb extension. _, err = wf.Stat(ctx, tc.jupyterName) assert.ErrorIs(t, err, fs.ErrNotExist) _, err = wf.Read(ctx, tc.jupyterName) @@ -914,8 +905,12 @@ func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) { err := wf.Write(ctx, tc.jupyterName, strings.NewReader(tc.jupyterContent)) require.NoError(t, err) - // The Jupyter notebook should exist but not the source notebook + // Assert that the written notebook is jupyter and has the correct + // language_info metadata set. filerTest{t, wf}.assertContentsJupyter(ctx, tc.jupyterName, tc.language) + + // Ensure that the Jupyter notebook is not read when the name does not + // contain the .ipynb extension. _, err = wf.Stat(ctx, tc.sourceName) assert.ErrorIs(t, err, fs.ErrNotExist) _, err = wf.Read(ctx, tc.sourceName) From dda138fa63163d87ff78d6e757c8bf3fdf2937b9 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 21 Oct 2024 22:17:19 +0200 Subject: [PATCH 09/16] - --- libs/filer/workspace_files_extensions_client.go | 7 ++++++- libs/notebook/ext.go | 1 - 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/libs/filer/workspace_files_extensions_client.go b/libs/filer/workspace_files_extensions_client.go index 4b10bf7da5..4b6c8679f6 100644 --- a/libs/filer/workspace_files_extensions_client.go +++ b/libs/filer/workspace_files_extensions_client.go @@ -47,7 +47,12 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex nameWithoutExt := strings.TrimSuffix(name, string(ext)) // File name does not have an extension associated with Databricks notebooks, return early. - if _, ok := notebook.ExtensionToLanguage[ext]; !ok { + if !slices.Contains([]notebook.Extension{ + notebook.ExtensionPython, + notebook.ExtensionR, + notebook.ExtensionScala, + notebook.ExtensionSql, + notebook.ExtensionJupyter}, ext) { return nil, nil } diff --git a/libs/notebook/ext.go b/libs/notebook/ext.go index f8d5a85030..3c8274a50e 100644 --- a/libs/notebook/ext.go +++ b/libs/notebook/ext.go @@ -20,7 +20,6 @@ var ExtensionToLanguage = map[Extension]workspace.Language{ ExtensionSql: workspace.LanguageSql, // The platform supports all languages (Python, R, Scala, and SQL) for Jupyter notebooks. - ExtensionJupyter: workspace.LanguageUnknown, } func GetExtensionByLanguage(objectInfo *workspace.ObjectInfo) Extension { From 7d5eca562f449ae3aece1f8725334bbebdad5c83 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 31 Oct 2024 11:25:37 +0100 Subject: [PATCH 10/16] remove extension type --- cmd/bundle/generate/utils.go | 2 +- cmd/workspace/workspace/export_dir.go | 2 +- libs/filer/workspace_files_extensions_client.go | 12 ++++++------ libs/notebook/ext.go | 16 ++++++++-------- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/cmd/bundle/generate/utils.go b/cmd/bundle/generate/utils.go index 0a4c8c197a..65f6924193 100644 --- a/cmd/bundle/generate/utils.go +++ b/cmd/bundle/generate/utils.go @@ -71,7 +71,7 @@ func (n *downloader) markNotebookForDownload(ctx context.Context, notebookPath * ext := notebook.GetExtensionByLanguage(info) - filename := path.Base(*notebookPath) + string(ext) + filename := path.Base(*notebookPath) + ext targetPath := filepath.Join(n.sourceDir, filename) n.files[targetPath] = *notebookPath diff --git a/cmd/workspace/workspace/export_dir.go b/cmd/workspace/workspace/export_dir.go index 224264ab59..0046f46ef9 100644 --- a/cmd/workspace/workspace/export_dir.go +++ b/cmd/workspace/workspace/export_dir.go @@ -48,7 +48,7 @@ func (opts exportDirOptions) callback(ctx context.Context, workspaceFiler filer. return err } objectInfo := info.Sys().(workspace.ObjectInfo) - targetPath += string(notebook.GetExtensionByLanguage(&objectInfo)) + targetPath += notebook.GetExtensionByLanguage(&objectInfo) // Skip file if a file already exists in path. // os.Stat returns a fs.ErrNotExist if a file does not exist at path. diff --git a/libs/filer/workspace_files_extensions_client.go b/libs/filer/workspace_files_extensions_client.go index 4b6c8679f6..3d41701c50 100644 --- a/libs/filer/workspace_files_extensions_client.go +++ b/libs/filer/workspace_files_extensions_client.go @@ -43,11 +43,11 @@ func (w *workspaceFilesExtensionsClient) stat(ctx context.Context, name string) // This function returns the stat for the provided notebook. The stat object itself contains the path // with the extension since it is meant to be used in the context of a fs.FileInfo. func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx context.Context, name string) (*workspaceFileStatus, error) { - ext := notebook.Extension(path.Ext(name)) - nameWithoutExt := strings.TrimSuffix(name, string(ext)) + ext := path.Ext(name) + nameWithoutExt := strings.TrimSuffix(name, ext) // File name does not have an extension associated with Databricks notebooks, return early. - if !slices.Contains([]notebook.Extension{ + if !slices.Contains([]string{ notebook.ExtensionPython, notebook.ExtensionR, notebook.ExtensionScala, @@ -82,7 +82,7 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex // When the extension is one of .py, .r, .scala or .sql we expect the export format to be source. // If it's not, return early. - if slices.Contains([]notebook.Extension{ + if slices.Contains([]string{ notebook.ExtensionPython, notebook.ExtensionR, notebook.ExtensionScala, @@ -101,7 +101,7 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex // Modify the stat object path to include the extension. This stat object will be used // to return the fs.FileInfo object in the stat method. - stat.Path = stat.Path + string(ext) + stat.Path = stat.Path + ext return &workspaceFileStatus{ wsfsFileInfo: stat, nameForWorkspaceAPI: nameWithoutExt, @@ -130,7 +130,7 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithoutExt(ctx con // Modify the stat object path to include the extension. This stat object will be used // to return the fs.DirEntry object in the ReadDir method. - stat.Path = stat.Path + string(ext) + stat.Path = stat.Path + ext return &workspaceFileStatus{ wsfsFileInfo: stat, nameForWorkspaceAPI: name, diff --git a/libs/notebook/ext.go b/libs/notebook/ext.go index 3c8274a50e..618c36c8e6 100644 --- a/libs/notebook/ext.go +++ b/libs/notebook/ext.go @@ -5,15 +5,15 @@ import "github.com/databricks/databricks-sdk-go/service/workspace" type Extension string const ( - ExtensionNone Extension = "" - ExtensionPython Extension = ".py" - ExtensionR Extension = ".r" - ExtensionScala Extension = ".scala" - ExtensionSql Extension = ".sql" - ExtensionJupyter Extension = ".ipynb" + ExtensionNone string = "" + ExtensionPython string = ".py" + ExtensionR string = ".r" + ExtensionScala string = ".scala" + ExtensionSql string = ".sql" + ExtensionJupyter string = ".ipynb" ) -var ExtensionToLanguage = map[Extension]workspace.Language{ +var ExtensionToLanguage = map[string]workspace.Language{ ExtensionPython: workspace.LanguagePython, ExtensionR: workspace.LanguageR, ExtensionScala: workspace.LanguageScala, @@ -22,7 +22,7 @@ var ExtensionToLanguage = map[Extension]workspace.Language{ // The platform supports all languages (Python, R, Scala, and SQL) for Jupyter notebooks. } -func GetExtensionByLanguage(objectInfo *workspace.ObjectInfo) Extension { +func GetExtensionByLanguage(objectInfo *workspace.ObjectInfo) string { if objectInfo.ObjectType != workspace.ObjectTypeNotebook { return ExtensionNone } From 96027853475f70810ba0f6c2692617f9bb0e6a88 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 31 Oct 2024 11:46:13 +0100 Subject: [PATCH 11/16] - --- internal/filer_test.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/internal/filer_test.go b/internal/filer_test.go index a2387e4872..27974b3652 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -439,7 +439,9 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { err = f.Write(ctx, tc.name, strings.NewReader(tc.content1)) require.NoError(t, err) - // Assert contents after initial upload + // Assert contents after initial upload. Note that we expect the content + // for jupyter notebooks to be of type source because the workspace files + // client always uses the source format to read notebooks from the workspace. filerTest{t, f}.assertContents(ctx, tc.nameWithoutExt, tc.expected1) // Assert uploading a second time fails due to overwrite mode missing From 606c0c52a1fecc63f34f7007e0f8fa73cbaf1193 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 31 Oct 2024 13:56:41 +0100 Subject: [PATCH 12/16] combine tests with and wo override flag --- internal/filer_test.go | 120 +++++++---------------------------------- 1 file changed, 19 insertions(+), 101 deletions(-) diff --git a/internal/filer_test.go b/internal/filer_test.go index 27974b3652..b9f0a28754 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -361,19 +361,19 @@ func TestAccFilerReadDir(t *testing.T) { } } -func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { +func TestAccFilerWorkspaceNotebook(t *testing.T) { t.Parallel() - f, _ := setupWsfsFiler(t) ctx := context.Background() var err error - for _, tc := range []struct { + tcases := []struct { name string nameWithoutExt string content1 string expected1 string content2 string + expected2 string }{ { name: "pyNb.py", @@ -381,6 +381,7 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { content1: "# Databricks notebook source\nprint('first upload')", expected1: "# Databricks notebook source\nprint('first upload')", content2: "# Databricks notebook source\nprint('second upload')", + expected2: "# Databricks notebook source\nprint('second upload')", }, { name: "rNb.r", @@ -388,6 +389,7 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { content1: "# Databricks notebook source\nprint('first upload')", expected1: "# Databricks notebook source\nprint('first upload')", content2: "# Databricks notebook source\nprint('second upload')", + expected2: "# Databricks notebook source\nprint('second upload')", }, { name: "sqlNb.sql", @@ -395,6 +397,7 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { content1: "-- Databricks notebook source\n SELECT \"first upload\"", expected1: "-- Databricks notebook source\n SELECT \"first upload\"", content2: "-- Databricks notebook source\n SELECT \"second upload\"", + expected2: "-- Databricks notebook source\n SELECT \"second upload\"", }, { name: "scalaNb.scala", @@ -402,6 +405,7 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { content1: "// Databricks notebook source\n println(\"first upload\")", expected1: "// Databricks notebook source\n println(\"first upload\")", content2: "// Databricks notebook source\n println(\"second upload\")", + expected2: "// Databricks notebook source\n println(\"second upload\")", }, { name: "pythonJupyterNb.ipynb", @@ -409,6 +413,7 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { content1: readFile(t, "testdata/notebooks/py1.ipynb"), expected1: "# Databricks notebook source\nprint(1)", content2: readFile(t, "testdata/notebooks/py2.ipynb"), + expected2: "# Databricks notebook source\nprint(2)", }, { name: "rJupyterNb.ipynb", @@ -416,6 +421,7 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { content1: readFile(t, "testdata/notebooks/r1.ipynb"), expected1: "# Databricks notebook source\nprint(1)", content2: readFile(t, "testdata/notebooks/r2.ipynb"), + expected2: "# Databricks notebook source\nprint(2)", }, { name: "scalaJupyterNb.ipynb", @@ -423,6 +429,7 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { content1: readFile(t, "testdata/notebooks/scala1.ipynb"), expected1: "// Databricks notebook source\nprintln(1)", content2: readFile(t, "testdata/notebooks/scala2.ipynb"), + expected2: "// Databricks notebook source\nprintln(2)", }, { name: "sqlJupyterNotebook.ipynb", @@ -430,8 +437,12 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { content1: readFile(t, "testdata/notebooks/sql1.ipynb"), expected1: "-- Databricks notebook source\nselect 1", content2: readFile(t, "testdata/notebooks/sql2.ipynb"), + expected2: "-- Databricks notebook source\nselect 2", }, - } { + } + + for _, tc := range tcases { + f, _ := setupWsfsFiler(t) t.Run(tc.name, func(t *testing.T) { t.Parallel() @@ -449,108 +460,15 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { assert.ErrorIs(t, err, fs.ErrExist) assert.Regexp(t, regexp.MustCompile(`file already exists: .*/`+tc.nameWithoutExt+`$`), err.Error()) - }) - } -} - -func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) { - t.Parallel() - - f, _ := setupWsfsFiler(t) - ctx := context.Background() - var err error - - for _, tcases := range []struct { - name string - nameWithoutExt string - content1 string - expected1 string - content2 string - expected2 string - }{ - { - name: "pyNb.py", - nameWithoutExt: "pyNb", - content1: "# Databricks notebook source\nprint('first upload')", - expected1: "# Databricks notebook source\nprint('first upload')", - content2: "# Databricks notebook source\nprint('second upload')", - expected2: "# Databricks notebook source\nprint('second upload')", - }, - { - name: "rNb.r", - nameWithoutExt: "rNb", - content1: "# Databricks notebook source\nprint('first upload')", - expected1: "# Databricks notebook source\nprint('first upload')", - content2: "# Databricks notebook source\nprint('second upload')", - expected2: "# Databricks notebook source\nprint('second upload')", - }, - { - name: "sqlNb.sql", - nameWithoutExt: "sqlNb", - content1: "-- Databricks notebook source\n SELECT \"first upload\"", - expected1: "-- Databricks notebook source\n SELECT \"first upload\"", - content2: "-- Databricks notebook source\n SELECT \"second upload\"", - expected2: "-- Databricks notebook source\n SELECT \"second upload\"", - }, - { - name: "scalaNb.scala", - nameWithoutExt: "scalaNb", - content1: "// Databricks notebook source\n println(\"first upload\")", - expected1: "// Databricks notebook source\n println(\"first upload\")", - content2: "// Databricks notebook source\n println(\"second upload\")", - expected2: "// Databricks notebook source\n println(\"second upload\")", - }, - { - name: "pythonJupyterNb.ipynb", - nameWithoutExt: "pythonJupyterNb", - content1: readFile(t, "testdata/notebooks/py1.ipynb"), - expected1: "# Databricks notebook source\nprint(1)", - content2: readFile(t, "testdata/notebooks/py2.ipynb"), - expected2: "# Databricks notebook source\nprint(2)", - }, - { - name: "rJupyterNb.ipynb", - nameWithoutExt: "rJupyterNb", - content1: readFile(t, "testdata/notebooks/r1.ipynb"), - expected1: "# Databricks notebook source\nprint(1)", - content2: readFile(t, "testdata/notebooks/r2.ipynb"), - expected2: "# Databricks notebook source\nprint(2)", - }, - { - name: "scalaJupyterNb.ipynb", - nameWithoutExt: "scalaJupyterNb", - content1: readFile(t, "testdata/notebooks/scala1.ipynb"), - expected1: "// Databricks notebook source\nprintln(1)", - content2: readFile(t, "testdata/notebooks/scala2.ipynb"), - expected2: "// Databricks notebook source\nprintln(2)", - }, - { - name: "sqlJupyterNotebook.ipynb", - nameWithoutExt: "sqlJupyterNotebook", - content1: readFile(t, "testdata/notebooks/sql1.ipynb"), - expected1: "-- Databricks notebook source\nselect 1", - content2: readFile(t, "testdata/notebooks/sql2.ipynb"), - expected2: "-- Databricks notebook source\nselect 2", - }, - } { - t.Run(tcases.name, func(t *testing.T) { - t.Parallel() - - // Upload the notebook - err = f.Write(ctx, tcases.name, strings.NewReader(tcases.content1)) - require.NoError(t, err) - - // Assert contents after initial upload - filerTest{t, f}.assertContents(ctx, tcases.nameWithoutExt, tcases.expected1) - - // Upload the notebook a second time with overwrite flag - err = f.Write(ctx, tcases.name, strings.NewReader(tcases.content2), filer.OverwriteIfExists) + // Try uploading the notebook again with overwrite flag. This time it should succeed. + err = f.Write(ctx, tc.name, strings.NewReader(tc.content2), filer.OverwriteIfExists) require.NoError(t, err) // Assert contents after second upload - filerTest{t, f}.assertContents(ctx, tcases.nameWithoutExt, tcases.expected2) + filerTest{t, f}.assertContents(ctx, tc.nameWithoutExt, tc.expected2) }) } + } func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { From faca8ccd7a9fd18405ab5903a9569cdbb6719c53 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 31 Oct 2024 14:00:38 +0100 Subject: [PATCH 13/16] remove second py notebook --- internal/filer_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/internal/filer_test.go b/internal/filer_test.go index b9f0a28754..adb19e9a15 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -485,7 +485,6 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { {"foo.scala", "println('foo')"}, {"foo.sql", "SELECT 'foo'"}, {"py1.ipynb", readFile(t, "testdata/notebooks/py1.ipynb")}, - {"py2.ipynb", readFile(t, "testdata/notebooks/py2.ipynb")}, {"r1.ipynb", readFile(t, "testdata/notebooks/r1.ipynb")}, {"pyNb.py", "# Databricks notebook source\nprint('first upload'))"}, {"rNb.r", "# Databricks notebook source\nprint('first upload'))"}, @@ -527,7 +526,6 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { "foo.scala", "foo.sql", "py1.ipynb", - "py2.ipynb", "pyNb.py", "r1.ipynb", "rNb.r", From a7e5210355ca76815a2d4200611de60221236164 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 1 Nov 2024 12:16:18 +0100 Subject: [PATCH 14/16] - --- libs/notebook/ext.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/libs/notebook/ext.go b/libs/notebook/ext.go index 618c36c8e6..c34ad2cc9e 100644 --- a/libs/notebook/ext.go +++ b/libs/notebook/ext.go @@ -2,8 +2,6 @@ package notebook import "github.com/databricks/databricks-sdk-go/service/workspace" -type Extension string - const ( ExtensionNone string = "" ExtensionPython string = ".py" From 00eec5a2308f34b6ac5dc9b7072032fd46aa561d Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Tue, 12 Nov 2024 18:32:38 +0100 Subject: [PATCH 15/16] Address comments --- libs/filer/workspace_files_extensions_client.go | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/libs/filer/workspace_files_extensions_client.go b/libs/filer/workspace_files_extensions_client.go index 3d41701c50..53b77dd5b8 100644 --- a/libs/filer/workspace_files_extensions_client.go +++ b/libs/filer/workspace_files_extensions_client.go @@ -80,14 +80,9 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex return nil, nil } - // When the extension is one of .py, .r, .scala or .sql we expect the export format to be source. + // For non-jupyter notebooks the export format should be source. // If it's not, return early. - if slices.Contains([]string{ - notebook.ExtensionPython, - notebook.ExtensionR, - notebook.ExtensionScala, - notebook.ExtensionSql}, ext) && - stat.ReposExportFormat != workspace.ExportFormatSource { + if ext != notebook.ExtensionJupyter && stat.ReposExportFormat != workspace.ExportFormatSource { log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not exported as a source notebook. Its export format is %s.", name, path.Join(w.root, nameWithoutExt), stat.ReposExportFormat) return nil, nil } From be53fa5ff48adf94996bc5333ca2605e9d45760c Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 13 Nov 2024 15:49:19 +0100 Subject: [PATCH 16/16] address comments and fix integration tests --- internal/filer_test.go | 6 +++--- libs/notebook/detect.go | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/internal/filer_test.go b/internal/filer_test.go index adb19e9a15..20207d343a 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -485,8 +485,8 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { {"foo.scala", "println('foo')"}, {"foo.sql", "SELECT 'foo'"}, {"py1.ipynb", readFile(t, "testdata/notebooks/py1.ipynb")}, - {"r1.ipynb", readFile(t, "testdata/notebooks/r1.ipynb")}, {"pyNb.py", "# Databricks notebook source\nprint('first upload'))"}, + {"r1.ipynb", readFile(t, "testdata/notebooks/r1.ipynb")}, {"rNb.r", "# Databricks notebook source\nprint('first upload'))"}, {"scala1.ipynb", readFile(t, "testdata/notebooks/scala1.ipynb")}, {"scalaNb.scala", "// Databricks notebook source\n println(\"first upload\"))"}, @@ -589,7 +589,7 @@ func TestAccFilerWorkspaceFilesExtensionsRead(t *testing.T) { filerTest{t, wf}.assertContents(ctx, "pretender", "not a notebook") filerTest{t, wf}.assertContentsJupyter(ctx, "p1.ipynb", "python") - filerTest{t, wf}.assertContentsJupyter(ctx, "r1.ipynb", "R") + filerTest{t, wf}.assertContentsJupyter(ctx, "r1.ipynb", "r") filerTest{t, wf}.assertContentsJupyter(ctx, "scala1.ipynb", "scala") filerTest{t, wf}.assertContentsJupyter(ctx, "sql1.ipynb", "sql") @@ -796,7 +796,7 @@ func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) { jupyterContent: readFile(t, "testdata/notebooks/py1.ipynb"), }, { - language: "R", + language: "r", sourceName: "foo.r", jupyterName: "foo.ipynb", jupyterContent: readFile(t, "testdata/notebooks/r1.ipynb"), diff --git a/libs/notebook/detect.go b/libs/notebook/detect.go index 582a88479f..cd8680bfa4 100644 --- a/libs/notebook/detect.go +++ b/libs/notebook/detect.go @@ -107,19 +107,19 @@ func DetectWithFS(fsys fs.FS, name string) (notebook bool, language workspace.La // Determine which header to expect based on filename extension. ext := strings.ToLower(filepath.Ext(name)) switch ext { - case ".py": + case ExtensionPython: header = `# Databricks notebook source` language = workspace.LanguagePython - case ".r": + case ExtensionR: header = `# Databricks notebook source` language = workspace.LanguageR - case ".scala": + case ExtensionScala: header = "// Databricks notebook source" language = workspace.LanguageScala - case ".sql": + case ExtensionSql: header = "-- Databricks notebook source" language = workspace.LanguageSql - case ".ipynb": + case ExtensionJupyter: return DetectJupyterWithFS(fsys, name) default: return false, "", nil