From e0f6d68eea6ff2463274940cb0f5f83054e7dc7a Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 7 Jun 2023 19:06:36 +0200 Subject: [PATCH 01/12] Add workspace export-dir command --- cmd/workspace/workspace/events.go | 42 +++++++++ cmd/workspace/workspace/export_dir.go | 117 ++++++++++++++++++++++++++ internal/filer_test.go | 6 ++ internal/workspace_test.go | 111 ++++++++++++++++++++++++ libs/filer/dbfs_client.go | 26 +++--- libs/filer/filer.go | 12 +++ libs/filer/workspace_files_client.go | 44 +++++----- 7 files changed, 322 insertions(+), 36 deletions(-) create mode 100644 cmd/workspace/workspace/events.go create mode 100644 cmd/workspace/workspace/export_dir.go diff --git a/cmd/workspace/workspace/events.go b/cmd/workspace/workspace/events.go new file mode 100644 index 0000000000..9479786938 --- /dev/null +++ b/cmd/workspace/workspace/events.go @@ -0,0 +1,42 @@ +package workspace + +type fileIOEvent struct { + SourcePath string `json:"source_path,omitempty"` + TargetPath string `json:"target_path,omitempty"` + Type string `json:"type"` +} + +const EventTypeDownloadComplete = "DOWNLOAD_COMPLETE" +const EventTypeExportStarted = "EXPORT_STARTED" +const EventTypeExportCompleted = "EXPORT_COMPLETED" +const EventTypeFileSkipped = "FILE_SKIPPED" + +func newDownloadCompleteEvent(sourcePath, targetPath string) fileIOEvent { + return fileIOEvent{ + SourcePath: sourcePath, + TargetPath: targetPath, + Type: EventTypeDownloadComplete, + } +} + +func newExportCompletedEvent(targetPath string) fileIOEvent { + return fileIOEvent{ + TargetPath: targetPath, + Type: EventTypeExportCompleted, + } +} + +func newFileSkippedEvent(sourcePath, targetPath string) fileIOEvent { + return fileIOEvent{ + SourcePath: sourcePath, + TargetPath: targetPath, + Type: EventTypeFileSkipped, + } +} + +func newExportStartedEvent(sourcePath string) fileIOEvent { + return fileIOEvent{ + SourcePath: sourcePath, + Type: EventTypeExportStarted, + } +} diff --git a/cmd/workspace/workspace/export_dir.go b/cmd/workspace/workspace/export_dir.go new file mode 100644 index 0000000000..2f11d9c4f1 --- /dev/null +++ b/cmd/workspace/workspace/export_dir.go @@ -0,0 +1,117 @@ +package workspace + +import ( + "io" + "io/fs" + "os" + "path" + "path/filepath" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/filer" + "github.com/databricks/databricks-sdk-go/service/workspace" + "github.com/spf13/cobra" +) + +var exportDirCommand = &cobra.Command{ + Use: "export-dir SOURCE_PATH TARGET_PATH", + Short: `Export a directory from a Databricks workspace to the local file system.`, + Long: ` +Export a directory recursively from a Databricks workspace to the local file system. +Notebooks will have one of the following extensions added .scala, .py, .sql, or .r +based on the language type. +`, + PreRunE: root.MustWorkspaceClient, + Args: cobra.ExactArgs(2), + RunE: func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + sourceDir := args[0] + targetDir := args[1] + + // Initialize a filer and a file system on the source directory + workspaceFiler, err := filer.NewWorkspaceFilesClient(w, sourceDir) + if err != nil { + return err + } + workspaceFS := filer.NewFS(ctx, workspaceFiler) + + // TODO: print progress events on stderr instead: https://github.com/databricks/cli/issues/448 + err = cmdio.RenderWithTemplate(ctx, newExportStartedEvent(sourceDir), "Export started. Download files from {{.SourcePath}}\n") + if err != nil { + return err + } + + err = fs.WalkDir(workspaceFS, ".", func(relPath string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + + sourcePath := path.Join(sourceDir, relPath) + targetPath := filepath.Join(targetDir, relPath) + + // create directory and return early + if d.IsDir() { + return os.MkdirAll(targetPath, 0755) + } + + // Add extension to local file path if the file is a notebook + stat, err := workspaceFiler.Stat(ctx, relPath) + if err != nil { + return err + } + info := stat.Sys().(workspace.ObjectInfo) + if info.ObjectType == workspace.ObjectTypeNotebook { + switch info.Language { + case workspace.LanguagePython: + targetPath += ".py" + case workspace.LanguageR: + targetPath += ".r" + case workspace.LanguageScala: + targetPath += ".scala" + case workspace.LanguageSql: + targetPath += ".sql" + default: + // Do not add any extension to the file name + } + } + + // TODO: test this behaviour + // Skip file if a file already exists in path + if _, err := os.Stat(targetPath); err == nil && !exportOverwrite { + // Log event that this file/directory has been skipped + return cmdio.RenderWithTemplate(ctx, newFileSkippedEvent(sourcePath, targetPath), "Skipping {{.SourcePath}} because {{.TargetPath}} already exists\n") + } + + // create the file + f, err := os.Create(targetPath) + if err != nil { + return err + } + defer f.Close() + + // stream write content to the local file + r, err := workspaceFiler.Read(ctx, relPath) + if err != nil { + return err + } + _, err = io.Copy(f, r) + if err != nil { + return err + } + return cmdio.RenderWithTemplate(ctx, newDownloadCompleteEvent(sourcePath, targetPath), "Downloaded {{.SourcePath}} -> {{.TargetPath}}\n") + }) + if err != nil { + return err + } + return cmdio.RenderWithTemplate(ctx, newExportCompletedEvent(targetDir), "Export complete. Files can be found at {{.TargetPath}}\n") + }, +} + +var exportOverwrite bool + +func init() { + exportDirCommand.Flags().BoolVar(&exportOverwrite, "overwrite", false, "overwrite existing local files") + Cmd.AddCommand(exportDirCommand) +} diff --git a/internal/filer_test.go b/internal/filer_test.go index 317d7c852a..3042693ae7 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -53,6 +53,12 @@ func runFilerReadWriteTest(t *testing.T, ctx context.Context, f filer.Filer) { assert.True(t, errors.As(err, &filer.FileDoesNotExistError{})) assert.True(t, errors.Is(err, fs.ErrNotExist)) + // Read should fail because the path points to a directory + err = f.Mkdir(ctx, "/dir") + require.NoError(t, err) + _, err = f.Read(ctx, "/dir") + assert.ErrorIs(t, err, fs.ErrInvalid) + // Write with CreateParentDirectories flag should succeed. err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello world`), filer.CreateParentDirectories) assert.NoError(t, err) diff --git a/internal/workspace_test.go b/internal/workspace_test.go index f8830990d9..a78baf2bb6 100644 --- a/internal/workspace_test.go +++ b/internal/workspace_test.go @@ -1,9 +1,19 @@ package internal import ( + "context" + "errors" + "net/http" + "os" + "path/filepath" + "strings" "testing" + "github.com/databricks/cli/libs/filer" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/apierr" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestAccWorkspaceList(t *testing.T) { @@ -27,3 +37,104 @@ func TestWorkpaceGetStatusErrorWhenNoArguments(t *testing.T) { _, _, err := RequireErrorRun(t, "workspace", "get-status") assert.Equal(t, "accepts 1 arg(s), received 0", err.Error()) } + +func setupWorkspaceImportExportTest(t *testing.T) (context.Context, filer.Filer, string) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w := databricks.Must(databricks.NewWorkspaceClient()) + tmpdir := temporaryWorkspaceDir(t, w) + f, err := filer.NewWorkspaceFilesClient(w, tmpdir) + require.NoError(t, err) + + // Check if we can use this API here, skip test if we cannot. + _, err = f.Read(ctx, "we_use_this_call_to_test_if_this_api_is_enabled") + var aerr *apierr.APIError + if errors.As(err, &aerr) && aerr.StatusCode == http.StatusBadRequest { + t.Skip(aerr.Message) + } + + return ctx, f, tmpdir +} + +// TODO: add tests for the progress event output logs: https://github.com/databricks/cli/issues/447 +func assertFileContents(t *testing.T, path string, content string) { + require.FileExists(t, path) + b, err := os.ReadFile(path) + require.NoError(t, err) + assert.Contains(t, string(b), content) +} + +func TestExportDir(t *testing.T) { + ctx, f, sourceDir := setupWorkspaceImportExportTest(t) + targetDir := t.TempDir() + + var err error + + // Write test data to the workspace + err = f.Write(ctx, "file-a", strings.NewReader("abc")) + require.NoError(t, err) + err = f.Write(ctx, "pyNotebook.py", strings.NewReader("# Databricks notebook source")) + require.NoError(t, err) + err = f.Write(ctx, "sqlNotebook.sql", strings.NewReader("-- Databricks notebook source")) + require.NoError(t, err) + err = f.Write(ctx, "scalaNotebook.scala", strings.NewReader("// Databricks notebook source")) + require.NoError(t, err) + err = f.Write(ctx, "rNotebook.r", strings.NewReader("# Databricks notebook source")) + require.NoError(t, err) + err = f.Write(ctx, "a/b/c/file-b", strings.NewReader("def"), filer.CreateParentDirectories) + require.NoError(t, err) + + // Run Export + RequireSuccessfulRun(t, "workspace", "export-dir", sourceDir, targetDir) + + // Assert files were exported + assertFileContents(t, filepath.Join(targetDir, "file-a"), "abc") + assertFileContents(t, filepath.Join(targetDir, "pyNotebook.py"), "# Databricks notebook source") + assertFileContents(t, filepath.Join(targetDir, "sqlNotebook.sql"), "-- Databricks notebook source") + assertFileContents(t, filepath.Join(targetDir, "rNotebook.r"), "# Databricks notebook source") + assertFileContents(t, filepath.Join(targetDir, "scalaNotebook.scala"), "// Databricks notebook source") + assertFileContents(t, filepath.Join(targetDir, "a/b/c/file-b"), "def") +} + +func TestExportDirDoesNotOverwrite(t *testing.T) { + ctx, f, sourceDir := setupWorkspaceImportExportTest(t) + targetDir := t.TempDir() + + var err error + + // Write remote file + err = f.Write(ctx, "file-a", strings.NewReader("content from workspace")) + require.NoError(t, err) + + // Write local file + err = os.WriteFile(filepath.Join(targetDir, "file-a"), []byte("local content"), os.ModePerm) + require.NoError(t, err) + + // Run Export + RequireSuccessfulRun(t, "workspace", "export-dir", sourceDir, targetDir) + + // Assert file is not overwritten + assertFileContents(t, filepath.Join(targetDir, "file-a"), "local content") +} + +func TestExportDirWithOverwriteFlag(t *testing.T) { + ctx, f, sourceDir := setupWorkspaceImportExportTest(t) + targetDir := t.TempDir() + + var err error + + // Write remote file + err = f.Write(ctx, "file-a", strings.NewReader("content from workspace")) + require.NoError(t, err) + + // Write local file + err = os.WriteFile(filepath.Join(targetDir, "file-a"), []byte("local content"), os.ModePerm) + require.NoError(t, err) + + // Run Export + RequireSuccessfulRun(t, "workspace", "export-dir", sourceDir, targetDir, "--overwrite") + + // Assert file has been overwritten + assertFileContents(t, filepath.Join(targetDir, "file-a"), "content from workspace") +} diff --git a/libs/filer/dbfs_client.go b/libs/filer/dbfs_client.go index b165852d83..2ea4108148 100644 --- a/libs/filer/dbfs_client.go +++ b/libs/filer/dbfs_client.go @@ -59,7 +59,7 @@ func (info dbfsFileInfo) IsDir() bool { } func (info dbfsFileInfo) Sys() any { - return nil + return info.fi } // DbfsClient implements the [Filer] interface for the DBFS backend. @@ -145,24 +145,20 @@ func (w *DbfsClient) Read(ctx context.Context, name string) (io.Reader, error) { return nil, err } - handle, err := w.workspaceClient.Dbfs.Open(ctx, absPath, files.FileModeRead) + // This stat call serves two purposes: + // 1. Checks file at path exists, and throws an error if it does not + // 2. Allows use to error out if the path is a directory. This is needed + // because the Dbfs.Open method on the SDK does not error when the path is + // a directory + stat, err := w.Stat(ctx, name) if err != nil { - var aerr *apierr.APIError - if !errors.As(err, &aerr) { - return nil, err - } - - // This API returns a 404 if the file doesn't exist. - if aerr.StatusCode == http.StatusNotFound { - if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { - return nil, FileDoesNotExistError{absPath} - } - } - return nil, err } + if stat.IsDir() { + return nil, NotAFile{absPath} + } - return handle, nil + return w.workspaceClient.Dbfs.Open(ctx, absPath, files.FileModeRead) } func (w *DbfsClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error { diff --git a/libs/filer/filer.go b/libs/filer/filer.go index dcee3596ba..1c5ec2b9dd 100644 --- a/libs/filer/filer.go +++ b/libs/filer/filer.go @@ -68,6 +68,18 @@ func (err NotADirectory) Is(other error) bool { return other == fs.ErrInvalid } +type NotAFile struct { + path string +} + +func (err NotAFile) Error() string { + return fmt.Sprintf("not a file: %s", err.path) +} + +func (err NotAFile) Is(other error) bool { + return other == fs.ErrInvalid +} + type DirectoryNotEmptyError struct { path string } diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index 9b4178afeb..eb9dfa7084 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -3,6 +3,7 @@ package filer import ( "bytes" "context" + "encoding/base64" "errors" "fmt" "io" @@ -65,7 +66,7 @@ func (info wsfsFileInfo) IsDir() bool { } func (info wsfsFileInfo) Sys() any { - return nil + return info.oi } // WorkspaceFilesClient implements the files-in-workspace API. @@ -151,37 +152,38 @@ func (w *WorkspaceFilesClient) Write(ctx context.Context, name string, reader io return err } +// TODO: test what happens on trying to read a directory. Ideally send an invalid param error func (w *WorkspaceFilesClient) Read(ctx context.Context, name string) (io.Reader, error) { absPath, err := w.root.Join(name) if err != nil { return nil, err } - // Remove leading "/" so we can use it in the URL. - urlPath := fmt.Sprintf( - "/api/2.0/workspace-files/%s", - strings.TrimLeft(absPath, "/"), - ) - - var res []byte - err = w.apiClient.Do(ctx, http.MethodGet, urlPath, nil, &res) - - // Return early on success. - if err == nil { - return bytes.NewReader(res), nil + // This stat call serves two purposes: + // 1. Checks file at path exists, and throws an error if it does not + // 2. Allows use to error out if the path is a directory. This is needed + // because the /workspace/export API does not error out + stat, err := w.Stat(ctx, name) + if err != nil { + return nil, err + } + if stat.IsDir() { + return nil, NotAFile{absPath} } - // Special handling of this error only if it is an API error. - var aerr *apierr.APIError - if !errors.As(err, &aerr) { + // Export file contents. Note the /workspace/export API has a limit of 10MBs + // for the file size + res, err := w.workspaceClient.Workspace.Export(ctx, workspace.ExportRequest{ + Path: absPath, + }) + if err != nil { return nil, err } - - if aerr.StatusCode == http.StatusNotFound { - return nil, FileDoesNotExistError{absPath} + b, err := base64.StdEncoding.DecodeString(res.Content) + if err != nil { + return nil, err } - - return nil, err + return bytes.NewReader(b), nil } func (w *WorkspaceFilesClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error { From 589f2009ff53331881086e71583ab8a1382830e2 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 7 Jun 2023 19:11:14 +0200 Subject: [PATCH 02/12] - --- cmd/workspace/workspace/export_dir.go | 3 +-- libs/filer/workspace_files_client.go | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cmd/workspace/workspace/export_dir.go b/cmd/workspace/workspace/export_dir.go index 2f11d9c4f1..7c0f26c4f3 100644 --- a/cmd/workspace/workspace/export_dir.go +++ b/cmd/workspace/workspace/export_dir.go @@ -77,7 +77,6 @@ based on the language type. } } - // TODO: test this behaviour // Skip file if a file already exists in path if _, err := os.Stat(targetPath); err == nil && !exportOverwrite { // Log event that this file/directory has been skipped @@ -91,7 +90,7 @@ based on the language type. } defer f.Close() - // stream write content to the local file + // Write content to the local file r, err := workspaceFiler.Read(ctx, relPath) if err != nil { return err diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index eb9dfa7084..5fe691e0ab 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -173,7 +173,7 @@ func (w *WorkspaceFilesClient) Read(ctx context.Context, name string) (io.Reader // Export file contents. Note the /workspace/export API has a limit of 10MBs // for the file size - res, err := w.workspaceClient.Workspace.Export(ctx, workspace.ExportRequest{ + res, err := w.workspaceClient.Workspace.Export(ctx, workspace.ExportRequest{ Path: absPath, }) if err != nil { From 5e1462e46d7394b1b9b3b3c6f3159e001812fb3a Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 7 Jun 2023 19:17:36 +0200 Subject: [PATCH 03/12] - --- cmd/workspace/workspace/export_dir.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/workspace/workspace/export_dir.go b/cmd/workspace/workspace/export_dir.go index 7c0f26c4f3..9f89194372 100644 --- a/cmd/workspace/workspace/export_dir.go +++ b/cmd/workspace/workspace/export_dir.go @@ -104,7 +104,7 @@ based on the language type. if err != nil { return err } - return cmdio.RenderWithTemplate(ctx, newExportCompletedEvent(targetDir), "Export complete. Files can be found at {{.TargetPath}}\n") + return cmdio.RenderWithTemplate(ctx, newExportCompletedEvent(targetDir), "Export complete. Files can be found at {{.TargetPath}}\n") }, } From 999df06a53da10e64e733aa62aaf3b26030036f3 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 7 Jun 2023 19:22:11 +0200 Subject: [PATCH 04/12] - --- libs/filer/workspace_files_client.go | 1 - 1 file changed, 1 deletion(-) diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index 5fe691e0ab..13114822d3 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -152,7 +152,6 @@ func (w *WorkspaceFilesClient) Write(ctx context.Context, name string, reader io return err } -// TODO: test what happens on trying to read a directory. Ideally send an invalid param error func (w *WorkspaceFilesClient) Read(ctx context.Context, name string) (io.Reader, error) { absPath, err := w.root.Join(name) if err != nil { From 53ae92fdaaf2fd6bac9c96b456eeb43291679217 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 7 Jun 2023 19:34:46 +0200 Subject: [PATCH 05/12] - --- internal/workspace_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/workspace_test.go b/internal/workspace_test.go index a78baf2bb6..bfa323c5d7 100644 --- a/internal/workspace_test.go +++ b/internal/workspace_test.go @@ -65,7 +65,7 @@ func assertFileContents(t *testing.T, path string, content string) { assert.Contains(t, string(b), content) } -func TestExportDir(t *testing.T) { +func TestAccExportDir(t *testing.T) { ctx, f, sourceDir := setupWorkspaceImportExportTest(t) targetDir := t.TempDir() @@ -97,7 +97,7 @@ func TestExportDir(t *testing.T) { assertFileContents(t, filepath.Join(targetDir, "a/b/c/file-b"), "def") } -func TestExportDirDoesNotOverwrite(t *testing.T) { +func TestAccExportDirDoesNotOverwrite(t *testing.T) { ctx, f, sourceDir := setupWorkspaceImportExportTest(t) targetDir := t.TempDir() @@ -118,7 +118,7 @@ func TestExportDirDoesNotOverwrite(t *testing.T) { assertFileContents(t, filepath.Join(targetDir, "file-a"), "local content") } -func TestExportDirWithOverwriteFlag(t *testing.T) { +func TestAccExportDirWithOverwriteFlag(t *testing.T) { ctx, f, sourceDir := setupWorkspaceImportExportTest(t) targetDir := t.TempDir() From f33e0a19d8b3c279eaad60d045c30572faf1ea65 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 8 Jun 2023 13:27:45 +0200 Subject: [PATCH 06/12] refactor out walkdir callback --- cmd/workspace/workspace/export_dir.go | 123 ++++++++++++++------------ 1 file changed, 65 insertions(+), 58 deletions(-) diff --git a/cmd/workspace/workspace/export_dir.go b/cmd/workspace/workspace/export_dir.go index 9f89194372..1809728431 100644 --- a/cmd/workspace/workspace/export_dir.go +++ b/cmd/workspace/workspace/export_dir.go @@ -1,6 +1,7 @@ package workspace import ( + "context" "io" "io/fs" "os" @@ -14,6 +15,69 @@ import ( "github.com/spf13/cobra" ) +// The callback function exports the file specified at relPath. This function is +// meant to be used in conjunction with fs.WalkDir +func exportFileCallback(ctx context.Context, workspaceFiler filer.Filer, sourceDir, targetDir string) func(string, fs.DirEntry, error) error { + return func(relPath string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + + sourcePath := path.Join(sourceDir, relPath) + targetPath := filepath.Join(targetDir, relPath) + + // create directory and return early + if d.IsDir() { + return os.MkdirAll(targetPath, 0755) + } + + // Add extension to local file path if the file is a notebook + stat, err := workspaceFiler.Stat(ctx, relPath) + if err != nil { + return err + } + info := stat.Sys().(workspace.ObjectInfo) + if info.ObjectType == workspace.ObjectTypeNotebook { + switch info.Language { + case workspace.LanguagePython: + targetPath += ".py" + case workspace.LanguageR: + targetPath += ".r" + case workspace.LanguageScala: + targetPath += ".scala" + case workspace.LanguageSql: + targetPath += ".sql" + default: + // Do not add any extension to the file name + } + } + + // Skip file if a file already exists in path + if _, err := os.Stat(targetPath); err == nil && !exportOverwrite { + // Log event that this file/directory has been skipped + return cmdio.RenderWithTemplate(ctx, newFileSkippedEvent(sourcePath, targetPath), "Skipping {{.SourcePath}} because {{.TargetPath}} already exists\n") + } + + // create the file + f, err := os.Create(targetPath) + if err != nil { + return err + } + defer f.Close() + + // Write content to the local file + r, err := workspaceFiler.Read(ctx, relPath) + if err != nil { + return err + } + _, err = io.Copy(f, r) + if err != nil { + return err + } + return cmdio.RenderWithTemplate(ctx, newDownloadCompleteEvent(sourcePath, targetPath), "Downloaded {{.SourcePath}} -> {{.TargetPath}}\n") + } +} + var exportDirCommand = &cobra.Command{ Use: "export-dir SOURCE_PATH TARGET_PATH", Short: `Export a directory from a Databricks workspace to the local file system.`, @@ -43,64 +107,7 @@ based on the language type. return err } - err = fs.WalkDir(workspaceFS, ".", func(relPath string, d fs.DirEntry, err error) error { - if err != nil { - return err - } - - sourcePath := path.Join(sourceDir, relPath) - targetPath := filepath.Join(targetDir, relPath) - - // create directory and return early - if d.IsDir() { - return os.MkdirAll(targetPath, 0755) - } - - // Add extension to local file path if the file is a notebook - stat, err := workspaceFiler.Stat(ctx, relPath) - if err != nil { - return err - } - info := stat.Sys().(workspace.ObjectInfo) - if info.ObjectType == workspace.ObjectTypeNotebook { - switch info.Language { - case workspace.LanguagePython: - targetPath += ".py" - case workspace.LanguageR: - targetPath += ".r" - case workspace.LanguageScala: - targetPath += ".scala" - case workspace.LanguageSql: - targetPath += ".sql" - default: - // Do not add any extension to the file name - } - } - - // Skip file if a file already exists in path - if _, err := os.Stat(targetPath); err == nil && !exportOverwrite { - // Log event that this file/directory has been skipped - return cmdio.RenderWithTemplate(ctx, newFileSkippedEvent(sourcePath, targetPath), "Skipping {{.SourcePath}} because {{.TargetPath}} already exists\n") - } - - // create the file - f, err := os.Create(targetPath) - if err != nil { - return err - } - defer f.Close() - - // Write content to the local file - r, err := workspaceFiler.Read(ctx, relPath) - if err != nil { - return err - } - _, err = io.Copy(f, r) - if err != nil { - return err - } - return cmdio.RenderWithTemplate(ctx, newDownloadCompleteEvent(sourcePath, targetPath), "Downloaded {{.SourcePath}} -> {{.TargetPath}}\n") - }) + err = fs.WalkDir(workspaceFS, ".", exportFileCallback(ctx, workspaceFiler, sourceDir, targetDir)) if err != nil { return err } From 0df6dea6f5e41f08414260281bdfc012d4812ee3 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 8 Jun 2023 13:32:31 +0200 Subject: [PATCH 07/12] add clarifying comment --- cmd/workspace/workspace/export_dir.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cmd/workspace/workspace/export_dir.go b/cmd/workspace/workspace/export_dir.go index 1809728431..d421dae061 100644 --- a/cmd/workspace/workspace/export_dir.go +++ b/cmd/workspace/workspace/export_dir.go @@ -52,7 +52,9 @@ func exportFileCallback(ctx context.Context, workspaceFiler filer.Filer, sourceD } } - // Skip file if a file already exists in path + // Skip file if a file already exists in path. + // os.Stat returns a fs.ErrNotExist if a file does not exist at path. + // If a file exists, and overwrite is not set, we skip exporting the file if _, err := os.Stat(targetPath); err == nil && !exportOverwrite { // Log event that this file/directory has been skipped return cmdio.RenderWithTemplate(ctx, newFileSkippedEvent(sourcePath, targetPath), "Skipping {{.SourcePath}} because {{.TargetPath}} already exists\n") From 04347e94181a7d54b417a8a4e1657f4186d687e4 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 8 Jun 2023 13:35:44 +0200 Subject: [PATCH 08/12] remove extra sys call --- cmd/workspace/workspace/export_dir.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/workspace/workspace/export_dir.go b/cmd/workspace/workspace/export_dir.go index d421dae061..af79117686 100644 --- a/cmd/workspace/workspace/export_dir.go +++ b/cmd/workspace/workspace/export_dir.go @@ -32,13 +32,13 @@ func exportFileCallback(ctx context.Context, workspaceFiler filer.Filer, sourceD } // Add extension to local file path if the file is a notebook - stat, err := workspaceFiler.Stat(ctx, relPath) + info, err := d.Info() if err != nil { return err } - info := stat.Sys().(workspace.ObjectInfo) - if info.ObjectType == workspace.ObjectTypeNotebook { - switch info.Language { + objectInfo := info.Sys().(workspace.ObjectInfo) + if objectInfo.ObjectType == workspace.ObjectTypeNotebook { + switch objectInfo.Language { case workspace.LanguagePython: targetPath += ".py" case workspace.LanguageR: From 4dcf7159959218f1702cf3a8e4c344e17914a268 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 8 Jun 2023 13:49:34 +0200 Subject: [PATCH 09/12] add todo followup to remove stat call --- libs/filer/dbfs_client.go | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/filer/dbfs_client.go b/libs/filer/dbfs_client.go index 2ea4108148..14919f1ca1 100644 --- a/libs/filer/dbfs_client.go +++ b/libs/filer/dbfs_client.go @@ -150,6 +150,7 @@ func (w *DbfsClient) Read(ctx context.Context, name string) (io.Reader, error) { // 2. Allows use to error out if the path is a directory. This is needed // because the Dbfs.Open method on the SDK does not error when the path is // a directory + // TODO: remove this stat call on go sdk bump. https://github.com/databricks/cli/issues/450 stat, err := w.Stat(ctx, name) if err != nil { return nil, err From 90c00b46bcb6326d8aeeebd56a43827b947d5b54 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 8 Jun 2023 13:50:28 +0200 Subject: [PATCH 10/12] - --- libs/filer/dbfs_client.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/filer/dbfs_client.go b/libs/filer/dbfs_client.go index 14919f1ca1..21c7ca6a84 100644 --- a/libs/filer/dbfs_client.go +++ b/libs/filer/dbfs_client.go @@ -150,7 +150,7 @@ func (w *DbfsClient) Read(ctx context.Context, name string) (io.Reader, error) { // 2. Allows use to error out if the path is a directory. This is needed // because the Dbfs.Open method on the SDK does not error when the path is // a directory - // TODO: remove this stat call on go sdk bump. https://github.com/databricks/cli/issues/450 + // TODO(added 8 June 2023): remove this stat call on go sdk bump. https://github.com/databricks/cli/issues/450 stat, err := w.Stat(ctx, name) if err != nil { return nil, err From 03bdf16035af95ad5751c4604ed60b69fefa4f61 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 8 Jun 2023 16:34:09 +0200 Subject: [PATCH 11/12] Address comments --- cmd/workspace/workspace/events.go | 22 +++++++++++++--------- cmd/workspace/workspace/export_dir.go | 4 ++-- libs/filer/dbfs_client.go | 2 +- libs/filer/workspace_files_client.go | 6 ++++-- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/cmd/workspace/workspace/events.go b/cmd/workspace/workspace/events.go index 9479786938..c4eb0f74bf 100644 --- a/cmd/workspace/workspace/events.go +++ b/cmd/workspace/workspace/events.go @@ -1,21 +1,25 @@ package workspace type fileIOEvent struct { - SourcePath string `json:"source_path,omitempty"` - TargetPath string `json:"target_path,omitempty"` - Type string `json:"type"` + SourcePath string `json:"source_path,omitempty"` + TargetPath string `json:"target_path,omitempty"` + Type EventType `json:"type"` } -const EventTypeDownloadComplete = "DOWNLOAD_COMPLETE" -const EventTypeExportStarted = "EXPORT_STARTED" -const EventTypeExportCompleted = "EXPORT_COMPLETED" -const EventTypeFileSkipped = "FILE_SKIPPED" +type EventType string -func newDownloadCompleteEvent(sourcePath, targetPath string) fileIOEvent { +const ( + EventTypeFileExported = EventType("FILE_EXPORTED") + EventTypeExportStarted = EventType("EXPORT_STARTED") + EventTypeExportCompleted = EventType("EXPORT_COMPLETED") + EventTypeFileSkipped = EventType("FILE_SKIPPED") +) + +func newFileExportedEvent(sourcePath, targetPath string) fileIOEvent { return fileIOEvent{ SourcePath: sourcePath, TargetPath: targetPath, - Type: EventTypeDownloadComplete, + Type: EventTypeFileExported, } } diff --git a/cmd/workspace/workspace/export_dir.go b/cmd/workspace/workspace/export_dir.go index af79117686..07fe7e6c14 100644 --- a/cmd/workspace/workspace/export_dir.go +++ b/cmd/workspace/workspace/export_dir.go @@ -57,7 +57,7 @@ func exportFileCallback(ctx context.Context, workspaceFiler filer.Filer, sourceD // If a file exists, and overwrite is not set, we skip exporting the file if _, err := os.Stat(targetPath); err == nil && !exportOverwrite { // Log event that this file/directory has been skipped - return cmdio.RenderWithTemplate(ctx, newFileSkippedEvent(sourcePath, targetPath), "Skipping {{.SourcePath}} because {{.TargetPath}} already exists\n") + return cmdio.RenderWithTemplate(ctx, newFileSkippedEvent(sourcePath, targetPath), "{{.SourcePath}} -> {{.TargetPath}} (skipped; already exists)\n") } // create the file @@ -76,7 +76,7 @@ func exportFileCallback(ctx context.Context, workspaceFiler filer.Filer, sourceD if err != nil { return err } - return cmdio.RenderWithTemplate(ctx, newDownloadCompleteEvent(sourcePath, targetPath), "Downloaded {{.SourcePath}} -> {{.TargetPath}}\n") + return cmdio.RenderWithTemplate(ctx, newFileExportedEvent(sourcePath, targetPath), "{{.SourcePath}} -> {{.TargetPath}}\n") } } diff --git a/libs/filer/dbfs_client.go b/libs/filer/dbfs_client.go index 21c7ca6a84..dbf3cf60b0 100644 --- a/libs/filer/dbfs_client.go +++ b/libs/filer/dbfs_client.go @@ -147,7 +147,7 @@ func (w *DbfsClient) Read(ctx context.Context, name string) (io.Reader, error) { // This stat call serves two purposes: // 1. Checks file at path exists, and throws an error if it does not - // 2. Allows use to error out if the path is a directory. This is needed + // 2. Allows us to error out if the path is a directory. This is needed // because the Dbfs.Open method on the SDK does not error when the path is // a directory // TODO(added 8 June 2023): remove this stat call on go sdk bump. https://github.com/databricks/cli/issues/450 diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index 13114822d3..4d310b1a80 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -160,8 +160,9 @@ func (w *WorkspaceFilesClient) Read(ctx context.Context, name string) (io.Reader // This stat call serves two purposes: // 1. Checks file at path exists, and throws an error if it does not - // 2. Allows use to error out if the path is a directory. This is needed - // because the /workspace/export API does not error out + // 2. Allows us to error out if the path is a directory. This is needed + // because the /workspace/export API does not error out, and returns the directory + // as a DBC archive even if format "SOURCE" is specified stat, err := w.Stat(ctx, name) if err != nil { return nil, err @@ -172,6 +173,7 @@ func (w *WorkspaceFilesClient) Read(ctx context.Context, name string) (io.Reader // Export file contents. Note the /workspace/export API has a limit of 10MBs // for the file size + // TODO: use direct download once it's fixed. see: https://github.com/databricks/cli/issues/452 res, err := w.workspaceClient.Workspace.Export(ctx, workspace.ExportRequest{ Path: absPath, }) From 62dc7d585d3ed5104256e752519d1ec834541cf2 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 8 Jun 2023 17:05:10 +0200 Subject: [PATCH 12/12] skip rendering initial logs and full workspace paths --- cmd/workspace/workspace/export_dir.go | 6 +++--- libs/cmdio/io.go | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/cmd/workspace/workspace/export_dir.go b/cmd/workspace/workspace/export_dir.go index 07fe7e6c14..1c3fe968f1 100644 --- a/cmd/workspace/workspace/export_dir.go +++ b/cmd/workspace/workspace/export_dir.go @@ -57,7 +57,7 @@ func exportFileCallback(ctx context.Context, workspaceFiler filer.Filer, sourceD // If a file exists, and overwrite is not set, we skip exporting the file if _, err := os.Stat(targetPath); err == nil && !exportOverwrite { // Log event that this file/directory has been skipped - return cmdio.RenderWithTemplate(ctx, newFileSkippedEvent(sourcePath, targetPath), "{{.SourcePath}} -> {{.TargetPath}} (skipped; already exists)\n") + return cmdio.RenderWithTemplate(ctx, newFileSkippedEvent(relPath, targetPath), "{{.SourcePath}} -> {{.TargetPath}} (skipped; already exists)\n") } // create the file @@ -104,7 +104,7 @@ based on the language type. workspaceFS := filer.NewFS(ctx, workspaceFiler) // TODO: print progress events on stderr instead: https://github.com/databricks/cli/issues/448 - err = cmdio.RenderWithTemplate(ctx, newExportStartedEvent(sourceDir), "Export started. Download files from {{.SourcePath}}\n") + err = cmdio.RenderJson(ctx, newExportStartedEvent(sourceDir)) if err != nil { return err } @@ -113,7 +113,7 @@ based on the language type. if err != nil { return err } - return cmdio.RenderWithTemplate(ctx, newExportCompletedEvent(targetDir), "Export complete. Files can be found at {{.TargetPath}}\n") + return cmdio.RenderJson(ctx, newExportCompletedEvent(targetDir)) }, } diff --git a/libs/cmdio/io.go b/libs/cmdio/io.go index 32637b1d4d..762a94550a 100644 --- a/libs/cmdio/io.go +++ b/libs/cmdio/io.go @@ -87,6 +87,14 @@ func RenderWithTemplate(ctx context.Context, v any, template string) error { } } +func RenderJson(ctx context.Context, v any) error { + c := fromContext(ctx) + if c.outputFormat == flags.OutputJSON { + return renderJson(c.out, v) + } + return nil +} + func RenderReader(ctx context.Context, r io.Reader) error { c := fromContext(ctx) switch c.outputFormat {