-
Notifications
You must be signed in to change notification settings - Fork 154
Add workspace export-dir command #449
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e0f6d68
589f200
2b4cad1
5e1462e
999df06
53ae92f
f33e0a1
0df6dea
04347e9
4dcf715
90c00b4
9681cc0
03bdf16
62dc7d5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
| package workspace | ||
|
|
||
// EventType names the kind of progress event emitted while exporting.
type EventType string

// Event kinds reported by the export command. The string values are what
// appears in the "type" field of the JSON-encoded event.
const (
	EventTypeFileExported    EventType = "FILE_EXPORTED"
	EventTypeExportStarted   EventType = "EXPORT_STARTED"
	EventTypeExportCompleted EventType = "EXPORT_COMPLETED"
	EventTypeFileSkipped     EventType = "FILE_SKIPPED"
)

// fileIOEvent is the payload rendered for each step of an export. Empty paths
// are left out of the JSON encoding; the event type is always present.
type fileIOEvent struct {
	SourcePath string    `json:"source_path,omitempty"`
	TargetPath string    `json:"target_path,omitempty"`
	Type       EventType `json:"type"`
}
|
|
||
| func newFileExportedEvent(sourcePath, targetPath string) fileIOEvent { | ||
| return fileIOEvent{ | ||
| SourcePath: sourcePath, | ||
| TargetPath: targetPath, | ||
| Type: EventTypeFileExported, | ||
| } | ||
| } | ||
|
|
||
| func newExportCompletedEvent(targetPath string) fileIOEvent { | ||
| return fileIOEvent{ | ||
| TargetPath: targetPath, | ||
| Type: EventTypeExportCompleted, | ||
| } | ||
| } | ||
|
|
||
| func newFileSkippedEvent(sourcePath, targetPath string) fileIOEvent { | ||
| return fileIOEvent{ | ||
| SourcePath: sourcePath, | ||
| TargetPath: targetPath, | ||
| Type: EventTypeFileSkipped, | ||
| } | ||
| } | ||
|
|
||
| func newExportStartedEvent(sourcePath string) fileIOEvent { | ||
| return fileIOEvent{ | ||
| SourcePath: sourcePath, | ||
| Type: EventTypeExportStarted, | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,125 @@ | ||
| package workspace | ||
|
|
||
| import ( | ||
| "context" | ||
| "io" | ||
| "io/fs" | ||
| "os" | ||
| "path" | ||
| "path/filepath" | ||
|
|
||
| "github.com/databricks/cli/cmd/root" | ||
| "github.com/databricks/cli/libs/cmdio" | ||
| "github.com/databricks/cli/libs/filer" | ||
| "github.com/databricks/databricks-sdk-go/service/workspace" | ||
| "github.com/spf13/cobra" | ||
| ) | ||
|
|
||
| // The callback function exports the file specified at relPath. This function is | ||
| // meant to be used in conjunction with fs.WalkDir | ||
| func exportFileCallback(ctx context.Context, workspaceFiler filer.Filer, sourceDir, targetDir string) func(string, fs.DirEntry, error) error { | ||
| return func(relPath string, d fs.DirEntry, err error) error { | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
| sourcePath := path.Join(sourceDir, relPath) | ||
| targetPath := filepath.Join(targetDir, relPath) | ||
|
|
||
| // create directory and return early | ||
| if d.IsDir() { | ||
| return os.MkdirAll(targetPath, 0755) | ||
| } | ||
|
|
||
| // Add extension to local file path if the file is a notebook | ||
| info, err := d.Info() | ||
| if err != nil { | ||
| return err | ||
| } | ||
| objectInfo := info.Sys().(workspace.ObjectInfo) | ||
| if objectInfo.ObjectType == workspace.ObjectTypeNotebook { | ||
| switch objectInfo.Language { | ||
| case workspace.LanguagePython: | ||
| targetPath += ".py" | ||
| case workspace.LanguageR: | ||
| targetPath += ".r" | ||
| case workspace.LanguageScala: | ||
| targetPath += ".scala" | ||
| case workspace.LanguageSql: | ||
| targetPath += ".sql" | ||
| default: | ||
| // Do not add any extension to the file name | ||
| } | ||
| } | ||
|
|
||
| // Skip file if a file already exists in path. | ||
| // os.Stat returns a fs.ErrNotExist if a file does not exist at path. | ||
| // If a file exists, and overwrite is not set, we skip exporting the file | ||
| if _, err := os.Stat(targetPath); err == nil && !exportOverwrite { | ||
| // Log event that this file/directory has been skipped | ||
| return cmdio.RenderWithTemplate(ctx, newFileSkippedEvent(relPath, targetPath), "{{.SourcePath}} -> {{.TargetPath}} (skipped; already exists)\n") | ||
| } | ||
|
|
||
| // create the file | ||
| f, err := os.Create(targetPath) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| defer f.Close() | ||
|
|
||
| // Write content to the local file | ||
| r, err := workspaceFiler.Read(ctx, relPath) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| _, err = io.Copy(f, r) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| return cmdio.RenderWithTemplate(ctx, newFileExportedEvent(sourcePath, targetPath), "{{.SourcePath}} -> {{.TargetPath}}\n") | ||
| } | ||
| } | ||
|
|
||
| var exportDirCommand = &cobra.Command{ | ||
| Use: "export-dir SOURCE_PATH TARGET_PATH", | ||
| Short: `Export a directory from a Databricks workspace to the local file system.`, | ||
| Long: ` | ||
| Export a directory recursively from a Databricks workspace to the local file system. | ||
| Notebooks will have one of the following extensions added .scala, .py, .sql, or .r | ||
| based on the language type. | ||
| `, | ||
| PreRunE: root.MustWorkspaceClient, | ||
| Args: cobra.ExactArgs(2), | ||
| RunE: func(cmd *cobra.Command, args []string) (err error) { | ||
| ctx := cmd.Context() | ||
| w := root.WorkspaceClient(ctx) | ||
| sourceDir := args[0] | ||
| targetDir := args[1] | ||
|
|
||
| // Initialize a filer and a file system on the source directory | ||
| workspaceFiler, err := filer.NewWorkspaceFilesClient(w, sourceDir) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| workspaceFS := filer.NewFS(ctx, workspaceFiler) | ||
|
|
||
| // TODO: print progress events on stderr instead: https://github.com/databricks/cli/issues/448 | ||
| err = cmdio.RenderJson(ctx, newExportStartedEvent(sourceDir)) | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
| err = fs.WalkDir(workspaceFS, ".", exportFileCallback(ctx, workspaceFiler, sourceDir, targetDir)) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| return cmdio.RenderJson(ctx, newExportCompletedEvent(targetDir)) | ||
| }, | ||
| } | ||
|
|
||
| var exportOverwrite bool | ||
|
|
||
| func init() { | ||
| exportDirCommand.Flags().BoolVar(&exportOverwrite, "overwrite", false, "overwrite existing local files") | ||
| Cmd.AddCommand(exportDirCommand) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -59,7 +59,7 @@ func (info dbfsFileInfo) IsDir() bool { | |
| } | ||
|
|
||
| func (info dbfsFileInfo) Sys() any { | ||
| return nil | ||
| return info.fi | ||
| } | ||
|
|
||
| // DbfsClient implements the [Filer] interface for the DBFS backend. | ||
|
|
@@ -145,24 +145,21 @@ func (w *DbfsClient) Read(ctx context.Context, name string) (io.Reader, error) { | |
| return nil, err | ||
| } | ||
|
|
||
| handle, err := w.workspaceClient.Dbfs.Open(ctx, absPath, files.FileModeRead) | ||
| // This stat call serves two purposes: | ||
| // 1. Checks file at path exists, and throws an error if it does not | ||
| // 2. Allows us to error out if the path is a directory. This is needed | ||
| // because the Dbfs.Open method on the SDK does not error when the path is | ||
| // a directory | ||
| // TODO(added 8 June 2023): remove this stat call on go sdk bump. https://github.com/databricks/cli/issues/450 | ||
| stat, err := w.Stat(ctx, name) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixed in databricks/databricks-sdk-go#415. I would prefer not to add a stat call for every read if we can avoid it.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I have added a TODO with a follow-up issue filed. I would rather not block on a Go SDK release or remove test coverage. |
||
| if err != nil { | ||
| var aerr *apierr.APIError | ||
| if !errors.As(err, &aerr) { | ||
| return nil, err | ||
| } | ||
|
|
||
| // This API returns a 404 if the file doesn't exist. | ||
| if aerr.StatusCode == http.StatusNotFound { | ||
| if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { | ||
| return nil, FileDoesNotExistError{absPath} | ||
| } | ||
| } | ||
|
|
||
| return nil, err | ||
| } | ||
| if stat.IsDir() { | ||
| return nil, NotAFile{absPath} | ||
| } | ||
|
|
||
| return handle, nil | ||
| return w.workspaceClient.Dbfs.Open(ctx, absPath, files.FileModeRead) | ||
| } | ||
|
|
||
| func (w *DbfsClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.