From e1780a4a1641a5c9173da43d6b1235131cac40f7 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 26 May 2023 11:35:13 +0200 Subject: [PATCH 1/8] Add databricks fs ls command for WSFS --- cmd/fs/ls.go | 72 ++++++++++++++++++++++++++-- libs/cmdio/io.go | 12 +++++ libs/filer/filer.go | 18 +++++++ libs/filer/root_path.go | 5 -- libs/filer/workspace_files_client.go | 27 +++++++++++ 5 files changed, 124 insertions(+), 10 deletions(-) diff --git a/cmd/fs/ls.go b/cmd/fs/ls.go index ac19238573..eccdeacba8 100644 --- a/cmd/fs/ls.go +++ b/cmd/fs/ls.go @@ -2,22 +2,84 @@ package fs import ( "fmt" + "path" + "time" + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/filer" "github.com/spf13/cobra" ) +func parseFileInfo(info filer.FileInfo, parentDir string, isAbsolute bool) map[string]string { + fullName := info.Name + if isAbsolute { + fullName = path.Join(parentDir, info.Name) + } + return map[string]string{ + "Name": fullName, + "ModTime": info.ModTime.UTC().Format(time.UnixDate), + "Size": fmt.Sprint(info.Size), + "Type": info.Type, + } +} + // lsCmd represents the ls command var lsCmd = &cobra.Command{ - Use: "ls ", - Short: "Lists files", - Long: `Lists files`, - Hidden: true, + Use: "ls ", + Short: "Lists files", + Long: `Lists files in a DBFS or WSFS directory`, + Args: cobra.MaximumNArgs(1), + Annotations: map[string]string{}, + PreRunE: root.MustWorkspaceClient, RunE: func(cmd *cobra.Command, args []string) error { - return fmt.Errorf("TODO") + // Assign template according to whether -l is specified + template := cmdio.Heredoc(` + {{range .}}{{.Name}} + {{end}} + `) + if longMode { + template = cmdio.Heredoc(` + {{range .}}{{.Type|printf "%-10s"}} {{.Size}} {{.ModTime}} {{.Name}} + {{end}} + `) + } + + // Path to list files from. Defaults to`/` + path := "/" + if len(args) > 0 { + path = args[0] + } + + // Initialize workspace client + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + f, err := filer.NewWorkspaceFilesClient(w, path) + if err != nil { + return err + } + + // Get file info + filesInfo, err := f.ReadDir(ctx, "") + if err != nil { + return err + } + + // Parse it so it's ready to be rendered + output := make([]map[string]string, 0) + for _, info := range filesInfo { + output = append(output, parseFileInfo(info, path, absolute)) + } + return cmdio.RenderWithTemplate(ctx, output, template) }, } +var longMode bool +var absolute bool + func init() { + lsCmd.Flags().BoolVarP(&longMode, "long", "l", false, "Displays full information including size, file type and modification time since Epoch in milliseconds.") + lsCmd.Flags().BoolVar(&absolute, "absolute", false, "Displays absolute paths.") fsCmd.AddCommand(lsCmd) } diff --git a/libs/cmdio/io.go b/libs/cmdio/io.go index e5a7199037..8326557fcb 100644 --- a/libs/cmdio/io.go +++ b/libs/cmdio/io.go @@ -71,6 +71,18 @@ func (c *cmdIO) Render(v any) error { } } +func RenderWithTemplate(ctx context.Context, v any, template string) error { + c := fromContext(ctx) + switch c.outputFormat { + case flags.OutputJSON: + return renderJson(c.out, v) + case flags.OutputText: + return renderTemplate(c.out, template, v) + default: + return fmt.Errorf("invalid output format: %s", c.outputFormat) + } +} + func Render(ctx context.Context, v any) error { c := fromContext(ctx) return c.Render(v) diff --git a/libs/filer/filer.go b/libs/filer/filer.go index 92de6e128c..e8420b818b 100644 --- a/libs/filer/filer.go +++ b/libs/filer/filer.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "io" + "time" ) type WriteMode int @@ -13,6 +14,20 @@ const ( CreateParentDirectories = iota << 1 ) +type FileInfo struct { + // The type of the file in workspace + Type string + + // Base name of the file + Name string + + // Size in bytes of the file + Size int64 + + // Last Modified time of the file + ModTime time.Time +} + type FileAlreadyExistsError struct { path string } @@ -41,4 +56,7 @@ type Filer interface { // Delete file at `path`. Delete(ctx context.Context, path string) error + + // Return contents of directory at `path` + ReadDir(ctx context.Context, path string) ([]FileInfo, error) } diff --git a/libs/filer/root_path.go b/libs/filer/root_path.go index 65b26d5310..bdeff5d73c 100644 --- a/libs/filer/root_path.go +++ b/libs/filer/root_path.go @@ -30,10 +30,5 @@ func (p *RootPath) Join(name string) (string, error) { return "", fmt.Errorf("relative path escapes root: %s", name) } - // Don't allow name to resolve to the root path. - if strings.TrimPrefix(absPath, p.rootPath) == "" { - return "", fmt.Errorf("relative path resolves to root: %s", name) - } - return absPath, nil } diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index ff813f0913..7396c4c91f 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -10,6 +10,7 @@ import ( "net/url" "path" "strings" + "time" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/apierr" @@ -31,6 +32,7 @@ type WorkspaceFilesClient struct { } func NewWorkspaceFilesClient(w *databricks.WorkspaceClient, root string) (Filer, error) { + //TODO apiClient, err := client.New(w.Config) if err != nil { return nil, err @@ -128,3 +130,28 @@ func (w *WorkspaceFilesClient) Delete(ctx context.Context, name string) error { Recursive: false, }) } + +func (w *WorkspaceFilesClient) ReadDir(ctx context.Context, name string) ([]FileInfo, error) { + absPath, err := w.root.Join(name) + if err != nil { + return nil, err + } + + objects, err := w.workspaceClient.Workspace.ListAll(ctx, workspace.ListWorkspaceRequest{ + Path: absPath, + }) + if err != nil { + return nil, err + } + + info := make([]FileInfo, 0) + for _, i := range objects { + info = append(info, FileInfo{ + Type: string(i.ObjectType), + Name: path.Base(i.Path), + Size: i.Size, + ModTime: time.UnixMilli(i.ModifiedAt), + }) + } + return info, nil +} From 53f0228bf2f349d7cb2d84108bd2c6469d14aca8 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 26 May 2023 11:37:32 +0200 Subject: [PATCH 2/8] nit --- libs/filer/workspace_files_client.go | 1 - 1 file changed, 1 deletion(-) diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index 7396c4c91f..c76d997a1c 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -32,7 +32,6 @@ type WorkspaceFilesClient struct { } func NewWorkspaceFilesClient(w *databricks.WorkspaceClient, root string) (Filer, error) { - //TODO apiClient, err := client.New(w.Config) if err != nil { return nil, err From 8a9175e33264a685b5481ad6264cde0ab792bfa3 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 26 May 2023 11:42:20 +0200 Subject: [PATCH 3/8] fix tests --- libs/filer/root_path_test.go | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/libs/filer/root_path_test.go b/libs/filer/root_path_test.go index 3787ef36bf..965842d030 100644 --- a/libs/filer/root_path_test.go +++ b/libs/filer/root_path_test.go @@ -31,6 +31,26 @@ func testRootPath(t *testing.T, uncleanRoot string) { assert.NoError(t, err) assert.Equal(t, cleanRoot+"/a/b/f/g", remotePath) + remotePath, err = rp.Join(".//a/..//./b/..") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + remotePath, err = rp.Join("a/b/../..") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + remotePath, err = rp.Join("") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + remotePath, err = rp.Join(".") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + remotePath, err = rp.Join("/") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + _, err = rp.Join("..") assert.ErrorContains(t, err, `relative path escapes root: ..`) @@ -57,21 +77,6 @@ func testRootPath(t *testing.T, uncleanRoot string) { _, err = rp.Join("../..") assert.ErrorContains(t, err, `relative path escapes root: ../..`) - - _, err = rp.Join(".//a/..//./b/..") - assert.ErrorContains(t, err, `relative path resolves to root: .//a/..//./b/..`) - - _, err = rp.Join("a/b/../..") - assert.ErrorContains(t, err, "relative path resolves to root: a/b/../..") - - _, err = rp.Join("") - assert.ErrorContains(t, err, "relative path resolves to root: ") - - _, err = rp.Join(".") - assert.ErrorContains(t, err, "relative path resolves to root: .") - - _, err = rp.Join("/") - assert.ErrorContains(t, err, "relative path resolves to root: /") } func TestRootPathClean(t *testing.T) { From 7c46b35597446b30d27746c1570fa4896d688a5f Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 26 May 2023 13:02:50 +0200 Subject: [PATCH 4/8] preallocate array --- libs/filer/workspace_files_client.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index c76d997a1c..2203ff9bca 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -143,14 +143,14 @@ func (w *WorkspaceFilesClient) ReadDir(ctx context.Context, name string) ([]File return nil, err } - info := make([]FileInfo, 0) - for _, i := range objects { - info = append(info, FileInfo{ - Type: string(i.ObjectType), - Name: path.Base(i.Path), - Size: i.Size, - ModTime: time.UnixMilli(i.ModifiedAt), - }) + info := make([]FileInfo, len(objects)) + for i, v := range objects { + info[i] = FileInfo{ + Type: string(v.ObjectType), + Name: path.Base(v.Path), + Size: v.Size, + ModTime: time.UnixMilli(v.ModifiedAt), + } } return info, nil } From c49df325d668b1f844dbb3e927c0a8571a763097 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 26 May 2023 17:13:45 +0200 Subject: [PATCH 5/8] added integration testst for ls --- internal/fs/ls_test.go | 85 +++++++++++++++++++++++++ internal/helpers/workspace_testdata.go | 88 ++++++++++++++++++++++++++ libs/filer/filer.go | 3 + libs/filer/workspace_files_client.go | 10 +++ 4 files changed, 186 insertions(+) create mode 100644 internal/fs/ls_test.go create mode 100644 internal/helpers/workspace_testdata.go diff --git a/internal/fs/ls_test.go b/internal/fs/ls_test.go new file mode 100644 index 0000000000..757fa72355 --- /dev/null +++ b/internal/fs/ls_test.go @@ -0,0 +1,85 @@ +package fs + +import ( + "encoding/json" + "fmt" + "path" + "testing" + + _ "github.com/databricks/cli/cmd/fs" + "github.com/databricks/cli/internal" + "github.com/databricks/cli/internal/helpers" + "github.com/databricks/cli/libs/cmdio" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func assertObjectListed(t *testing.T, parsedLogs []map[string]string, name string, objectType string) { + foundFile := false + for _, v := range parsedLogs { + if v["Name"] != name { + continue + } + foundFile = true + assert.Equal(t, objectType, v["Type"]) + } + assert.True(t, foundFile, fmt.Sprintf("failed to find file %s in output logs", name)) +} + +func TestAccFsLs(t *testing.T) { + t.Log(internal.GetEnvOrSkipTest(t, "CLOUD_ENV")) + + // setup some testdata in the workspace + w := helpers.NewWorkspaceTestdata(t) + w.AddFile("foo.txt", `hello, world`) + w.AddFile("python_notebook.py", cmdio.Heredoc(` + #Databricks notebook source + print(2)`)) + w.AddFile("python_file.py", `print(1)`) + w.Mkdir("my_directory") + w.AddFile("my_directory/.gitkeep", "") + + // run list command + stdout, stderr := internal.RequireSuccessfulRun(t, "fs", "ls", w.RootPath(), "--output=json") + + // read and parse the output logs + parsedLogs := make([]map[string]string, 0) + err := json.Unmarshal(stdout.Bytes(), &parsedLogs) + require.NoError(t, err) + + // make assertions on the output logs + assert.Equal(t, stderr.String(), "") + assertObjectListed(t, parsedLogs, "python_file.py", "FILE") + assertObjectListed(t, parsedLogs, "foo.txt", "FILE") + assertObjectListed(t, parsedLogs, "python_notebook", "NOTEBOOK") + assertObjectListed(t, parsedLogs, "my_directory", "DIRECTORY") +} + +func TestAccFsLsWithAbsoluteFlag(t *testing.T) { + t.Log(internal.GetEnvOrSkipTest(t, "CLOUD_ENV")) + + // setup some testdata in the workspace + w := helpers.NewWorkspaceTestdata(t) + w.AddFile("foo.txt", `hello, world`) + w.AddFile("python_notebook.py", cmdio.Heredoc(` + #Databricks notebook source + print(2)`)) + w.AddFile("python_file.py", `print(1)`) + w.Mkdir("my_directory") + w.AddFile("my_directory/.gitkeep", "") + + // run list command + stdout, stderr := internal.RequireSuccessfulRun(t, "fs", "ls", w.RootPath(), "--output=json", "--absolute") + + // read and parse the output logs + parsedLogs := make([]map[string]string, 0) + err := json.Unmarshal(stdout.Bytes(), &parsedLogs) + require.NoError(t, err) + + // make assertions on the output logs + assert.Equal(t, stderr.String(), "") + assertObjectListed(t, parsedLogs, path.Join(w.RootPath(), "python_file.py"), "FILE") + assertObjectListed(t, parsedLogs, path.Join(w.RootPath(), "foo.txt"), "FILE") + assertObjectListed(t, parsedLogs, path.Join(w.RootPath(), "python_notebook"), "NOTEBOOK") + assertObjectListed(t, parsedLogs, path.Join(w.RootPath(), "my_directory"), "DIRECTORY") +} diff --git a/internal/helpers/workspace_testdata.go b/internal/helpers/workspace_testdata.go new file mode 100644 index 0000000000..8548037801 --- /dev/null +++ b/internal/helpers/workspace_testdata.go @@ -0,0 +1,88 @@ +package helpers + +import ( + "context" + "fmt" + "net/http" + "net/url" + "path" + "strings" + "testing" + + "github.com/databricks/cli/internal" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/client" + "github.com/databricks/databricks-sdk-go/service/workspace" + "github.com/stretchr/testify/require" +) + +type workspaceTestdata struct { + root string + t *testing.T + client *databricks.WorkspaceClient +} + +func NewWorkspaceTestdata(t *testing.T) *workspaceTestdata { + ctx := context.Background() + w := databricks.Must(databricks.NewWorkspaceClient()) + + me, err := w.CurrentUser.Me(ctx) + require.NoError(t, err) + path := fmt.Sprintf("/Users/%s/%s", me.UserName, internal.RandomName("wsfs-files-")) + + // Ensure directory exists, but doesn't exist YET! + // Otherwise we could inadvertently remove a directory that already exists on cleanup. + t.Logf("mkdir %s", path) + err = w.Workspace.MkdirsByPath(ctx, path) + require.NoError(t, err) + + // Remove test directory on test completion. + t.Cleanup(func() { + t.Logf("rm -rf %s", path) + err := w.Workspace.Delete(ctx, workspace.Delete{ + Path: path, + Recursive: true, + }) + if err == nil || apierr.IsMissing(err) { + return + } + t.Logf("unable to remove temporary workspace path %s: %#v", path, err) + }) + + return &workspaceTestdata{ + root: path, + t: t, + client: w, + } +} + +func (w *workspaceTestdata) RootPath() string { + return w.root +} + +func (w *workspaceTestdata) AddFile(name string, content string) { + path := path.Join(w.root, name) + ctx := context.Background() + + // url path for uploading file API + urlPath := fmt.Sprintf( + "/api/2.0/workspace-files/import-file/%s?overwrite=true", + url.PathEscape(strings.TrimLeft(path, "/")), + ) + + // initialize API client + apiClient, err := client.New(w.client.Config) + require.NoError(w.t, err) + + // Make API request + err = apiClient.Do(ctx, http.MethodPost, urlPath, content, nil) + require.NoError(w.t, err) +} + +func (w *workspaceTestdata) Mkdir(name string) { + ctx := context.Background() + path := path.Join(w.root, name) + err := w.client.Workspace.MkdirsByPath(ctx, path) + require.NoError(w.t, err) +} diff --git a/libs/filer/filer.go b/libs/filer/filer.go index e8420b818b..11b9229632 100644 --- a/libs/filer/filer.go +++ b/libs/filer/filer.go @@ -59,4 +59,7 @@ type Filer interface { // Return contents of directory at `path` ReadDir(ctx context.Context, path string) ([]FileInfo, error) + + // Creates directory at `path`, creating any intermediate directories as required + Mkdir(ctx context.Context, path string) error } diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index 2203ff9bca..a6f0c56ef8 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -154,3 +154,13 @@ func (w *WorkspaceFilesClient) ReadDir(ctx context.Context, name string) ([]File } return info, nil } + +func (w *WorkspaceFilesClient) Mkdir(ctx context.Context, name string) error { + dirPath, err := w.root.Join(name) + if err != nil { + return err + } + return w.workspaceClient.Workspace.Mkdirs(ctx, workspace.Mkdirs{ + Path: dirPath, + }) +} From d876eaf619e8c7f8ecccdda74b602395734abb56 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 26 May 2023 17:16:53 +0200 Subject: [PATCH 6/8] add comments --- libs/filer/filer.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libs/filer/filer.go b/libs/filer/filer.go index 11b9229632..a29970ac75 100644 --- a/libs/filer/filer.go +++ b/libs/filer/filer.go @@ -14,6 +14,8 @@ const ( CreateParentDirectories = iota << 1 ) +// This struct is an abstract over file information from different file +// systems like WSFS and DBFS. The names for the fields are inspired from https://pkg.go.dev/io/fs#FileInfo type FileInfo struct { // The type of the file in workspace Type string From c3bc6fd540c748e826cd8dc971c8c8962a79895e Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 26 May 2023 17:56:30 +0200 Subject: [PATCH 7/8] alias for fileinfo --- cmd/fs/ls.go | 36 +++++++++++++++--------------------- libs/cmdio/render.go | 4 ++++ 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/cmd/fs/ls.go b/cmd/fs/ls.go index eccdeacba8..0adae4db05 100644 --- a/cmd/fs/ls.go +++ b/cmd/fs/ls.go @@ -1,9 +1,7 @@ package fs import ( - "fmt" "path" - "time" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdio" @@ -11,17 +9,10 @@ import ( "github.com/spf13/cobra" ) -func parseFileInfo(info filer.FileInfo, parentDir string, isAbsolute bool) map[string]string { - fullName := info.Name - if isAbsolute { - fullName = path.Join(parentDir, info.Name) - } - return map[string]string{ - "Name": fullName, - "ModTime": info.ModTime.UTC().Format(time.UnixDate), - "Size": fmt.Sprint(info.Size), - "Type": info.Type, - } +type FileInfo filer.FileInfo + +func (i *FileInfo) ExpandPath(root string) { + i.Name = path.Join(root, i.Name) } // lsCmd represents the ls command @@ -41,21 +32,21 @@ var lsCmd = &cobra.Command{ `) if longMode { template = cmdio.Heredoc(` - {{range .}}{{.Type|printf "%-10s"}} {{.Size}} {{.ModTime}} {{.Name}} + {{range .}}{{.Type|printf "%-10s"}} {{.Size}} {{.ModTime|unix_date}} {{.Name}} {{end}} `) } // Path to list files from. Defaults to`/` - path := "/" + rootPath := "/" if len(args) > 0 { - path = args[0] + rootPath = args[0] } // Initialize workspace client ctx := cmd.Context() w := root.WorkspaceClient(ctx) - f, err := filer.NewWorkspaceFilesClient(w, path) + f, err := filer.NewWorkspaceFilesClient(w, rootPath) if err != nil { return err } @@ -66,10 +57,13 @@ var lsCmd = &cobra.Command{ return err } - // Parse it so it's ready to be rendered - output := make([]map[string]string, 0) - for _, info := range filesInfo { - output = append(output, parseFileInfo(info, path, absolute)) + // compute output with expanded paths if necessary + output := make([]FileInfo, len(filesInfo)) + for i, v := range filesInfo { + output[i] = FileInfo(v) + if absolute { + output[i].ExpandPath(rootPath) + } } return cmdio.RenderWithTemplate(ctx, output, template) }, diff --git a/libs/cmdio/render.go b/libs/cmdio/render.go index 8aff2b8d2c..70e26d7160 100644 --- a/libs/cmdio/render.go +++ b/libs/cmdio/render.go @@ -6,6 +6,7 @@ import ( "strings" "text/tabwriter" "text/template" + "time" "github.com/fatih/color" "github.com/nwidger/jsoncolor" @@ -86,6 +87,9 @@ func renderTemplate(w io.Writer, tmpl string, v any) error { } return string(b), nil }, + "unix_date": func(t time.Time) string { + return t.UTC().Format(time.UnixDate) + }, }).Parse(tmpl) if err != nil { return err From 95d8590e81aaf536c03b8e359c85c8d79d7fddde Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 26 May 2023 18:00:38 +0200 Subject: [PATCH 8/8] changed alias to a function --- cmd/fs/ls.go | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/cmd/fs/ls.go b/cmd/fs/ls.go index 0adae4db05..e53a6f43e4 100644 --- a/cmd/fs/ls.go +++ b/cmd/fs/ls.go @@ -9,10 +9,9 @@ import ( "github.com/spf13/cobra" ) -type FileInfo filer.FileInfo - -func (i *FileInfo) ExpandPath(root string) { +func expandPath(i filer.FileInfo, root string) filer.FileInfo { i.Name = path.Join(root, i.Name) + return i } // lsCmd represents the ls command @@ -58,14 +57,12 @@ var lsCmd = &cobra.Command{ } // compute output with expanded paths if necessary - output := make([]FileInfo, len(filesInfo)) - for i, v := range filesInfo { - output[i] = FileInfo(v) - if absolute { - output[i].ExpandPath(rootPath) + if absolute { + for i := range filesInfo { + filesInfo[i] = expandPath(filesInfo[i], rootPath) } } - return cmdio.RenderWithTemplate(ctx, output, template) + return cmdio.RenderWithTemplate(ctx, filesInfo, template) }, }