Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 59 additions & 6 deletions cmd/fs/ls.go
Original file line number Diff line number Diff line change
@@ -1,23 +1,76 @@
package fs

import (
"fmt"
"path"

"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/filer"
"github.com/spf13/cobra"
)

func expandPath(i filer.FileInfo, root string) filer.FileInfo {
i.Name = path.Join(root, i.Name)
return i
}

// lsCmd represents the ls command
var lsCmd = &cobra.Command{
Use: "ls <dir-name>",
Short: "Lists files",
Long: `Lists files`,
Hidden: true,
Use: "ls <dir-name>",
Short: "Lists files",
Long: `Lists files in a DBFS or WSFS directory`,
Args: cobra.MaximumNArgs(1),
Annotations: map[string]string{},
PreRunE: root.MustWorkspaceClient,

RunE: func(cmd *cobra.Command, args []string) error {
return fmt.Errorf("TODO")
// Assign template according to whether -l is specified
template := cmdio.Heredoc(`
{{range .}}{{.Name}}
{{end}}
`)
if longMode {
template = cmdio.Heredoc(`
{{range .}}{{.Type|printf "%-10s"}} {{.Size}} {{.ModTime|unix_date}} {{.Name}}
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what if we support just the long mode? then the code is more maintainable and template ends up in the annotation

{{end}}
`)
}

// Path to list files from. Defaults to`/`
rootPath := "/"
if len(args) > 0 {
rootPath = args[0]
}

// Initialize workspace client
ctx := cmd.Context()
w := root.WorkspaceClient(ctx)
f, err := filer.NewWorkspaceFilesClient(w, rootPath)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

databricks fs lists DBFS, not the workspace. why do we use workspace files?

if err != nil {
return err
}

// Get file info
filesInfo, err := f.ReadDir(ctx, "")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should pass path.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well no right? since the filer is rooted at the arg[0] already

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ack, you're right. Could you modify this to . to make it explicit, and include a comment saying so?

if err != nil {
return err
}

// compute output with expanded paths if necessary
if absolute {
for i := range filesInfo {
filesInfo[i] = expandPath(filesInfo[i], rootPath)
}
}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could alias the FileInfo struct and add a couple helper functions to avoid creating a map[string]string. Those helpers would then also be more easily testable.

return cmdio.RenderWithTemplate(ctx, filesInfo, template)
},
}

var longMode bool
var absolute bool

func init() {
lsCmd.Flags().BoolVarP(&longMode, "long", "l", false, "Displays full information including size, file type and modification time since Epoch in milliseconds.")
lsCmd.Flags().BoolVar(&absolute, "absolute", false, "Displays absolute paths.")
fsCmd.AddCommand(lsCmd)
}
85 changes: 85 additions & 0 deletions internal/fs/ls_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
package fs

import (
"encoding/json"
"fmt"
"path"
"testing"

_ "github.com/databricks/cli/cmd/fs"
"github.com/databricks/cli/internal"
"github.com/databricks/cli/internal/helpers"
"github.com/databricks/cli/libs/cmdio"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func assertObjectListed(t *testing.T, parsedLogs []map[string]string, name string, objectType string) {
foundFile := false
for _, v := range parsedLogs {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How are these parsed logs? Looking at the code I see []filer.FileInfo.

if v["Name"] != name {
continue
}
foundFile = true
assert.Equal(t, objectType, v["Type"])
}
assert.True(t, foundFile, fmt.Sprintf("failed to find file %s in output logs", name))
}

func TestAccFsLs(t *testing.T) {
t.Log(internal.GetEnvOrSkipTest(t, "CLOUD_ENV"))

// setup some testdata in the workspace
w := helpers.NewWorkspaceTestdata(t)
w.AddFile("foo.txt", `hello, world`)
w.AddFile("python_notebook.py", cmdio.Heredoc(`
#Databricks notebook source
print(2)`))
w.AddFile("python_file.py", `print(1)`)
w.Mkdir("my_directory")
w.AddFile("my_directory/.gitkeep", "")

// run list command
stdout, stderr := internal.RequireSuccessfulRun(t, "fs", "ls", w.RootPath(), "--output=json")

// read and parse the output logs
parsedLogs := make([]map[string]string, 0)
err := json.Unmarshal(stdout.Bytes(), &parsedLogs)
require.NoError(t, err)

// make assertions on the output logs
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please refer to just "output". The "logs" suffix confuses it with our logging infrastructure.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1, we need a couple of type-safe CLI run test helpers

assert.Equal(t, stderr.String(), "")
assertObjectListed(t, parsedLogs, "python_file.py", "FILE")
assertObjectListed(t, parsedLogs, "foo.txt", "FILE")
assertObjectListed(t, parsedLogs, "python_notebook", "NOTEBOOK")
assertObjectListed(t, parsedLogs, "my_directory", "DIRECTORY")
}

func TestAccFsLsWithAbsoluteFlag(t *testing.T) {
t.Log(internal.GetEnvOrSkipTest(t, "CLOUD_ENV"))

// setup some testdata in the workspace
w := helpers.NewWorkspaceTestdata(t)
w.AddFile("foo.txt", `hello, world`)
w.AddFile("python_notebook.py", cmdio.Heredoc(`
#Databricks notebook source
print(2)`))
w.AddFile("python_file.py", `print(1)`)
w.Mkdir("my_directory")
w.AddFile("my_directory/.gitkeep", "")

// run list command
stdout, stderr := internal.RequireSuccessfulRun(t, "fs", "ls", w.RootPath(), "--output=json", "--absolute")

// read and parse the output logs
parsedLogs := make([]map[string]string, 0)
err := json.Unmarshal(stdout.Bytes(), &parsedLogs)
require.NoError(t, err)

// make assertions on the output logs
assert.Equal(t, stderr.String(), "")
assertObjectListed(t, parsedLogs, path.Join(w.RootPath(), "python_file.py"), "FILE")
assertObjectListed(t, parsedLogs, path.Join(w.RootPath(), "foo.txt"), "FILE")
assertObjectListed(t, parsedLogs, path.Join(w.RootPath(), "python_notebook"), "NOTEBOOK")
assertObjectListed(t, parsedLogs, path.Join(w.RootPath(), "my_directory"), "DIRECTORY")
}
88 changes: 88 additions & 0 deletions internal/helpers/workspace_testdata.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package helpers

import (
"context"
"fmt"
"net/http"
"net/url"
"path"
"strings"
"testing"

"github.com/databricks/cli/internal"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/apierr"
"github.com/databricks/databricks-sdk-go/client"
"github.com/databricks/databricks-sdk-go/service/workspace"
"github.com/stretchr/testify/require"
)

type workspaceTestdata struct {
root string
t *testing.T
client *databricks.WorkspaceClient
}

func NewWorkspaceTestdata(t *testing.T) *workspaceTestdata {
ctx := context.Background()
w := databricks.Must(databricks.NewWorkspaceClient())

me, err := w.CurrentUser.Me(ctx)
require.NoError(t, err)
path := fmt.Sprintf("/Users/%s/%s", me.UserName, internal.RandomName("wsfs-files-"))

// Ensure directory exists, but doesn't exist YET!
// Otherwise we could inadvertently remove a directory that already exists on cleanup.
t.Logf("mkdir %s", path)
err = w.Workspace.MkdirsByPath(ctx, path)
require.NoError(t, err)

// Remove test directory on test completion.
t.Cleanup(func() {
t.Logf("rm -rf %s", path)
err := w.Workspace.Delete(ctx, workspace.Delete{
Path: path,
Recursive: true,
})
if err == nil || apierr.IsMissing(err) {
return
}
t.Logf("unable to remove temporary workspace path %s: %#v", path, err)
})

return &workspaceTestdata{
root: path,
t: t,
client: w,
}
}

func (w *workspaceTestdata) RootPath() string {
return w.root
}

func (w *workspaceTestdata) AddFile(name string, content string) {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you also add a test for DBFS?

path := path.Join(w.root, name)
ctx := context.Background()

// url path for uploading file API
urlPath := fmt.Sprintf(
"/api/2.0/workspace-files/import-file/%s?overwrite=true",
url.PathEscape(strings.TrimLeft(path, "/")),
)

// initialize API client
apiClient, err := client.New(w.client.Config)
require.NoError(w.t, err)

// Make API request
err = apiClient.Do(ctx, http.MethodPost, urlPath, content, nil)
require.NoError(w.t, err)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is equivalent to what filer.Filer does. Please reuse that instead of a separate impl.

}

func (w *workspaceTestdata) Mkdir(name string) {
ctx := context.Background()
path := path.Join(w.root, name)
err := w.client.Workspace.MkdirsByPath(ctx, path)
require.NoError(w.t, err)
}
12 changes: 12 additions & 0 deletions libs/cmdio/io.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,18 @@ func (c *cmdIO) Render(v any) error {
}
}

func RenderWithTemplate(ctx context.Context, v any, template string) error {
c := fromContext(ctx)
switch c.outputFormat {
case flags.OutputJSON:
return renderJson(c.out, v)
case flags.OutputText:
return renderTemplate(c.out, template, v)
default:
return fmt.Errorf("invalid output format: %s", c.outputFormat)
}
}

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can set the "template" annotation on the command to achieve the same result. Then this function doesn't need to be added.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tried that first, does not work because the cmdIO object is initialized in RootCmd, so overriding annotation does not work

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@shreyas-goenka please add this as a comment in the command and to this function.

Also, please make Render to call RenderWithTemplate with c.template, otherwise we have two code paths for the same thing

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done in followup

func Render(ctx context.Context, v any) error {
c := fromContext(ctx)
return c.Render(v)
Expand Down
4 changes: 4 additions & 0 deletions libs/cmdio/render.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"strings"
"text/tabwriter"
"text/template"
"time"

"github.com/fatih/color"
"github.com/nwidger/jsoncolor"
Expand Down Expand Up @@ -86,6 +87,9 @@ func renderTemplate(w io.Writer, tmpl string, v any) error {
}
return string(b), nil
},
"unix_date": func(t time.Time) string {
return t.UTC().Format(time.UnixDate)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i've looked at the output - it's very difficult to parse when piped through aws. use the 2006-01-02T15:04:05Z as a format.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done in the followup PR

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done in the followup PR

},
}).Parse(tmpl)
if err != nil {
return err
Expand Down
23 changes: 23 additions & 0 deletions libs/filer/filer.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"fmt"
"io"
"time"
)

type WriteMode int
Expand All @@ -13,6 +14,22 @@ const (
CreateParentDirectories = iota << 1
)

// This struct is an abstract over file information from different file
// systems like WSFS and DBFS. The names for the fields are inspired from https://pkg.go.dev/io/fs#FileInfo
type FileInfo struct {
// The type of the file in workspace
Type string

// Base name of the file
Name string

// Size in bytes of the file
Size int64

// Last Modified time of the file
ModTime time.Time
}

type FileAlreadyExistsError struct {
path string
}
Expand Down Expand Up @@ -41,4 +58,10 @@ type Filer interface {

// Delete file at `path`.
Delete(ctx context.Context, path string) error

// Return contents of directory at `path`
ReadDir(ctx context.Context, path string) ([]FileInfo, error)

// Creates directory at `path`, creating any intermediate directories as required
Mkdir(ctx context.Context, path string) error
}
5 changes: 0 additions & 5 deletions libs/filer/root_path.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,5 @@ func (p *RootPath) Join(name string) (string, error) {
return "", fmt.Errorf("relative path escapes root: %s", name)
}

// Don't allow name to resolve to the root path.
if strings.TrimPrefix(absPath, p.rootPath) == "" {
return "", fmt.Errorf("relative path resolves to root: %s", name)
}

return absPath, nil
}
35 changes: 20 additions & 15 deletions libs/filer/root_path_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,26 @@ func testRootPath(t *testing.T, uncleanRoot string) {
assert.NoError(t, err)
assert.Equal(t, cleanRoot+"/a/b/f/g", remotePath)

remotePath, err = rp.Join(".//a/..//./b/..")
assert.NoError(t, err)
assert.Equal(t, cleanRoot, remotePath)

remotePath, err = rp.Join("a/b/../..")
assert.NoError(t, err)
assert.Equal(t, cleanRoot, remotePath)

remotePath, err = rp.Join("")
assert.NoError(t, err)
assert.Equal(t, cleanRoot, remotePath)

remotePath, err = rp.Join(".")
assert.NoError(t, err)
assert.Equal(t, cleanRoot, remotePath)

remotePath, err = rp.Join("/")
assert.NoError(t, err)
assert.Equal(t, cleanRoot, remotePath)

_, err = rp.Join("..")
assert.ErrorContains(t, err, `relative path escapes root: ..`)

Expand All @@ -57,21 +77,6 @@ func testRootPath(t *testing.T, uncleanRoot string) {

_, err = rp.Join("../..")
assert.ErrorContains(t, err, `relative path escapes root: ../..`)

_, err = rp.Join(".//a/..//./b/..")
assert.ErrorContains(t, err, `relative path resolves to root: .//a/..//./b/..`)

_, err = rp.Join("a/b/../..")
assert.ErrorContains(t, err, "relative path resolves to root: a/b/../..")

_, err = rp.Join("")
assert.ErrorContains(t, err, "relative path resolves to root: ")

_, err = rp.Join(".")
assert.ErrorContains(t, err, "relative path resolves to root: .")

_, err = rp.Join("/")
assert.ErrorContains(t, err, "relative path resolves to root: /")
}

func TestRootPathClean(t *testing.T) {
Expand Down
Loading