diff --git a/libs/notebook/detect.go b/libs/notebook/detect.go index 0b7c04d6d1..582a88479f 100644 --- a/libs/notebook/detect.go +++ b/libs/notebook/detect.go @@ -12,27 +12,69 @@ import ( "github.com/databricks/databricks-sdk-go/service/workspace" ) +// FileInfoWithWorkspaceObjectInfo is an interface implemented by [fs.FileInfo] values that +// contain a file's underlying [workspace.ObjectInfo]. +// +// This may be the case when working with a [filer.Filer] backed by the workspace API. +// For these files we do not need to read a file's header to know if it is a notebook; +// we can use the [workspace.ObjectInfo] value directly. +type FileInfoWithWorkspaceObjectInfo interface { + WorkspaceObjectInfo() workspace.ObjectInfo +} + // Maximum length in bytes of the notebook header. const headerLength = 32 -// readHeader reads the first N bytes from a file. -func readHeader(fsys fs.FS, name string) ([]byte, error) { +// file wraps an fs.File and implements a few helper methods such that +// they don't need to be inlined in the [DetectWithFS] function below (open, close, read the header line, look up object info). +type file struct { + f fs.File +} + +func openFile(fsys fs.FS, name string) (*file, error) { f, err := fsys.Open(name) if err != nil { return nil, err } - defer f.Close() + return &file{f: f}, nil +} + +func (f file) close() error { + return f.f.Close() +} +func (f file) readHeader() (string, error) { // Scan header line with some padding. var buf = make([]byte, headerLength) - n, err := f.Read([]byte(buf)) + n, err := f.f.Read([]byte(buf)) if err != nil && err != io.EOF { - return nil, err + return "", err } // Trim buffer to actual read bytes. - return buf[:n], nil + buf = buf[:n] + + // Read the first line from the buffer. The result of Scan is not checked, so an empty buffer yields an empty string. + scanner := bufio.NewScanner(bytes.NewReader(buf)) + scanner.Scan() + return scanner.Text(), nil +} + +// getObjectInfo returns the [workspace.ObjectInfo] for the file if it is +// part of the [fs.FileInfo] value returned by the file's Stat method. The boolean result reports whether it was; the error is the one returned by Stat. 
+func (f file) getObjectInfo() (oi workspace.ObjectInfo, ok bool, err error) { + stat, err := f.f.Stat() + if err != nil { + return workspace.ObjectInfo{}, false, err + } + + // Use object info if available, i.e. if stat also implements FileInfoWithWorkspaceObjectInfo. + if i, ok := stat.(FileInfoWithWorkspaceObjectInfo); ok { + return i.WorkspaceObjectInfo(), true, nil + } + + return workspace.ObjectInfo{}, false, nil } // Detect returns whether the file at path is a Databricks notebook. @@ -40,13 +82,27 @@ func readHeader(fsys fs.FS, name string) ([]byte, error) { func DetectWithFS(fsys fs.FS, name string) (notebook bool, language workspace.Language, err error) { header := "" - buf, err := readHeader(fsys, name) + f, err := openFile(fsys, name) + if err != nil { + return false, "", err + } + + defer f.close() + + // Use object info if available. In that case the result is returned without reading the file. + oi, ok, err := f.getObjectInfo() + if err != nil { + return false, "", err + } + if ok { + return oi.ObjectType == workspace.ObjectTypeNotebook, oi.Language, nil + } + + // Read the first line of the file (readHeader reads at most headerLength bytes). + fileHeader, err := f.readHeader() if err != nil { return false, "", err } - scanner := bufio.NewScanner(bytes.NewReader(buf)) - scanner.Scan() - fileHeader := scanner.Text() // Determine which header to expect based on filename extension. 
ext := strings.ToLower(filepath.Ext(name)) diff --git a/libs/notebook/detect_test.go b/libs/notebook/detect_test.go index fd3337579c..ad89d6dd53 100644 --- a/libs/notebook/detect_test.go +++ b/libs/notebook/detect_test.go @@ -99,3 +99,21 @@ func TestDetectFileWithLongHeader(t *testing.T) { require.NoError(t, err) assert.False(t, nb) } + +func TestDetectWithObjectInfo(t *testing.T) { + fakeFS := &fakeFS{ + fakeFile{ + fakeFileInfo{ + workspace.ObjectInfo{ + ObjectType: workspace.ObjectTypeNotebook, + Language: workspace.LanguagePython, + }, + }, + }, + } + + nb, lang, err := DetectWithFS(fakeFS, "doesntmatter") + require.NoError(t, err) + assert.True(t, nb) + assert.Equal(t, workspace.LanguagePython, lang) +} diff --git a/libs/notebook/fakefs_test.go b/libs/notebook/fakefs_test.go new file mode 100644 index 0000000000..4ac135dd4a --- /dev/null +++ b/libs/notebook/fakefs_test.go @@ -0,0 +1,77 @@ +package notebook + +import ( + "fmt" + "io/fs" + "time" + + "github.com/databricks/databricks-sdk-go/service/workspace" +) + +type fakeFS struct { + fakeFile +} + +type fakeFile struct { + fakeFileInfo +} + +func (f fakeFile) Close() error { + return nil +} + +func (f fakeFile) Read(p []byte) (n int, err error) { + return 0, fmt.Errorf("not implemented") +} + +func (f fakeFile) Stat() (fs.FileInfo, error) { + return f.fakeFileInfo, nil +} + +type fakeFileInfo struct { + oi workspace.ObjectInfo +} + +func (f fakeFileInfo) WorkspaceObjectInfo() workspace.ObjectInfo { + return f.oi +} + +func (f fakeFileInfo) Name() string { + return "" +} + +func (f fakeFileInfo) Size() int64 { + return 0 +} + +func (f fakeFileInfo) Mode() fs.FileMode { + return 0 +} + +func (f fakeFileInfo) ModTime() time.Time { + return time.Time{} +} + +func (f fakeFileInfo) IsDir() bool { + return false +} + +func (f fakeFileInfo) Sys() any { + return nil +} + +func (f fakeFS) Open(name string) (fs.File, error) { + return f.fakeFile, nil +} + +func (f fakeFS) Stat(name string) (fs.FileInfo, error) { + 
panic("not implemented") +} + +func (f fakeFS) ReadDir(name string) ([]fs.DirEntry, error) { + panic("not implemented") +} + +func (f fakeFS) ReadFile(name string) ([]byte, error) { + panic("not implemented") +}