From 63b8105751b8a8c62e26553995005baf41b69c6a Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 18 Jul 2024 15:34:08 +0200 Subject: [PATCH 1/3] Add tests for the Workspace API readahead cache --- libs/filer/workspace_files_cache_test.go | 278 +++++++++++++++++++++++ 1 file changed, 278 insertions(+) create mode 100644 libs/filer/workspace_files_cache_test.go diff --git a/libs/filer/workspace_files_cache_test.go b/libs/filer/workspace_files_cache_test.go new file mode 100644 index 0000000000..f81528e342 --- /dev/null +++ b/libs/filer/workspace_files_cache_test.go @@ -0,0 +1,278 @@ +package filer + +import ( + "context" + "fmt" + "io" + "io/fs" + "testing" + + "github.com/databricks/databricks-sdk-go/service/workspace" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var errNotImplemented = fmt.Errorf("not implemented") + +type cacheTestFiler struct { + calls int + + readDir map[string][]fs.DirEntry + stat map[string]fs.FileInfo +} + +func (m *cacheTestFiler) Write(ctx context.Context, path string, reader io.Reader, mode ...WriteMode) error { + return errNotImplemented +} + +func (m *cacheTestFiler) Read(ctx context.Context, path string) (io.ReadCloser, error) { + return nil, errNotImplemented +} + +func (m *cacheTestFiler) Delete(ctx context.Context, path string, mode ...DeleteMode) error { + return errNotImplemented +} + +func (m *cacheTestFiler) ReadDir(ctx context.Context, path string) ([]fs.DirEntry, error) { + m.calls++ + if fi, ok := m.readDir[path]; ok { + delete(m.readDir, path) + return fi, nil + } + return nil, fs.ErrNotExist +} + +func (m *cacheTestFiler) Mkdir(ctx context.Context, path string) error { + return errNotImplemented +} + +func (m *cacheTestFiler) Stat(ctx context.Context, name string) (fs.FileInfo, error) { + m.calls++ + if fi, ok := m.stat[name]; ok { + delete(m.stat, name) + return fi, nil + } + return nil, fs.ErrNotExist +} + +func TestWorkspaceFilesCache_ReadDirCache(t *testing.T) { + f := &cacheTestFiler{ + readDir: map[string][]fs.DirEntry{ + "dir1": { + wsfsDirEntry{ + wsfsFileInfo{ + ObjectInfo: workspace.ObjectInfo{ + Path: "file1", + Size: 1, + ObjectType: workspace.ObjectTypeFile, + }, + }, + }, + wsfsDirEntry{ + wsfsFileInfo{ + ObjectInfo: workspace.ObjectInfo{ + Path: "file2", + Size: 2, + ObjectType: workspace.ObjectTypeFile, + }, + }, + }, + }, + }, + } + + ctx := context.Background() + cache := newWorkspaceFilesReadaheadCache(f) + defer cache.Cleanup() + + // First read dir should hit the filer, second should hit the cache. + for range 2 { + fi, err := cache.ReadDir(ctx, "dir1") + require.NoError(t, err) + if assert.Len(t, fi, 2) { + assert.Equal(t, "file1", fi[0].Name()) + assert.Equal(t, "file2", fi[1].Name()) + } + + // Modify the slice to check that mutations are not reflected in the cache. + fi[0] = nil + fi[1] = nil + } + + // Third stat should hit the filer, fourth should hit the cache. + for range 2 { + _, err := cache.ReadDir(ctx, "dir2") + assert.ErrorIs(t, err, fs.ErrNotExist) + } + + // Assert we only called the filer twice. + assert.Equal(t, 2, f.calls) +} + +func TestWorkspaceFilesCache_StatCache(t *testing.T) { + f := &cacheTestFiler{ + stat: map[string]fs.FileInfo{ + "file1": &wsfsFileInfo{ObjectInfo: workspace.ObjectInfo{Path: "file1", Size: 1}}, + }, + } + + ctx := context.Background() + cache := newWorkspaceFilesReadaheadCache(f) + defer cache.Cleanup() + + // First stat should hit the filer, second should hit the cache. + for range 2 { + fi, err := cache.Stat(ctx, "file1") + require.NoError(t, err) + assert.Equal(t, "file1", fi.Name()) + assert.Equal(t, int64(1), fi.Size()) + } + + // Third stat should hit the filer, fourth should hit the cache. + for range 2 { + _, err := cache.Stat(ctx, "file2") + assert.ErrorIs(t, err, fs.ErrNotExist) + } + + // Assert we only called the filer twice. + assert.Equal(t, 2, f.calls) +} + +func TestWorkspaceFilesCache_ReadDirPopulatesStatCache(t *testing.T) { + f := &cacheTestFiler{ + readDir: map[string][]fs.DirEntry{ + "dir1": { + wsfsDirEntry{ + wsfsFileInfo{ + ObjectInfo: workspace.ObjectInfo{ + Path: "file1", + Size: 1, + ObjectType: workspace.ObjectTypeFile, + }, + }, + }, + wsfsDirEntry{ + wsfsFileInfo{ + ObjectInfo: workspace.ObjectInfo{ + Path: "file2", + Size: 2, + ObjectType: workspace.ObjectTypeFile, + }, + }, + }, + wsfsDirEntry{ + wsfsFileInfo{ + ObjectInfo: workspace.ObjectInfo{ + Path: "notebook1", + Size: 1, + ObjectType: workspace.ObjectTypeNotebook, + }, + ReposExportFormat: "this should not end up in the stat cache", + }, + }, + }, + }, + stat: map[string]fs.FileInfo{ + "dir1/notebook1": wsfsFileInfo{ + ObjectInfo: workspace.ObjectInfo{ + Path: "notebook1", + Size: 1, + ObjectType: workspace.ObjectTypeNotebook, + }, + ReposExportFormat: workspace.ExportFormatJupyter, + }, + }, + } + + ctx := context.Background() + cache := newWorkspaceFilesReadaheadCache(f) + defer cache.Cleanup() + + // Issue read dir to populate the stat cache. + _, err := cache.ReadDir(ctx, "dir1") + require.NoError(t, err) + + // Stat on a file in the directory should hit the cache. + fi, err := cache.Stat(ctx, "dir1/file1") + require.NoError(t, err) + assert.Equal(t, "file1", fi.Name()) + assert.Equal(t, int64(1), fi.Size()) + + // If the containing directory has been read, absence is also inferred from the cache. + _, err = cache.Stat(ctx, "dir1/file3") + assert.ErrorIs(t, err, fs.ErrNotExist) + + // Stat on a notebook in the directory should have been performed in the background. + fi, err = cache.Stat(ctx, "dir1/notebook1") + require.NoError(t, err) + assert.Equal(t, "notebook1", fi.Name()) + assert.Equal(t, int64(1), fi.Size()) + assert.Equal(t, workspace.ExportFormatJupyter, fi.(wsfsFileInfo).ReposExportFormat) + + // Assert we called the filer twice (once for read dir, once for stat on the notebook). + assert.Equal(t, 2, f.calls) +} + +func TestWorkspaceFilesCache_ReadDirTriggersReadahead(t *testing.T) { + f := &cacheTestFiler{ + readDir: map[string][]fs.DirEntry{ + "a": { + wsfsDirEntry{ + wsfsFileInfo{ + ObjectInfo: workspace.ObjectInfo{ + Path: "b1", + ObjectType: workspace.ObjectTypeDirectory, + }, + }, + }, + wsfsDirEntry{ + wsfsFileInfo{ + ObjectInfo: workspace.ObjectInfo{ + Path: "b2", + ObjectType: workspace.ObjectTypeDirectory, + }, + }, + }, + }, + "a/b1": { + wsfsDirEntry{ + wsfsFileInfo{ + ObjectInfo: workspace.ObjectInfo{ + Path: "file1", + Size: 1, + ObjectType: workspace.ObjectTypeFile, + }, + }, + }, + }, + "a/b2": {}, + }, + } + + ctx := context.Background() + cache := newWorkspaceFilesReadaheadCache(f) + defer cache.Cleanup() + + // Issue read dir to populate the stat cache. + _, err := cache.ReadDir(ctx, "a") + require.NoError(t, err) + + // Stat on a directory in the directory should hit the cache. + fi, err := cache.Stat(ctx, "a/b1") + require.NoError(t, err) + assert.Equal(t, "b1", fi.Name()) + assert.True(t, fi.IsDir()) + + // Stat on a file in a nested directory should hit the cache. + fi, err = cache.Stat(ctx, "a/b1/file1") + require.NoError(t, err) + assert.Equal(t, "file1", fi.Name()) + assert.Equal(t, int64(1), fi.Size()) + + // Stat on a non-existing file in an empty nested directory should hit the cache. + _, err = cache.Stat(ctx, "a/b2/file2") + assert.ErrorIs(t, err, fs.ErrNotExist) + + // Assert we called the filer 3 times; once for each directory. + assert.Equal(t, 3, f.calls) +} From 7896eb9077a11b2b74eef87e6ed66facdd05d88e Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 18 Jul 2024 15:38:17 +0200 Subject: [PATCH 2/3] Conditional asserts --- libs/filer/workspace_files_cache_test.go | 44 +++++++++++++----------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/libs/filer/workspace_files_cache_test.go b/libs/filer/workspace_files_cache_test.go index f81528e342..8446f5725d 100644 --- a/libs/filer/workspace_files_cache_test.go +++ b/libs/filer/workspace_files_cache_test.go @@ -9,7 +9,6 @@ import ( "github.com/databricks/databricks-sdk-go/service/workspace" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) var errNotImplemented = fmt.Errorf("not implemented") @@ -88,7 +87,7 @@ func TestWorkspaceFilesCache_ReadDirCache(t *testing.T) { // First read dir should hit the filer, second should hit the cache. for range 2 { fi, err := cache.ReadDir(ctx, "dir1") - require.NoError(t, err) + assert.NoError(t, err) if assert.Len(t, fi, 2) { assert.Equal(t, "file1", fi[0].Name()) assert.Equal(t, "file2", fi[1].Name()) @@ -123,9 +122,10 @@ func TestWorkspaceFilesCache_StatCache(t *testing.T) { // First stat should hit the filer, second should hit the cache. for range 2 { fi, err := cache.Stat(ctx, "file1") - require.NoError(t, err) - assert.Equal(t, "file1", fi.Name()) - assert.Equal(t, int64(1), fi.Size()) + if assert.NoError(t, err) { + assert.Equal(t, "file1", fi.Name()) + assert.Equal(t, int64(1), fi.Size()) + } } // Third stat should hit the filer, fourth should hit the cache. @@ -190,13 +190,14 @@ func TestWorkspaceFilesCache_ReadDirPopulatesStatCache(t *testing.T) { // Issue read dir to populate the stat cache. _, err := cache.ReadDir(ctx, "dir1") - require.NoError(t, err) + assert.NoError(t, err) // Stat on a file in the directory should hit the cache. fi, err := cache.Stat(ctx, "dir1/file1") - require.NoError(t, err) - assert.Equal(t, "file1", fi.Name()) - assert.Equal(t, int64(1), fi.Size()) + if assert.NoError(t, err) { + assert.Equal(t, "file1", fi.Name()) + assert.Equal(t, int64(1), fi.Size()) + } // If the containing directory has been read, absence is also inferred from the cache. _, err = cache.Stat(ctx, "dir1/file3") @@ -204,10 +205,11 @@ func TestWorkspaceFilesCache_ReadDirPopulatesStatCache(t *testing.T) { // Stat on a notebook in the directory should have been performed in the background. fi, err = cache.Stat(ctx, "dir1/notebook1") - require.NoError(t, err) - assert.Equal(t, "notebook1", fi.Name()) - assert.Equal(t, int64(1), fi.Size()) - assert.Equal(t, workspace.ExportFormatJupyter, fi.(wsfsFileInfo).ReposExportFormat) + if assert.NoError(t, err) { + assert.Equal(t, "notebook1", fi.Name()) + assert.Equal(t, int64(1), fi.Size()) + assert.Equal(t, workspace.ExportFormatJupyter, fi.(wsfsFileInfo).ReposExportFormat) + } // Assert we called the filer twice (once for read dir, once for stat on the notebook). assert.Equal(t, 2, f.calls) @@ -255,19 +257,21 @@ func TestWorkspaceFilesCache_ReadDirTriggersReadahead(t *testing.T) { // Issue read dir to populate the stat cache. _, err := cache.ReadDir(ctx, "a") - require.NoError(t, err) + assert.NoError(t, err) // Stat on a directory in the directory should hit the cache. fi, err := cache.Stat(ctx, "a/b1") - require.NoError(t, err) - assert.Equal(t, "b1", fi.Name()) - assert.True(t, fi.IsDir()) + if assert.NoError(t, err) { + assert.Equal(t, "b1", fi.Name()) + assert.True(t, fi.IsDir()) + } // Stat on a file in a nested directory should hit the cache. fi, err = cache.Stat(ctx, "a/b1/file1") - require.NoError(t, err) - assert.Equal(t, "file1", fi.Name()) - assert.Equal(t, int64(1), fi.Size()) + if assert.NoError(t, err) { + assert.Equal(t, "file1", fi.Name()) + assert.Equal(t, int64(1), fi.Size()) + } // Stat on a non-existing file in an empty nested directory should hit the cache. _, err = cache.Stat(ctx, "a/b2/file2") From 380c560e875b9da0f83541132e251464efe8b530 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 19 Jul 2024 08:55:27 +0200 Subject: [PATCH 3/3] Split out test for cache isolation --- libs/filer/workspace_files_cache_test.go | 45 +++++++++++++++++++++--- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/libs/filer/workspace_files_cache_test.go b/libs/filer/workspace_files_cache_test.go index 8446f5725d..8983c59822 100644 --- a/libs/filer/workspace_files_cache_test.go +++ b/libs/filer/workspace_files_cache_test.go @@ -92,10 +92,6 @@ func TestWorkspaceFilesCache_ReadDirCache(t *testing.T) { assert.Equal(t, "file1", fi[0].Name()) assert.Equal(t, "file2", fi[1].Name()) } - - // Modify the slice to check that mutations are not reflected in the cache. - fi[0] = nil - fi[1] = nil } // Third stat should hit the filer, fourth should hit the cache. @@ -108,6 +104,47 @@ func TestWorkspaceFilesCache_ReadDirCache(t *testing.T) { assert.Equal(t, 2, f.calls) } +func TestWorkspaceFilesCache_ReadDirCacheIsolation(t *testing.T) { + f := &cacheTestFiler{ + readDir: map[string][]fs.DirEntry{ + "dir": { + wsfsDirEntry{ + wsfsFileInfo{ + ObjectInfo: workspace.ObjectInfo{ + Path: "file", + Size: 1, + ObjectType: workspace.ObjectTypeFile, + }, + }, + }, + }, + }, + } + + ctx := context.Background() + cache := newWorkspaceFilesReadaheadCache(f) + defer cache.Cleanup() + + // First read dir should hit the filer, second should hit the cache. + entries, err := cache.ReadDir(ctx, "dir") + assert.NoError(t, err) + assert.Equal(t, "file", entries[0].Name()) + + // Modify the entry to check that mutations are not reflected in the cache. + entries[0] = wsfsDirEntry{ + wsfsFileInfo{ + ObjectInfo: workspace.ObjectInfo{ + Path: "tainted", + }, + }, + } + + // Read the directory again to check that the cache is isolated. + entries, err = cache.ReadDir(ctx, "dir") + assert.NoError(t, err) + assert.Equal(t, "file", entries[0].Name()) +} + func TestWorkspaceFilesCache_StatCache(t *testing.T) { f := &cacheTestFiler{ stat: map[string]fs.FileInfo{