From 2cf7acfa067238786ddc2620473eb61d80bad5ce Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Tue, 23 May 2023 11:56:14 +0200 Subject: [PATCH 1/3] Sync: Gracefully handle broken notebook files --- libs/sync/snapshot.go | 16 +++++++-------- libs/sync/snapshot_test.go | 40 +++++++++++++++++++++++++------------- libs/sync/sync.go | 2 +- 3 files changed, 35 insertions(+), 23 deletions(-) diff --git a/libs/sync/snapshot.go b/libs/sync/snapshot.go index b79202e7d9..3a8db0cfcb 100644 --- a/libs/sync/snapshot.go +++ b/libs/sync/snapshot.go @@ -172,7 +172,7 @@ func loadOrNewSnapshot(ctx context.Context, opts *SyncOptions) (*Snapshot, error return snapshot, nil } -func (s *Snapshot) diff(all []fileset.File) (change diff, err error) { +func (s *Snapshot) diff(ctx context.Context, all []fileset.File) (change diff, err error) { lastModifiedTimes := s.LastUpdatedTimes remoteToLocalNames := s.RemoteToLocalNames localToRemoteNames := s.LocalToRemoteNames @@ -191,18 +191,18 @@ func (s *Snapshot) diff(all []fileset.File) (change diff, err error) { if !seen || modified.After(lastSeenModified) { lastModifiedTimes[f.Relative] = modified - // change separators to '/' for file paths in remote store - unixFileName := filepath.ToSlash(f.Relative) - - // put file in databricks workspace - change.put = append(change.put, unixFileName) - // get file metadata about whether it's a notebook isNotebook, _, err := notebook.Detect(f.Absolute) if err != nil { - return change, err + log.Warnf(ctx, err.Error()) + continue } + // change separators to '/' for file paths in remote store + unixFileName := filepath.ToSlash(f.Relative) + // put file in databricks workspace + change.put = append(change.put, unixFileName) + // Strip extension for notebooks. remoteName := unixFileName if isNotebook { diff --git a/libs/sync/snapshot_test.go b/libs/sync/snapshot_test.go index 54cb86fc55..8154b79141 100644 --- a/libs/sync/snapshot_test.go +++ b/libs/sync/snapshot_test.go @@ -25,6 +25,8 @@ func assertKeysOfMap(t *testing.T, m map[string]time.Time, expectedKeys []string } func TestDiff(t *testing.T) { + ctx := context.Background() + // Create temp project dir projectDir := t.TempDir() fileSet, err := git.NewFileSet(projectDir) @@ -44,7 +46,7 @@ func TestDiff(t *testing.T) { // New files are put files, err := fileSet.All() assert.NoError(t, err) - change, err := state.diff(files) + change, err := state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 0) assert.Len(t, change.put, 2) @@ -59,7 +61,7 @@ func TestDiff(t *testing.T) { assert.NoError(t, err) files, err = fileSet.All() assert.NoError(t, err) - change, err = state.diff(files) + change, err = state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 0) @@ -74,7 +76,7 @@ func TestDiff(t *testing.T) { assert.NoError(t, err) files, err = fileSet.All() assert.NoError(t, err) - change, err = state.diff(files) + change, err = state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 1) assert.Len(t, change.put, 0) @@ -85,6 +87,8 @@ func TestDiff(t *testing.T) { } func TestSymlinkDiff(t *testing.T) { + ctx := context.Background() + // Create temp project dir projectDir := t.TempDir() fileSet, err := git.NewFileSet(projectDir) @@ -106,12 +110,14 @@ func TestSymlinkDiff(t *testing.T) { files, err := fileSet.All() assert.NoError(t, err) - change, err := state.diff(files) + change, err := state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.put, 1) } func TestFolderDiff(t *testing.T) { + ctx := context.Background() + // Create temp project dir projectDir := t.TempDir() fileSet, err := git.NewFileSet(projectDir) @@ -130,7 +136,7 @@ func TestFolderDiff(t *testing.T) { files, err := fileSet.All() assert.NoError(t, err) - change, err := state.diff(files) + change, err := state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 0) assert.Len(t, change.put, 1) @@ -139,7 +145,7 @@ func TestFolderDiff(t *testing.T) { f1.Remove(t) files, err = fileSet.All() assert.NoError(t, err) - change, err = state.diff(files) + change, err = state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 1) assert.Len(t, change.put, 0) @@ -147,6 +153,8 @@ func TestFolderDiff(t *testing.T) { } func TestPythonNotebookDiff(t *testing.T) { + ctx := context.Background() + // Create temp project dir projectDir := t.TempDir() fileSet, err := git.NewFileSet(projectDir) @@ -164,7 +172,7 @@ func TestPythonNotebookDiff(t *testing.T) { files, err := fileSet.All() assert.NoError(t, err) foo.Overwrite(t, "# Databricks notebook source\nprint(\"abc\")") - change, err := state.diff(files) + change, err := state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 0) assert.Len(t, change.put, 1) @@ -178,7 +186,7 @@ func TestPythonNotebookDiff(t *testing.T) { foo.Overwrite(t, "print(\"abc\")") files, err = fileSet.All() assert.NoError(t, err) - change, err = state.diff(files) + change, err = state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 1) assert.Len(t, change.put, 1) @@ -192,7 +200,7 @@ func TestPythonNotebookDiff(t *testing.T) { foo.Overwrite(t, "# Databricks notebook source\nprint(\"def\")") files, err = fileSet.All() assert.NoError(t, err) - change, err = state.diff(files) + change, err = state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 1) assert.Len(t, change.put, 1) @@ -207,7 +215,7 @@ func TestPythonNotebookDiff(t *testing.T) { assert.NoError(t, err) files, err = fileSet.All() assert.NoError(t, err) - change, err = state.diff(files) + change, err = state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 1) assert.Len(t, change.put, 0) @@ -218,6 +226,8 @@ func TestPythonNotebookDiff(t *testing.T) { } func TestErrorWhenIdenticalRemoteName(t *testing.T) { + ctx := context.Background() + // Create temp project dir projectDir := t.TempDir() fileSet, err := git.NewFileSet(projectDir) @@ -235,7 +245,7 @@ func TestErrorWhenIdenticalRemoteName(t *testing.T) { defer vanillaFoo.Close(t) files, err := fileSet.All() assert.NoError(t, err) - change, err := state.diff(files) + change, err := state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 0) assert.Len(t, change.put, 2) @@ -246,11 +256,13 @@ func TestErrorWhenIdenticalRemoteName(t *testing.T) { pythonFoo.Overwrite(t, "# Databricks notebook source\nprint(\"def\")") files, err = fileSet.All() assert.NoError(t, err) - change, err = state.diff(files) + change, err = state.diff(ctx, files) assert.ErrorContains(t, err, "both foo and foo.py point to the same remote file location foo. Please remove one of them from your local project") } func TestNoErrorRenameWithIdenticalRemoteName(t *testing.T) { + ctx := context.Background() + // Create temp project dir projectDir := t.TempDir() fileSet, err := git.NewFileSet(projectDir) @@ -267,7 +279,7 @@ func TestNoErrorRenameWithIdenticalRemoteName(t *testing.T) { pythonFoo.Overwrite(t, "# Databricks notebook source\n") files, err := fileSet.All() assert.NoError(t, err) - change, err := state.diff(files) + change, err := state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 0) assert.Len(t, change.put, 1) @@ -279,7 +291,7 @@ func TestNoErrorRenameWithIdenticalRemoteName(t *testing.T) { sqlFoo.Overwrite(t, "-- Databricks notebook source\n") files, err = fileSet.All() assert.NoError(t, err) - change, err = state.diff(files) + change, err = state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 1) assert.Len(t, change.put, 1) diff --git a/libs/sync/sync.go b/libs/sync/sync.go index 64c2328ace..54d0624e77 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -134,7 +134,7 @@ func (s *Sync) RunOnce(ctx context.Context) error { return err } - change, err := s.snapshot.diff(all) + change, err := s.snapshot.diff(ctx, all) if err != nil { return err } From 551cb7ce3b7cb8b481e129ebfa607cf348dff05d Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 23 May 2023 12:05:04 +0200 Subject: [PATCH 2/3] Update libs/sync/snapshot.go --- libs/sync/snapshot.go | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/sync/snapshot.go b/libs/sync/snapshot.go index 3a8db0cfcb..e6a64ab12a 100644 --- a/libs/sync/snapshot.go +++ b/libs/sync/snapshot.go @@ -200,6 +200,7 @@ func (s *Snapshot) diff(ctx context.Context, all []fileset.File) (change diff, e // change separators to '/' for file paths in remote store unixFileName := filepath.ToSlash(f.Relative) + // put file in databricks workspace change.put = append(change.put, unixFileName) From cf4039f7ac329c960c01d47dd19437f52a3d9198 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 23 May 2023 12:05:08 +0200 Subject: [PATCH 3/3] Update libs/sync/snapshot.go --- libs/sync/snapshot.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libs/sync/snapshot.go b/libs/sync/snapshot.go index e6a64ab12a..1ea7b18bdb 100644 --- a/libs/sync/snapshot.go +++ b/libs/sync/snapshot.go @@ -194,6 +194,8 @@ func (s *Snapshot) diff(ctx context.Context, all []fileset.File) (change diff, e // get file metadata about whether it's a notebook isNotebook, _, err := notebook.Detect(f.Absolute) if err != nil { + // Ignore this file if we're unable to determine the notebook type. + // Trying to upload such a file to the workspace would fail anyway. log.Warnf(ctx, err.Error()) continue }