From 2cd07b69bc2ca231ad041176d03611ef7536adb5 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 9 Oct 2023 17:07:58 +0200 Subject: [PATCH 1/4] Support .gitignore syntax in sync section and make sure it works recursively --- libs/fileset/glob.go | 36 +++++---------- libs/fileset/glob_test.go | 94 +++++++++++++++++++++++++++++++++++++++ libs/fileset/ignorer.go | 29 ++++++++++++ libs/sync/sync_test.go | 53 ++++++++++++++++++++-- 4 files changed, 182 insertions(+), 30 deletions(-) diff --git a/libs/fileset/glob.go b/libs/fileset/glob.go index 7a9f130bde..c160204605 100644 --- a/libs/fileset/glob.go +++ b/libs/fileset/glob.go @@ -1,12 +1,11 @@ package fileset import ( - "io/fs" - "os" "path/filepath" ) type GlobSet struct { + fs *FileSet root string patterns []string } @@ -16,34 +15,19 @@ func NewGlobSet(root string, includes []string) (*GlobSet, error) { if err != nil { return nil, err } + for k := range includes { - includes[k] = filepath.Join(absRoot, filepath.FromSlash(includes[k])) + includes[k] = filepath.Clean(includes[k]) + } + + fs := &FileSet{ + absRoot, + newIncluder(includes), } - return &GlobSet{absRoot, includes}, nil + return &GlobSet{fs, absRoot, includes}, nil } // Return all files which matches defined glob patterns func (s *GlobSet) All() ([]File, error) { - files := make([]File, 0) - for _, pattern := range s.patterns { - matches, err := filepath.Glob(pattern) - if err != nil { - return files, err - } - - for _, match := range matches { - matchRel, err := filepath.Rel(s.root, match) - if err != nil { - return files, err - } - - stat, err := os.Stat(match) - if err != nil { - return files, err - } - files = append(files, File{fs.FileInfoToDirEntry(stat), match, matchRel}) - } - } - - return files, nil + return s.fs.All() } diff --git a/libs/fileset/glob_test.go b/libs/fileset/glob_test.go index f6ac7e1929..e8d3696c48 100644 --- a/libs/fileset/glob_test.go +++ b/libs/fileset/glob_test.go @@ -5,6 +5,7 @@ import ( "os" "path/filepath" "slices" + "strings" "testing" "github.com/stretchr/testify/require" @@ -63,3 +64,96 @@ func TestGlobFilesetWithRelativeRoot(t *testing.T) { require.True(t, filepath.IsAbs(f.Absolute)) } } + +func TestGlobFilesetRecursively(t *testing.T) { + cwd, err := os.Getwd() + require.NoError(t, err) + root := filepath.Join(cwd, "..", "git") + + entries := make([]string, 0) + err = filepath.Walk(filepath.Join(root, "testdata"), func(path string, info fs.FileInfo, err error) error { + if !info.IsDir() { + entries = append(entries, path) + } + return nil + }) + require.NoError(t, err) + + g, err := NewGlobSet(root, []string{ + "testdata/*", + }) + require.NoError(t, err) + + files, err := g.All() + require.NoError(t, err) + + require.Equal(t, len(files), len(entries)) + for _, f := range files { + exists := slices.ContainsFunc(entries, func(path string) bool { + return path == f.Absolute + }) + require.True(t, exists) + } +} + +func TestGlobFilesetDir(t *testing.T) { + cwd, err := os.Getwd() + require.NoError(t, err) + root := filepath.Join(cwd, "..", "git") + + entries := make([]string, 0) + err = filepath.Walk(filepath.Join(root, "testdata", "a"), func(path string, info fs.FileInfo, err error) error { + if !info.IsDir() { + entries = append(entries, path) + } + return nil + }) + require.NoError(t, err) + + g, err := NewGlobSet(root, []string{ + "testdata/a", + }) + require.NoError(t, err) + + files, err := g.All() + require.NoError(t, err) + + require.Equal(t, len(files), len(entries)) + for _, f := range files { + exists := slices.ContainsFunc(entries, func(path string) bool { + return path == f.Absolute + }) + require.True(t, exists) + } +} + +func TestGlobFilesetDoubleQuotesWithFilePatterns(t *testing.T) { + cwd, err := os.Getwd() + require.NoError(t, err) + root := filepath.Join(cwd, "..", "git") + + entries := make([]string, 0) + err = filepath.Walk(filepath.Join(root, "testdata"), func(path string, info fs.FileInfo, err error) error { + if strings.HasSuffix(path, ".txt") { + entries = append(entries, path) + } + return nil + }) + require.NoError(t, err) + + g, err := NewGlobSet(root, []string{ + "testdata/**/*.txt", + }) + require.NoError(t, err) + + files, err := g.All() + require.NoError(t, err) + + require.Equal(t, len(files), len(entries)) + for _, f := range files { + exists := slices.ContainsFunc(entries, func(path string) bool { + return path == f.Absolute + }) + require.True(t, exists) + } +} diff --git a/libs/fileset/ignorer.go b/libs/fileset/ignorer.go index ba066f416d..5e8a2220be 100644 --- a/libs/fileset/ignorer.go +++ b/libs/fileset/ignorer.go @@ -1,5 +1,9 @@ package fileset +import ( + ignore "github.com/sabhiram/go-gitignore" +) + // Ignorer is the interface for what determines if a path // in the [FileSet] must be ignored or not. type Ignorer interface { @@ -17,3 +21,28 @@ func (nopIgnorer) IgnoreFile(path string) (bool, error) { func (nopIgnorer) IgnoreDirectory(path string) (bool, error) { return false, nil } + +type includer struct { + matcher *ignore.GitIgnore +} + +func newIncluder(includes []string) *includer { + matcher := ignore.CompileIgnoreLines(includes...) + return &includer{ + matcher, + } +} + +func (i *includer) IgnoreFile(path string) (bool, error) { + return i.ignore(path), nil +} + +func (i *includer) IgnoreDirectory(path string) (bool, error) { + return false, nil +} + +func (i *includer) ignore(path string) bool { + matched := i.matcher.MatchesPath(path) + // If matched, do not ignore the file because we want to include it + return !matched +} diff --git a/libs/sync/sync_test.go b/libs/sync/sync_test.go index 99c7e04b15..0f1ad61baf 100644 --- a/libs/sync/sync_test.go +++ b/libs/sync/sync_test.go @@ -48,8 +48,25 @@ func setupFiles(t *testing.T) string { err = createFile(dbDir, "e.go") require.NoError(t, err) - return dir + testDir := filepath.Join(dir, "test") + err = os.Mkdir(testDir, 0755) + require.NoError(t, err) + + sub1 := filepath.Join(testDir, "sub1") + err = os.Mkdir(sub1, 0755) + require.NoError(t, err) + + err = createFile(sub1, "f.go") + require.NoError(t, err) + + sub2 := filepath.Join(sub1, "sub2") + err = os.Mkdir(sub2, 0755) + require.NoError(t, err) + + err = createFile(sub2, "g.go") + require.NoError(t, err) + return dir } func TestGetFileSet(t *testing.T) { @@ -78,7 +95,7 @@ func TestGetFileSet(t *testing.T) { fileList, err := getFileList(ctx, s) require.NoError(t, err) - require.Equal(t, len(fileList), 7) + require.Equal(t, len(fileList), 9) inc, err = fileset.NewGlobSet(dir, []string{}) require.NoError(t, err) @@ -98,7 +115,7 @@ func TestGetFileSet(t *testing.T) { require.NoError(t, err) require.Equal(t, len(fileList), 1) - inc, err = fileset.NewGlobSet(dir, []string{".databricks/*.*"}) + inc, err = fileset.NewGlobSet(dir, []string{".databricks/*"}) require.NoError(t, err) excl, err = fileset.NewGlobSet(dir, []string{}) @@ -114,6 +131,34 @@ func TestGetFileSet(t *testing.T) { fileList, err = getFileList(ctx, s) require.NoError(t, err) - require.Equal(t, len(fileList), 8) + require.Equal(t, len(fileList), 10) +} + +func TestRecursiveExclude(t *testing.T) { + ctx := context.Background() + + dir := setupFiles(t) + fileSet, err := git.NewFileSet(dir) + require.NoError(t, err) + err = fileSet.EnsureValidGitIgnoreExists() + require.NoError(t, err) + + inc, err := fileset.NewGlobSet(dir, []string{}) + require.NoError(t, err) + + excl, err := fileset.NewGlobSet(dir, []string{"test/**"}) + require.NoError(t, err) + + s := &Sync{ + SyncOptions: &SyncOptions{}, + + fileSet: fileSet, + includeFileSet: inc, + excludeFileSet: excl, + } + + fileList, err := getFileList(ctx, s) + require.NoError(t, err) + require.Equal(t, len(fileList), 7) } From d650fd00e260bfc780f32593059423fc143434bb Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 9 Oct 2023 17:22:11 +0200 Subject: [PATCH 2/4] fix windows tests --- libs/fileset/glob.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/fileset/glob.go b/libs/fileset/glob.go index c160204605..5088f3f888 100644 --- a/libs/fileset/glob.go +++ b/libs/fileset/glob.go @@ -17,7 +17,7 @@ func NewGlobSet(root string, includes []string) (*GlobSet, error) { } for k := range includes { - includes[k] = filepath.Clean(includes[k]) + includes[k] = filepath.Clean(filepath.FromSlash(includes[k])) } fs := &FileSet{ From bf332221ca792a5cd37834385c985ba1c7415fa2 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 9 Oct 2023 19:05:08 +0200 Subject: [PATCH 3/4] refactoring --- libs/fileset/glob.go | 17 +++-------------- libs/sync/sync.go | 4 ++-- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/libs/fileset/glob.go b/libs/fileset/glob.go index 5088f3f888..9d8626e54b 100644 --- a/libs/fileset/glob.go +++ b/libs/fileset/glob.go @@ -4,30 +4,19 @@ import ( "path/filepath" ) -type GlobSet struct { - fs *FileSet - root string - patterns []string -} - -func NewGlobSet(root string, includes []string) (*GlobSet, error) { +func NewGlobSet(root string, includes []string) (*FileSet, error) { absRoot, err := filepath.Abs(root) if err != nil { return nil, err } for k := range includes { - includes[k] = filepath.Clean(filepath.FromSlash(includes[k])) + includes[k] = filepath.ToSlash(filepath.Clean(includes[k])) } fs := &FileSet{ absRoot, newIncluder(includes), } - return &GlobSet{fs, absRoot, includes}, nil -} - -// Return all files which matches defined glob patterns -func (s *GlobSet) All() ([]File, error) { - return s.fs.All() + return fs, nil } diff --git a/libs/sync/sync.go b/libs/sync/sync.go index 8be478fc3f..beb3f6a33d 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -37,8 +37,8 @@ type Sync struct { *SyncOptions fileSet *git.FileSet - includeFileSet *fileset.GlobSet - excludeFileSet *fileset.GlobSet + includeFileSet *fileset.FileSet + excludeFileSet *fileset.FileSet snapshot *Snapshot filer filer.Filer From 2963c75caf131ef7c7f366f36fbc4184f7889542 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 10 Oct 2023 10:38:30 +0200 Subject: [PATCH 4/4] added a comment --- libs/fileset/ignorer.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/libs/fileset/ignorer.go b/libs/fileset/ignorer.go index 5e8a2220be..eb87682f74 100644 --- a/libs/fileset/ignorer.go +++ b/libs/fileset/ignorer.go @@ -37,6 +37,15 @@ func (i *includer) IgnoreFile(path string) (bool, error) { return i.ignore(path), nil } +// In the context of 'include' functionality, the Ignorer logic appears to be reversed: +// For patterns like 'foo/bar/' which intends to match directories only, we still need to traverse into the directory for potential file matches. +// Ignoring the directory entirely isn't an option, especially when dealing with patterns like 'foo/bar/*.go'. +// While this pattern doesn't target directories, it does match all Go files within them and ignoring directories not matching the pattern +// Will result in missing file matches. +// During the tree traversal process, we call 'IgnoreDirectory' on ".", "./foo", and "./foo/bar", +// all while applying the 'foo/bar/*.go' pattern. To handle this situation effectively, it requires to make the code more complex. +// This could mean generating various prefix patterns to facilitate the exclusion of directories from traversal. +// It's worth noting that, in this particular case, opting for a simpler logic results in a performance trade-off. func (i *includer) IgnoreDirectory(path string) (bool, error) { return false, nil }