Skip to content

Commit e44a2bd

Browse files
authored
Added ignore kwarg to traverse() in common, allowing directory pruning (#209)
core: added ignore option to traverse, pruning directories from fdfind/find/os.walk
1 parent 708d073 commit e44a2bd

File tree

7 files changed

+84
-8
lines changed

7 files changed

+84
-8
lines changed

src/promnesia/common.py

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from pathlib import Path
66
from glob import glob
77
import itertools
8+
from more_itertools import intersperse
89
import logging
910
from functools import lru_cache
1011
import shutil
@@ -390,26 +391,50 @@ def mime(path: PathIsh) -> Optional[str]:
390391
return magic(ps)
391392

392393

def find_args(root: Path, follow: bool, ignore: Optional[List[str]]=None) -> List[str]:
    """
    Build the arguments (everything after the executable name) for a ``find``
    invocation that lists the regular files under ``root``.

    :param root: directory to search
    :param follow: if true, ``-L`` is prepended to the arguments
    :param ignore: file/directory names to exclude: directories with one of these
        names are pruned, and files with one of these names are not matched.
        ``None`` or an empty list disables the filtering.
    :return: list of arguments to pass after ``find``
    """
    prune_dir_args: List[str] = []
    ignore_file_args: List[str] = []
    if ignore:
        # -name {name} for all the files/directories in ignore, ORed (-o) together
        ignore_names_l: List[str] = []
        for n in ignore:
            if ignore_names_l:
                ignore_names_l.append('-o')
            ignore_names_l.extend(['-name', n])
        # Prune all of those directories, and make the entire clause evaluate to false
        # (so that it doesn't match anything and make find print)
        prune_dir_args = ['-type', 'd', '-a', '(', *ignore_names_l, ')', '-prune', '-false', '-o']
        # Also ignore any files with the names as well
        ignore_file_args = ['-a', '-not', '(', *ignore_names_l, ')']

    return [
        *(['-L'] if follow else []),
        str(root),
        *prune_dir_args,
        '-type', 'f',
        *ignore_file_args,
    ]
399415

400416

def fdfind_args(root: Path, follow: bool, ignore: Optional[List[str]]=None) -> List[str]:
    """
    Build the arguments (everything after the executable name) for an
    ``fd``/``fdfind`` invocation that lists the files under ``root``.

    :param root: directory to search
    :param follow: if true, ``--follow`` is added to the arguments
    :param ignore: names to exclude; each one is passed as ``--exclude <name>``.
        ``None`` or an empty list adds no exclusions.
    :return: list of arguments to pass after the fd executable
    """
    from .config import extra_fd_args

    # One '--exclude <name>' pair per ignored name. This is always a list (possibly
    # empty), so the return expression below is valid when nothing is ignored.
    ignore_args: List[str] = []
    for n in ignore or []:
        ignore_args.extend(['--exclude', n])

    return [
        *extra_fd_args(),
        *ignore_args,
        *(['--follow'] if follow else []),
        '--type', 'f',
        '.',
        str(root),
    ]
410435

411436

412-
def traverse(root: Path, *, follow: bool=True) -> Iterable[Path]:
437+
def traverse(root: Path, *, follow: bool=True, ignore: List[str]=[]) -> Iterable[Path]:
413438
if not root.is_dir():
414439
yield root
415440
return
@@ -418,16 +443,20 @@ def traverse(root: Path, *, follow: bool=True) -> Iterable[Path]:
418443
if _is_windows:
419444
# on windows could use 'forfiles'... but probably easier not to bother for now
420445
# todo could use followlinks=True? walk could end up in infinite loop?
421-
for r, _, files in os.walk(root):
422-
yield from (Path(r) / f for f in files)
446+
for r, dirs, files in os.walk(root):
447+
# Remove dirs specified in ignore (clone dirs() as we have to remove in place)
448+
for i, d in enumerate(list(dirs)):
449+
if d in ignore:
450+
del dirs[i]
451+
yield from (Path(r) / f for f in files if f not in ignore)
423452
return
424453

425454
from .compat import Popen, PIPE
426-
cmd = ['find', *find_args(root, follow=follow)]
455+
cmd = ['find', *find_args(root, follow=follow, ignore=ignore)]
427456
# try to use fd.. it cooperates well with gitignore etc, also faster than find
428457
for x in ('fd', 'fd-find', 'fdfind'): # has different names on different dists..
429458
if shutil.which(x):
430-
cmd = [x, *fdfind_args(root, follow=follow)]
459+
cmd = [x, *fdfind_args(root, follow=follow, ignore=ignore)]
431460
break
432461
else:
433462
warnings.warn("'fdfind' is recommended for the best indexing performance. See https://github.com/sharkdp/fd#installation. Falling back to 'find'")

src/promnesia/sources/auto.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ def _index(path: Path, opts: Options) -> Results:
216216

217217
# iterate over resolved paths, to avoid duplicates
218218
def rit() -> Iterable[Path]:
219-
it = traverse(path, follow=opts.follow)
219+
it = traverse(path, follow=opts.follow, ignore=IGNORE)
220220
for p in it:
221221
if any(fnmatch(str(p), o) for o in opts.ignored):
222222
# TODO not sure if should log here... might end up with quite a bit of logs

tests/test_traverse.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from pathlib import Path
2+
from promnesia.common import traverse
3+
from unittest.mock import Mock, patch
4+
from common import DATA
5+
6+
7+
testDataPath = Path(DATA) / 'traverse'
8+
# shutil.which is patched to always return a falsy value, so none of the fd
# executables are detected and traverse() falls back to `find`
@patch('promnesia.common.shutil.which', return_value=False)
def test_traverse_ignore_find(patched):
    '''
    traverse() via `find`, with some names ignored
    '''
    ignored = ['ignoreme.txt', 'ignoreme2']
    expected = {testDataPath / 'imhere2/real.txt', testDataPath / 'imhere.txt'}

    found = set(traverse(testDataPath, ignore=ignored))

    assert found == expected
21+
def test_traverse_ignore_fdfind():
    '''
    traverse() with nothing patched (intended to exercise `fdfind`), with some names ignored
    '''
    ignored = ['ignoreme.txt', 'ignoreme2']
    expected = {testDataPath / 'imhere.txt', testDataPath / 'imhere2/real.txt'}

    found = set(traverse(testDataPath, ignore=ignored))

    assert found == expected
31+
# TODO: ideally the implementation would be testable directly, without
# resorting to this kind of patching
@patch('promnesia.common._is_windows', new_callable=lambda: True)
def test_traverse_ignore_windows(patched):
    '''
    traverse() through the pure python code path (_is_windows patched to True), with some names ignored
    '''
    ignored = ['ignoreme.txt', 'ignoreme2']
    expected = {testDataPath / 'imhere.txt', testDataPath / 'imhere2/real.txt'}

    found = set(traverse(testDataPath, ignore=ignored))

    assert found == expected
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
jaiofjeoriheoirjg
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
notrealignores

tests/testdata/traverse/imhere.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
imhere.txt
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
jdfioja

0 commit comments

Comments
 (0)