Skip to content

Commit 5b3f5f6

Browse files
authored
fix: improve file tree and _walk symlink handling (microsoft#877)
* refactor: improve file tree and _walk symlink handling * remove unused code * lint
1 parent 69e5c3a commit 5b3f5f6

File tree

1 file changed

+32
-4
lines changed
  • rdagent/scenarios/data_science/scen

1 file changed

+32
-4
lines changed

rdagent/scenarios/data_science/scen/utils.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -244,16 +244,37 @@ def get_file_len_size(f: Path) -> tuple[int, str]:
244244
def file_tree(path: Path, depth=0) -> str:
    """Generate a tree structure of files in a directory.

    Files are listed first (at most 8, or at most 4 when the directory holds
    more than 30 files, followed by an "... and N other files" line), then
    sub-directories in sorted order, each rendered recursively one level
    deeper.

    A symlinked directory whose resolved target is the top-level directory or
    lies anywhere beneath it is printed as ``name@ -> <relative target>`` and
    is not expanded. Symlinked directories pointing elsewhere are expanded
    like ordinary directories.

    Args:
        path: Directory to render.
        depth: Indentation level of ``path`` (4 spaces per level). When
            non-zero, the top-level directory is taken to be ``depth``
            lexical parents above ``path``.

    Returns:
        The rendered tree as a newline-joined string.
    """
    root = Path(path)
    # Locate the top-level directory once instead of re-deriving it on every
    # recursive call.
    anchor = root
    for _ in range(depth):
        anchor = anchor.parent
    return _file_tree_under(root, depth, anchor.resolve())


def _file_tree_under(path: Path, depth: int, base_path: Path) -> str:
    """Render ``path`` at indentation ``depth``; ``base_path`` is the resolved top-level directory."""
    indent = " " * depth * 4

    # Single directory scan. Path.is_dir() follows symlinks, so links to
    # directories are classified as directories without an extra resolve().
    files = []
    dirs = []
    for entry in Path(path).iterdir():
        (dirs if entry.is_dir() else files).append(entry)

    result = []
    max_n = 4 if len(files) > 30 else 8
    for p in sorted(files)[:max_n]:
        result.append(f"{indent}{p.name} ({get_file_len_size(p)[1]})")
    if len(files) > max_n:
        result.append(f"{indent}... and {len(files) - max_n} other files")

    for p in sorted(dirs):
        if p.is_symlink():
            target = p.resolve()
            # Compare path components, not raw strings: "/data/foo_bar" must
            # not be mistaken for something inside "/data/foo".
            if target == base_path or base_path in target.parents:
                # avoid recursing into symlinks pointing inside base path
                result.append(
                    f"{indent}{p.name}@ -> {os.path.relpath(target, base_path)} (symlinked dir, not expanded)"
                )
                continue
        result.append(f"{indent}{p.name}/")
        result.append(_file_tree_under(p, depth + 1, base_path))

    return "\n".join(result)
@@ -263,9 +284,16 @@ def _walk(path: Path):
263284
"""Recursively walk a directory (analogous to os.walk but for pathlib.Path)"""
264285
for p in sorted(Path(path).iterdir()):
265286
if p.is_dir():
287+
# If this is a symlinked dir to a parent/ancestor, do not expand it
288+
if p.is_symlink():
289+
target = p.resolve()
290+
cur_path = p.parent.resolve()
291+
if target == cur_path or str(cur_path).startswith(str(target)):
292+
yield p
293+
continue
266294
yield from _walk(p)
267-
continue
268-
yield p
295+
else:
296+
yield p
269297

270298

271299
def preview_csv(p: Path, file_name: str, simple=True, show_nan_columns=False) -> str:

0 commit comments

Comments
 (0)