Skip to content

Commit 3090718

Browse files
authored
fix: include data information in cache key generation (microsoft#566)
1 parent 7eb7f49 commit 3090718

File tree

1 file changed

+13
-0
lines changed

1 file changed

+13
-0
lines changed

rdagent/utils/env.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,18 @@ def cached_run(
428428
"""
429429
target_folder = Path(RD_AGENT_SETTINGS.pickle_cache_folder_path_str) / f"utils.env.run"
430430
target_folder.mkdir(parents=True, exist_ok=True)
431+
432+
# We must include information about the data (beyond the code) in the key.
433+
# Otherwise, all commands operating on data will become invalid (e.g. rm -r submission.csv)
434+
# So we recursively walk the folder and add the sorted list of relative file names as part of the key.
435+
data_key = []
436+
for path in Path(local_path).rglob("*"):
437+
p = str(path.relative_to(Path(local_path)))
438+
if p.startswith("__pycache__"):
439+
continue
440+
data_key.append(p)
441+
data_key = sorted(data_key)
442+
431443
key = md5_hash(
432444
json.dumps(
433445
[
@@ -437,6 +449,7 @@ def cached_run(
437449
)
438450
+ json.dumps({"entry": entry, "running_extra_volume": running_extra_volume})
439451
+ json.dumps({"extra_volumes": self.conf.extra_volumes})
452+
+ json.dumps(data_key)
440453
)
441454
if Path(target_folder / f"{key}.pkl").exists() and Path(target_folder / f"{key}.zip").exists():
442455
with open(target_folder / f"{key}.pkl", "rb") as f:

0 commit comments

Comments
 (0)