Skip to content

Commit 16da862

Browse files
authored
fix: add force parameter to cache_with_pickle & use cache when getting the Kaggle leaderboard (microsoft#687)
* use the latest version of KaggleApi * add 'force' to cache_with_pickle, use cache when getting the Kaggle leaderboard
1 parent 26d4dab commit 16da862

File tree

3 files changed

+14
-3
lines changed

3 files changed

+14
-3
lines changed

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ init-qlib-env:
6868

6969
dev:
7070
$(PIPRUN) pip install -e .[docs,lint,package,test] -c $(CONSTRAINTS_FILE)
71+
$(PIPRUN) pip install -U kaggle
7172
if [ "$(CI)" != "true" ] && command -v pre-commit > /dev/null 2>&1; then pre-commit install --hook-type pre-push; fi
7273

7374
# Generate constraints for current Python version.

rdagent/core/utils.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def multiprocessing_wrapper(func_calls: list[tuple[Callable, tuple]], n: int) ->
153153
return [result.get() for result in results]
154154

155155

156-
def cache_with_pickle(hash_func: Callable, post_process_func: Callable | None = None) -> Callable:
156+
def cache_with_pickle(hash_func: Callable, post_process_func: Callable | None = None, force: bool = False) -> Callable:
157157
"""
158158
This decorator will cache the return value of the function with pickle.
159159
The cache key is generated by the hash_func. The hash function returns a string or None.
@@ -162,12 +162,21 @@ def cache_with_pickle(hash_func: Callable, post_process_func: Callable | None =
162162
The post_process_func will be called with the original arguments and the cached result
163163
to give each caller a chance to process the cached result. The post_process_func should
164164
return the final result.
165+
166+
Parameters
167+
----------
168+
hash_func : Callable
169+
The function to generate the hash key for the cache.
170+
post_process_func : Callable | None, optional
171+
The function to process the cached result, by default None.
172+
force : bool, optional
173+
If True, the cache will be used even if RD_AGENT_SETTINGS.cache_with_pickle is False, by default False.
165174
"""
166175

167176
def cache_decorator(func: Callable) -> Callable:
168177
@functools.wraps(func)
169178
def cache_wrapper(*args: Any, **kwargs: Any) -> Any:
170-
if not RD_AGENT_SETTINGS.cache_with_pickle:
179+
if not RD_AGENT_SETTINGS.cache_with_pickle and not force:
171180
return func(*args, **kwargs)
172181

173182
target_folder = Path(RD_AGENT_SETTINGS.pickle_cache_folder_path_str) / f"{func.__module__}.{func.__name__}"

rdagent/scenarios/kaggle/kaggle_crawler.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from rdagent.app.kaggle.conf import KAGGLE_IMPLEMENT_SETTING
1919
from rdagent.core.conf import ExtendedBaseSettings
2020
from rdagent.core.exception import KaggleError
21-
from rdagent.core.prompts import Prompts
21+
from rdagent.core.utils import cache_with_pickle
2222
from rdagent.log import rdagent_logger as logger
2323
from rdagent.oai.llm_utils import APIBackend
2424
from rdagent.scenarios.data_science.debug.data import create_debug_data
@@ -186,6 +186,7 @@ def unzip_data(unzip_file_path: str, unzip_target_path: str) -> None:
186186
zip_ref.extractall(unzip_target_path)
187187

188188

189+
@cache_with_pickle(hash_func=lambda x: x, force=True)
189190
def leaderboard_scores(competition: str) -> list[float]:
190191
from kaggle.api.kaggle_api_extended import KaggleApi
191192

0 commit comments

Comments
 (0)