Skip to content

Commit 6b6e773

Browse files
authored
perf: some small upgrades to factor CoSTEER to improve performance (microsoft#420)
* 1. Use `DataFrame.info` instead of `head`. 2. In the former trace query, also include the latest failed attempt made after the last successful execution. * Fix CI
1 parent f1495cc commit 6b6e773

File tree

6 files changed

+44
-9
lines changed

6 files changed

+44
-9
lines changed

rdagent/components/coder/factor_coder/CoSTEER/evolving_strategy.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,11 +216,17 @@ def implement_one_factor(
216216
) # A dict, {{error_type:[[error_imp_knowledge, success_imp_knowledge],...]},...}
217217

218218
queried_former_failed_knowledge = (
219-
queried_knowledge.former_traces[target_factor_task_information] if queried_knowledge is not None else []
219+
queried_knowledge.former_traces[target_factor_task_information][0]
220+
if queried_knowledge is not None
221+
else []
220222
)
221223

222224
queried_former_failed_knowledge_to_render = queried_former_failed_knowledge
223225

226+
latest_attempt_to_latest_successful_execution = queried_knowledge.former_traces[
227+
target_factor_task_information
228+
][1]
229+
224230
system_prompt = (
225231
Environment(undefined=StrictUndefined)
226232
.from_string(
@@ -296,6 +302,7 @@ def implement_one_factor(
296302
queried_similar_error_knowledge=queried_similar_error_knowledge_to_render,
297303
error_summary=error_summary,
298304
error_summary_critics=error_summary_critics,
305+
latest_attempt_to_latest_successful_execution=latest_attempt_to_latest_successful_execution,
299306
)
300307
.strip("\n")
301308
)

rdagent/components/coder/factor_coder/CoSTEER/knowledge_management.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,7 @@ def query(self, evo: EvolvableSubjects, evolving_trace: list[EvoStep]) -> Querie
296296
evo,
297297
factor_implementation_queried_graph_knowledge,
298298
FACTOR_IMPLEMENT_SETTINGS.v2_query_former_trace_limit,
299+
FACTOR_IMPLEMENT_SETTINGS.v2_add_fail_attempt_to_latest_successful_execution,
299300
)
300301
factor_implementation_queried_graph_knowledge = self.component_query(
301302
evo,
@@ -392,6 +393,7 @@ def former_trace_query(
392393
evo: EvolvableSubjects,
393394
factor_implementation_queried_graph_knowledge: FactorQueriedGraphKnowledge,
394395
v2_query_former_trace_limit: int = 5,
396+
v2_add_fail_attempt_to_latest_successful_execution: bool = False,
395397
) -> Union[QueriedKnowledge, set]:
396398
"""
397399
Query the former trace knowledge of the working trace, and find all the failed task information that has been tried more than fail_task_trial_limit times
@@ -429,11 +431,25 @@ def former_trace_query(
429431
else:
430432
current_index += 1
431433

432-
factor_implementation_queried_graph_knowledge.former_traces[
433-
target_factor_task_information
434-
] = former_trace_knowledge[-v2_query_former_trace_limit:]
434+
latest_attempt = None
435+
if v2_add_fail_attempt_to_latest_successful_execution:
436+
# When the last successful execution is not the last entry in the working trace, it means we have already tried to correct it. We should tell the agent about this failed trial to avoid an endless loop in the future.
437+
if (
438+
len(former_trace_knowledge) > 0
439+
and len(self.knowledgebase.working_trace_knowledge[target_factor_task_information]) > 1
440+
and self.knowledgebase.working_trace_knowledge[target_factor_task_information].index(
441+
former_trace_knowledge[-1]
442+
)
443+
< len(self.knowledgebase.working_trace_knowledge[target_factor_task_information]) - 1
444+
):
445+
latest_attempt = self.knowledgebase.working_trace_knowledge[target_factor_task_information][-1]
446+
447+
factor_implementation_queried_graph_knowledge.former_traces[target_factor_task_information] = (
448+
former_trace_knowledge[-v2_query_former_trace_limit:],
449+
latest_attempt,
450+
)
435451
else:
436-
factor_implementation_queried_graph_knowledge.former_traces[target_factor_task_information] = []
452+
factor_implementation_queried_graph_knowledge.former_traces[target_factor_task_information] = ([], None)
437453

438454
return factor_implementation_queried_graph_knowledge
439455

@@ -607,7 +623,7 @@ def error_query(
607623
):
608624
queried_last_trace = factor_implementation_queried_graph_knowledge.former_traces[
609625
target_factor_task_information
610-
][-1]
626+
][0][-1]
611627
target_index = self.knowledgebase.working_trace_knowledge[target_factor_task_information].index(
612628
queried_last_trace,
613629
)

rdagent/components/coder/factor_coder/CoSTEER/scheduler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def LLMSelect(
4040
# find corresponding former trace for each task
4141
target_factor_task_information = evo.sub_tasks[i].get_task_information()
4242
if target_factor_task_information in former_trace:
43-
tasks.append((i, evo.sub_tasks[i], former_trace[target_factor_task_information]))
43+
tasks.append((i, evo.sub_tasks[i], former_trace[target_factor_task_information][0]))
4444

4545
system_prompt = (
4646
Environment(undefined=StrictUndefined)

rdagent/components/coder/factor_coder/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ class Config:
3737
v2_query_component_limit: int = 1
3838
v2_query_error_limit: int = 1
3939
v2_query_former_trace_limit: int = 1
40+
v2_add_fail_attempt_to_latest_successful_execution: bool = False
4041
v2_error_summary: bool = False
4142
v2_knowledge_sampler: float = 1.0
4243

rdagent/components/coder/factor_coder/prompts.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,13 @@ evolving_strategy_factor_implementation_v2_user: |-
118118
{{ similar_component_knowledge.implementation.code }}
119119
{% endfor %}
120120
{% endif %}
121+
{% if latest_attempt_to_latest_successful_execution is not none %}
122+
You have tried to correct your former failed code but still met some errors. Here is the latest attempt to the latest successful execution, try not to get the same error to your new code:
123+
=====Your latest attempt=====
124+
{{ latest_attempt_to_latest_successful_execution.implementation.code }}
125+
=====Feedback to your latest attempt=====
126+
{{ latest_attempt_to_latest_successful_execution.feedback }}
127+
{% endif %}
121128
122129
evolving_strategy_error_summary_v2_system: |-
123130
User is trying to implement some factors in the following scenario:

rdagent/scenarios/qlib/experiment/utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import io
12
import re
23
import shutil
34
from pathlib import Path
@@ -81,10 +82,13 @@ def get_file_desc(p: Path) -> str:
8182
pd.set_option("display.max_columns", None) # or 1000
8283
pd.set_option("display.max_rows", None) # or 1000
8384
pd.set_option("display.max_colwidth", None) # or 199
85+
86+
buffer = io.StringIO()
87+
df.info(verbose=True, buf=buffer, show_counts=False)
8488
return JJ_TPL.render(
8589
file_name=p.name,
86-
type_desc="generated by `pd.read_hdf(filename).head()`",
87-
content=df.head().to_string(),
90+
type_desc="generated by `df.info(verbose=True, show_counts=False)`",
91+
content=buffer.getvalue(),
8892
)
8993
elif p.name.endswith(".md"):
9094
with open(p) as f:

0 commit comments

Comments
 (0)