fix: fix the bug of Exceed-LLM-Context in online merge of multi-trace (#892)

xuangu-fang · web-flow · commit f760a3eff7bd · 2025-05-22T16:10:00.000+08:00
* set constraints on max_sota_retrieved, fix logic of problem identification

* fix: only the Auto SOTA selector uses max_sota_retrieved_num

* set max_sota_retrieved_num=10 by default

* minor update

* auto lint
diff --git a/rdagent/app/data_science/conf.py b/rdagent/app/data_science/conf.py
@@ -98,5 +98,10 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
     merge_hours: int = 2
     """The time for merge"""
 
+    #### multi-trace: max SOTA-retrieved number, used in AutoSOTAexpSelector
+    # limits the number of SOTA experiments retrieved; retrieving too many would exceed the context window of the LLM
+    max_sota_retrieved_num: int = 10
+    """The maximum number of SOTA experiments to retrieve in a LLM call"""
+
 
 DS_RD_SETTING = DataScienceBasePropSetting()
diff --git a/rdagent/log/mle_summary.py b/rdagent/log/mle_summary.py
@@ -10,11 +10,7 @@
 from rdagent.core.experiment import FBWorkspace
 from rdagent.core.proposal import ExperimentFeedback
 from rdagent.log.storage import FileStorage
-from rdagent.log.utils import (
-    extract_json,
-    extract_loopid_func_name,
-    is_valid_session,
-)
+from rdagent.log.utils import extract_json, extract_loopid_func_name, is_valid_session
 from rdagent.log.utils.folder import get_first_session_file_after_duration
 from rdagent.scenarios.data_science.experiment.experiment import DSExperiment
 from rdagent.scenarios.data_science.test_eval import (
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/base.py b/rdagent/scenarios/data_science/proposal/exp_gen/base.py
@@ -212,11 +212,15 @@ def experiment_and_feedback_list_after_init(
         return_type: Literal["sota", "failed", "all"],
         search_type: Literal["all", "ancestors"] = "all",
         selection: tuple[int, ...] | None = None,
+        max_retrieve_num: int | None = None,
     ) -> list[tuple[DSExperiment, ExperimentFeedback]]:
         """
         Retrieve a list of experiments and feedbacks based on the return_type.
         """
         search_list = self.retrieve_search_list(search_type, selection=selection)
+        if max_retrieve_num is not None and len(search_list) > 0:
+            retrieve_num = min(max_retrieve_num, len(search_list))
+            search_list = search_list[:retrieve_num]
 
         final_component = self.COMPLETE_ORDER[-1]
         has_final_component = True if DS_RD_SETTING.coder_on_whole_pipeline else False
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/merge.py b/rdagent/scenarios/data_science/proposal/exp_gen/merge.py
@@ -147,9 +147,12 @@ def gen(self, trace: DSTrace, selection: tuple[int, ...] = (-1,)) -> DSExperimen
             )
 
             success_fb_list = trace.experiment_and_feedback_list_after_init(
-                return_type="sota", search_type="ancestors", selection=(leaves[i],)
+                return_type="sota",
+                search_type="ancestors",
+                selection=(leaves[i],),
             )
             if len(success_fb_list) > 0:
+
                 exp_to_merge_fb_desc = T("scenarios.data_science.share:describe.trace").r(
                     exp_and_feedback_list=success_fb_list,
                     type="success",
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/proposal.py b/rdagent/scenarios/data_science/proposal/exp_gen/proposal.py
@@ -694,8 +694,9 @@ def gen(self, trace: DSTrace, pipeline: bool = False) -> DSExperiment:
             inject_diverse = False
 
         # Step 1: Identify problems
+        current_sub_trace = trace.collect_all_ancestors(selection=(-1,))
         all_problems = {}
-        if len(trace.hist) >= 3:
+        if len(current_sub_trace) >= 3:
             fb_problems = self.identify_feedback_problem(
                 scenario_desc=scenario_desc,
                 exp_feedback_list_desc=exp_feedback_list_desc,
@@ -706,7 +707,7 @@ def gen(self, trace: DSTrace, pipeline: bool = False) -> DSExperiment:
                 fb_problems[problem_name]["label"] = "FEEDBACK_PROBLEM"
                 all_problems[problem_name] = fb_problems[problem_name]
 
-        if len(trace.hist) < 9:
+        if len(current_sub_trace) < 9:
             scen_problems = self.identify_scenario_problem(
                 scenario_desc=scenario_desc,
                 sota_exp_desc=sota_exp_desc,
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/sota_exp_select.py b/rdagent/scenarios/data_science/proposal/exp_gen/sota_exp_select.py
@@ -44,7 +44,9 @@ def __init__(
     def get_sota_exp_to_submit(self, trace: Trace) -> DSExperiment | None:
         # retrieve all SOTA experiments from the trace
 
-        sota_exp_fb_list = trace.experiment_and_feedback_list_after_init(return_type="sota", search_type="all")
+        sota_exp_fb_list = trace.experiment_and_feedback_list_after_init(
+            return_type="sota", search_type="all", max_retrieve_num=DS_RD_SETTING.max_sota_retrieved_num
+        )
 
         if len(sota_exp_fb_list) == 0:
             logger.info("Auto SOTA selector: No SOTA in trace yet")
@@ -58,10 +60,32 @@ def get_sota_exp_to_submit(self, trace: Trace) -> DSExperiment | None:
             return sota_exp_fb_list[0][0]
 
         else:
-            logger.info("Auto SOTA selector: Multiple SOTA in trace, calling LLM to select the best one")
+            logger.info(
+                f"Auto SOTA selector: Multiple SOTA in trace, calling LLM to select the best one in {DS_RD_SETTING.max_sota_retrieved_num} SOTA experiments"
+            )
 
             SOAT_exp_with_desc_and_scores = "Historical SOTA experiments:\n\n"
 
+            leaves: list[int] = trace.get_leaves()
+
+            if len(leaves) >= 2:
+                # multiple trace case, collect the latest SOTA experiments from each trace
+                new_sota_exp_fb_list: list[tuple[DSExperiment, ExperimentFeedback]] = []
+                # calculate the number of SOTA experiments to retrieve from each trace
+                max_sota_retrieved_num_per_trace = DS_RD_SETTING.max_sota_retrieved_num // len(leaves)
+                # note: because of the integer division, the total number of SOTA experiments retrieved may be smaller than max_sota_retrieved_num
+                for leaf in leaves:
+                    sota_exp_fb_list_per_trace = trace.experiment_and_feedback_list_after_init(
+                        return_type="sota",
+                        search_type="ancestors",
+                        selection=(leaf,),
+                        max_retrieve_num=max_sota_retrieved_num_per_trace,
+                    )
+
+                    new_sota_exp_fb_list.extend(sota_exp_fb_list_per_trace)
+
+                sota_exp_fb_list = new_sota_exp_fb_list
+
             for i, (exp, ef) in enumerate(sota_exp_fb_list):
                 if exp:
                     current_final_score = pd.DataFrame(exp.result).loc["ensemble"].iloc[0]

Original file line number	Diff line number	Diff line change
`@@ -147,9 +147,12 @@ def gen(self, trace: DSTrace, selection: tuple[int, ...] = (-1,)) -> DSExperimen`
`147`	`147`	`)`
`148`	`148`
`149`	`149`	`success_fb_list = trace.experiment_and_feedback_list_after_init(`
`150`		`- return_type="sota", search_type="ancestors", selection=(leaves[i],)`
	`150`	`+ return_type="sota",`
	`151`	`+ search_type="ancestors",`
	`152`	`+ selection=(leaves[i],),`
`151`	`153`	`)`
`152`	`154`	`if len(success_fb_list) > 0:`
	`155`	`+`
`153`	`156`	`exp_to_merge_fb_desc = T("scenarios.data_science.share:describe.trace").r(`
`154`	`157`	`exp_and_feedback_list=success_fb_list,`
`155`	`158`	`type="success",`