fix: fix a bug in model tuning feedback (microsoft#316)

WinstonLiyt · web-flow · commit b0672baa9d93 · 2024-09-24T17:23:18.000+08:00
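* fix a bug

* fix two bugs

Summary of the changes in this commit:
- feedback.py: pass the whole last-round entry (`trace.hist[-1][0]`) as `last_hypothesis` instead of its `.hypothesis` attribute, because the prompt template itself reads `last_hypothesis.hypothesis`.
- prompts.yaml: rename the prompt key `model_feedback_generation` to `model_tuning_feedback_generation`.
- prompts.yaml: replace the `last_task` / `last_code` template variables with the single `last_task_and_code` variable that the render dictionary provides.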
diff --git a/rdagent/scenarios/kaggle/developer/feedback.py b/rdagent/scenarios/kaggle/developer/feedback.py
@@ -114,7 +114,7 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac
         # Prepare render dictionary
         render_dict = {
             "context": self.scen.get_scenario_all_desc(),
-            "last_hypothesis": trace.hist[-1][0].hypothesis if trace.hist else None,
+            "last_hypothesis": trace.hist[-1][0] if trace.hist else None,
             "last_task_and_code": last_task_and_code,
             "last_result": trace.hist[-1][1].result if trace.hist else None,
             "hypothesis": hypothesis,
diff --git a/rdagent/scenarios/kaggle/prompts.yaml b/rdagent/scenarios/kaggle/prompts.yaml
@@ -110,7 +110,7 @@ model_experiment_output_format: |-
   }
   Usually, a larger model works better than a smaller one. Hence, the parameters should be larger.
 
-model_feedback_generation:
+model_tuning_feedback_generation:
   system: |-
     You are a professional result analysis assistant. You will receive a result and a hypothesis.
     Your task is to provide feedback on how well the result supports or refutes the hypothesis by judging from the observation of performance increase or decrease.
@@ -149,8 +149,7 @@ model_feedback_generation:
     {% if last_hypothesis %} 
     Last Round Information:
     Hypothesis: {{last_hypothesis.hypothesis}}
-    Task: {{last_task}}
-    Code Implemented: {{last_code}}
+    Last Task and Code: {{last_task_and_code}}
     Result: {{last_result}}
     {% else %}
     This is the first round. No previous information available. As long as the performance is not too negative (eg.ICIR is greater than 0), treat it as successful. Do not set the threshold too high.