Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
fix ci
  • Loading branch information
WinstonLiyt committed Feb 20, 2025
commit 5f9470611cc4ec89380a4788fec21bd42d9c9fa6
24 changes: 13 additions & 11 deletions rdagent/components/coder/data_science/workflow/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,19 +100,21 @@ def evaluate(
base_check_code = (DIRNAME / "eval_tests" / "submission_format_test.txt").read_text()
implementation.inject_files(**{"test/submission_format_test.py": base_check_code})
# stdout += "----Submission Check 1-----\n"
submission_stdout, submission_ret_code = implementation.execute_ret_code(env=de, entry="python test/submission_format_test.py")
submission_stdout, submission_ret_code = implementation.execute_ret_code(
env=de, entry="python test/submission_format_test.py"
)
stdout += submission_stdout

# MLEBench Check
# !!! Since we are running on a sampled dataset, mlebench check is not required.
# mle_check_code = (
# (DIRNAME / "eval_tests" / "mle_submission_format_test.txt")
# .read_text()
# .replace("<competition_id>", self.scen.competition)
# )
# implementation.inject_files(**{"test/mle_submission_format_test.py": mle_check_code})
# stdout += "----Submission Check 2-----\n"
# stdout += implementation.execute(env=mde, entry=f"python test/mle_submission_format_test.py")
# MLEBench Check
# !!! Since we are running on a sampled dataset, mlebench check is not required.
# mle_check_code = (
# (DIRNAME / "eval_tests" / "mle_submission_format_test.txt")
# .read_text()
# .replace("<competition_id>", self.scen.competition)
# )
# implementation.inject_files(**{"test/mle_submission_format_test.py": mle_check_code})
# stdout += "----Submission Check 2-----\n"
# stdout += implementation.execute(env=mde, entry=f"python test/mle_submission_format_test.py")

system_prompt = T(".prompts:workflow_eval.system").r(
scenario=self.scen.get_scenario_all_desc(),
Expand Down
Loading