diff --git a/rdagent/components/coder/data_science/workflow/eval.py b/rdagent/components/coder/data_science/workflow/eval.py
index f09f6adac..abc888c68 100644
--- a/rdagent/components/coder/data_science/workflow/eval.py
+++ b/rdagent/components/coder/data_science/workflow/eval.py
@@ -77,8 +77,10 @@ def evaluate(
 
         # Check score file
         score_fp = implementation.workspace_path / "scores.csv"
+        score_ret_code = 0
         if not score_fp.exists():
-            stdout += "\nMetrics file (scores.csv) is not generated."
+            stdout += "\n[Error] Metrics file (scores.csv) is not generated!"
+            score_ret_code = 1
         else:
             try:
                 score_df = pd.read_csv(score_fp, index_col=0)
@@ -89,29 +91,30 @@
                 for model in model_set_in_folder:
                     if model not in model_set_in_scores:
                         stdout += f"\nModel {model} is not evaluated in the scores.csv. The scores.csv has {model_set_in_scores}."
+                        score_ret_code = 1
             except Exception as e:
                 stdout += f"\nError in checking the scores.csv file: {e}\nscores.csv's content:\n-----\n{score_fp.read_text()}\n-----"
+                score_ret_code = 1
 
         # Check submission file
-        submission_fp = implementation.workspace_path / "submission.csv"
-        if not submission_fp.exists():
-            stdout += "\nSubmission file (submission.csv) is not generated."
-        else:
-            base_check_code = (DIRNAME / "eval_tests" / "submission_format_test.txt").read_text()
-            implementation.inject_files(**{"test/submission_format_test.py": base_check_code})
-            # stdout += "----Submission Check 1-----\n"
-            stdout += implementation.execute(env=de, entry="python test/submission_format_test.py")
-
-            # MLEBench Check
-            # !!! Since we are running on a sampled dataset, mlebench check is not required.
-            # mle_check_code = (
-            #     (DIRNAME / "eval_tests" / "mle_submission_format_test.txt")
-            #     .read_text()
-            #     .replace("<competition_id>", self.scen.competition)
-            # )
-            # implementation.inject_files(**{"test/mle_submission_format_test.py": mle_check_code})
-            # stdout += "----Submission Check 2-----\n"
-            # stdout += implementation.execute(env=mde, entry=f"python test/mle_submission_format_test.py")
+        base_check_code = (DIRNAME / "eval_tests" / "submission_format_test.txt").read_text()
+        implementation.inject_files(**{"test/submission_format_test.py": base_check_code})
+        # stdout += "----Submission Check 1-----\n"
+        submission_stdout, submission_ret_code = implementation.execute_ret_code(
+            env=de, entry="python test/submission_format_test.py"
+        )
+        stdout += submission_stdout
+
+        # MLEBench Check
+        # !!! Since we are running on a sampled dataset, mlebench check is not required.
+        # mle_check_code = (
+        #     (DIRNAME / "eval_tests" / "mle_submission_format_test.txt")
+        #     .read_text()
+        #     .replace("<competition_id>", self.scen.competition)
+        # )
+        # implementation.inject_files(**{"test/mle_submission_format_test.py": mle_check_code})
+        # stdout += "----Submission Check 2-----\n"
+        # stdout += implementation.execute(env=mde, entry=f"python test/mle_submission_format_test.py")
 
         system_prompt = T(".prompts:workflow_eval.system").r(
             scenario=self.scen.get_scenario_all_desc(),
@@ -122,6 +125,8 @@
             stdout=stdout.strip(),
             code=implementation.file_dict["main.py"],
         )
-        return build_cls_from_json_with_retry(
+        wfb = build_cls_from_json_with_retry(
             WorkflowSingleFeedback, system_prompt=system_prompt, user_prompt=user_prompt
         )
+        wfb.final_decision = wfb.final_decision and submission_ret_code == 0 and score_ret_code == 0
+        return wfb
diff --git a/rdagent/components/coder/data_science/workflow/eval_tests/submission_format_test.txt b/rdagent/components/coder/data_science/workflow/eval_tests/submission_format_test.txt
index ba9cf4a5a..a866479e5 100644
--- a/rdagent/components/coder/data_science/workflow/eval_tests/submission_format_test.txt
+++ b/rdagent/components/coder/data_science/workflow/eval_tests/submission_format_test.txt
@@ -20,9 +20,7 @@ input_dir = Path("/kaggle/input")
 sample_submission_files = list(input_dir.glob("*sample_submission*.csv")) + \
                           list(input_dir.glob("*sampleSubmission*.csv"))
 
-if not sample_submission_files:
-    print("Error: No sample submission file found in /kaggle/input/")
-    exit(1)
+assert sample_submission_files, "Error: No sample submission file found in /kaggle/input/"
 
 # Use first matching file
 sample_submission_name = sample_submission_files[0].name
@@ -30,10 +28,10 @@ SAMPLE_SUBMISSION_PATH = str(sample_submission_files[0])
 print(f"Using sample submission file: {sample_submission_name}")
 
 # Check if the sample submission file exists
-if not Path(SAMPLE_SUBMISSION_PATH).exists():
-    print(f"Error: {sample_submission_name} not found at {SAMPLE_SUBMISSION_PATH}")
-    exit(0)
+assert Path(SAMPLE_SUBMISSION_PATH).exists(), f"Error: {sample_submission_name} not found at {SAMPLE_SUBMISSION_PATH}"
 
+# Check if our submission file exists
+assert Path('submission.csv').exists(), "Error: submission.csv not found"
 sample_submission = pd.read_csv(SAMPLE_SUBMISSION_PATH)
 our_submission = pd.read_csv('submission.csv')
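
The eval.py hunk assumes the workspace object exposes an `execute_ret_code` method alongside `execute`, returning a `(stdout, return_code)` tuple so the format check's exit status can feed into `final_decision`. A minimal sketch of that contract, written as a hypothetical standalone helper (RD-Agent's actual method takes an `env` argument and runs the entry command inside a sandboxed environment, which is omitted here):

    import subprocess
    from pathlib import Path

    def execute_ret_code(workspace_path: Path, entry: str) -> tuple[str, int]:
        """Run `entry` in the workspace; return captured output and the exit code."""
        result = subprocess.run(
            entry, shell=True, cwd=workspace_path, capture_output=True, text=True
        )
        # A nonzero return code (e.g. from a failed assert) signals a failed check.
        return result.stdout + result.stderr, result.returncode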
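
Rewriting the test script from `print` plus `exit` to bare `assert`s serves the same mechanism: a failed assertion raises `AssertionError`, so the interpreter exits with status 1 and `submission_ret_code` becomes nonzero. (Note the old code even called `exit(0)` when the sample submission was missing, reporting success.) A quick illustration of the exit-code behavior the patch relies on:

    import subprocess

    ok = subprocess.run(["python", "-c", "assert True"]).returncode
    bad = subprocess.run(["python", "-c", "assert False, 'boom'"]).returncode
    print(ok, bad)  # prints: 0 1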