Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix some bugs
  • Loading branch information
WinstonLiyt committed Mar 27, 2025
commit df3d4ac03cbcfd338e5488f904a604d166bfe800
4 changes: 3 additions & 1 deletion rdagent/components/coder/data_science/ensemble/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def evaluate(

target_task_information = target_task.get_task_information()
metric_name = self.scen.metric_name

if (
queried_knowledge is not None
and target_task_information in queried_knowledge.success_task_to_knowledge_dict
Expand All @@ -56,7 +57,8 @@ def evaluate(
.render(
model_names=[
fn[:-3] for fn in implementation.file_dict.keys() if fn.startswith("model_") and "test" not in fn
]
],
metric_name=metric_name,
)
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,5 +122,7 @@ assert model_set_in_scores == set({{model_names}}).union({"ensemble"}), (
)
# Each model (and the ensemble) must appear exactly once in the scores index.
assert score_df.index.is_unique, "The scores dataframe has duplicate model names."
# Exactly one column is allowed: the score of the evaluation metric.
assert len(score_df.columns) == 1, f"The scores dataframe should have exactly one column for the scores of the evaluation indicator, but has these columns: {score_df.columns.tolist()}"
# Render the metric name as a quoted string literal. An unquoted `{{metric_name}}`
# would be emitted as a bare identifier in the generated script, and nesting the
# template placeholder inside f-string braces is ambiguous for both Jinja and Python.
expected_metric_name = {{ metric_name | tojson }}
assert score_df.columns[0] == expected_metric_name, f"The column name of the scores in the scores dataframe should be '{expected_metric_name}', but is '{score_df.columns[0]}'"


print("Ensemble test end.")
8 changes: 4 additions & 4 deletions rdagent/scenarios/data_science/scen/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,11 +267,11 @@ def _analysis_competition_description(self):
self.data_type = response_json_analysis.get("Data Type", "No data type provided")
self.brief_description = response_json_analysis.get("Brief Description", "No brief description provided")
self.dataset_description = response_json_analysis.get("Dataset Description", "No dataset description provided")
self.target_description = response_json_analysis.get("Evaluation Description", "No target description provided")
self.submission_specifications = response_json_analysis.get(
"Submission Specifications", "No submission requirements provided"
)
self.model_output_channel = response_json_analysis.get("Submission channel number to each sample", 1)
self.metric_description = response_json_analysis.get("Metric Evaluation Description", "No target description provided")
self.metric_name = response_json_analysis.get("Metric Name", "No metric name provided")
self.metric_direction_guess = response_json_analysis.get("Metric Direction", True)

Expand All @@ -280,9 +280,9 @@ def get_competition_full_desc(self) -> str:
Data Type: {self.data_type}
Brief Description: {self.brief_description}
Dataset Description: {self.dataset_description}
Target Description: {self.target_description}
Submission Specifications: {self.submission_specifications}
Model Output Channel: {self.model_output_channel}
Metric Evaluation Description: {self.metric_description}
Metric Name: {self.metric_name}
"""

Expand All @@ -294,7 +294,7 @@ def background(self) -> str:
data_type=self.data_type,
brief_description=self.brief_description,
dataset_description=self.dataset_description,
target_description=self.target_description,
metric_description=self.metric_description,
)
return background_prompt

Expand All @@ -309,7 +309,7 @@ def get_scenario_all_desc(self) -> str:
return T(".prompts:scenario_description").r(
background=self.background,
submission_specifications=self.submission_specifications,
evaluation=self.target_description,
evaluation=self.metric_description,
metric_name=self.metric_name,
metric_direction=self.metric_direction,
eda_output=self.eda_output,
Expand Down
4 changes: 2 additions & 2 deletions rdagent/scenarios/data_science/scen/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ competition_description_template:
"Data Type": "The type of competition data, e.g., 'Tabular', 'Time Series', 'Text (Natural Language Processing)', 'Image (Computer Vision)', 'Audio', 'Video'",
"Brief Description": "A brief description of the competition",
"Dataset Description": "The dataset utilized in the competition is described based on two sources: the Competition Description, which provides contextual details about the original files, and the Processed Data folder description, which outlines the structure of the dataset after processing. While there may be differences—for instance, original files mentioned in the Competition Description (e.g., .zip files) may have been extracted or restructured—your task is to interpret the new file structure accurately (do not contain any file or folder that is not in Processed Data folder description) and reconcile it with the contextual information from the Competition Description to provide a clear and updated explanation.",
"Evaluation Description": "A description of the evaluation used in the competition.",
"Submission Specifications": "The submission specification & sample submission file descriptions for the model to output.",
"Submission channel number to each sample": "The number of channels in the output for each sample, e.g., 1 for regression, N for N class classification with probabilities, etc. An integer. If not specified, it is 1.",
"Metric Evaluation Description": "A precise explanation of how the submissions are scored in this competition, including how the metric is calculated and any specific considerations.",
"Metric Name": "The name of the metric which this competition uses for scoring the submission.",
"Metric Direction": True or False; True means a bigger metric number is better, False means a smaller one is better.
}
Expand All @@ -58,7 +58,7 @@ competition_background: |-
The data type used in this competition is {{ data_type }}.
Briefly, the competition involves: {{ brief_description }}.
The dataset used in this competition is: {{ dataset_description }}.
Your goal in this competition is to: {{ target_description }}.
The evaluation metric of this competition is: {{ metric_description }}.

rich_style_description: |-
### {{ name }} Agent: Automated Feature Engineering & Model Tuning Evolution
Expand Down
2 changes: 1 addition & 1 deletion rdagent/scenarios/kaggle/experiment/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ kg_description_template:
"Competition Features": "Two-line description of the overall features involved within the competition as background."
"Submission Specifications": "The submission specification & sample submission csv descriptions for the model to output."
"Submission channel number to each sample": "The number of channels in the output for each sample, e.g., 1 for regression, N for N class classification with probabilities, etc. An integer. If not specified, it is 1."
"Evaluation Description": "A brief description of the metrics used in the evaluation. Please note that if `evaluation_metric_direction` is True, it indicates that higher values are better; if False, lower values are preferred."
"Metric Evaluation Description": "A brief description of the metrics used in the evaluation. Please note that if `evaluation_metric_direction` is True, it indicates that higher values are better; if False, lower values are preferred."
}
Since these might be very similar column names in data like one_hot_encoded columns, you can use some regex to group them together.

Expand Down
4 changes: 2 additions & 2 deletions rdagent/scenarios/kaggle/experiment/scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def _analysis_competition_description(self):
)
self.model_output_channel = response_json_analysis.get("Submission channel number to each sample", 1)
self.evaluation_desc = response_json_analysis.get(
"Evaluation Description", "No evaluation specification provided."
"Metric Evaluation Description", "No evaluation specification provided."
)

def get_competition_full_desc(self) -> str:
Expand All @@ -118,7 +118,7 @@ def get_competition_full_desc(self) -> str:
Competition Features: {self.competition_features}
Submission Specifications: {self.submission_specifications}
Model Output Channel: {self.model_output_channel}
Evaluation Descriptions: {self.evaluation_desc}
Metric Evaluation Description: {self.evaluation_desc}
Is the evaluation metric the higher the better: {evaluation_direction}
"""

Expand Down
Loading