Skip to content

Commit cc0a86d

Browse files
authored
avoid_generating_more_column_than_data (microsoft#332)
1 parent 6d5efa8 commit cc0a86d

File tree

2 files changed

+9
-0
lines changed

2 files changed

+9
-0
lines changed

rdagent/components/coder/factor_coder/CoSTEER/evaluators.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -394,6 +394,13 @@ def evaluate(
394394
if version == 1:
395395
feedback_str, _ = FactorSingleColumnEvaluator(self.scen).evaluate(implementation, gt_implementation)
396396
conclusions.append(feedback_str)
397+
elif version == 2:
398+
input_shape = self.scen.input_shape
399+
_, gen_df = self._get_df(gt_implementation, implementation)
400+
if gen_df.shape[-1] > input_shape[-1]:
401+
conclusions.append(
402+
"Output dataframe has more columns than input feature which is not acceptable in feature processing tasks. Please check the implementation to avoid generating too many columns. Consider this implementation as a failure."
403+
)
397404

398405
# Check if the index of the dataframe is ("datetime", "instrument")
399406
feedback_str, _ = FactorOutputFormatEvaluator(self.scen).evaluate(implementation, gt_implementation)

rdagent/scenarios/kaggle/experiment/scenario.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,8 @@ def source_data(self) -> str:
128128
pickle.dump(X_test, open(data_folder / "X_test.pkl", "wb"))
129129
pickle.dump(others, open(data_folder / "others.pkl", "wb"))
130130

131+
self.input_shape = X_train.shape
132+
131133
buffer = io.StringIO()
132134
X_valid.info(verbose=True, buf=buffer, show_counts=True)
133135
data_info = buffer.getvalue()

0 commit comments

Comments
 (0)