refactor: use dynamic input path and update template loader (microsoft#792)

you-n-g · web-flow · commit ed87a480128b · 2025-04-16T18:11:46.000+08:00
* refactor: use dynamic input path and update template loader

* fix: update include syntax for data source in prompts.yaml

* add customization path

* docs: update prompts for ensemble scoring and metric direction

* chore: remove obsolete data_science/share.yaml file
diff --git a/rdagent/components/coder/data_science/ensemble/eval.py b/rdagent/components/coder/data_science/ensemble/eval.py
@@ -47,7 +47,11 @@ def evaluate(
             )
 
         env = get_ds_env(
-            extra_volumes={f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": "/kaggle/input"}
+            extra_volumes={
+                f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": T(
+                    "scenarios.data_science.share:scen.input_path"
+                ).r()
+            }
         )
 
         fname = "test/ensemble_test.txt"
diff --git a/rdagent/components/coder/data_science/feature/eval.py b/rdagent/components/coder/data_science/feature/eval.py
@@ -43,7 +43,11 @@ def evaluate(
             )
 
         env = get_ds_env(
-            extra_volumes={f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": "/kaggle/input"}
+            extra_volumes={
+                f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": T(
+                    "scenarios.data_science.share:scen.input_path"
+                ).r()
+            }
         )
 
         # TODO: do we need to clean the generated temporary content?
diff --git a/rdagent/components/coder/data_science/model/eval.py b/rdagent/components/coder/data_science/model/eval.py
@@ -57,7 +57,11 @@ def evaluate(
             )
 
         env = get_ds_env(
-            extra_volumes={f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": "/kaggle/input"}
+            extra_volumes={
+                f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": T(
+                    "scenarios.data_science.share:scen.input_path"
+                ).r()
+            }
         )
 
         if_model_removed = False
diff --git a/rdagent/components/coder/data_science/pipeline/eval.py b/rdagent/components/coder/data_science/pipeline/eval.py
@@ -52,7 +52,11 @@ def evaluate(
             )
 
         env = get_ds_env(
-            extra_volumes={f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": "/kaggle/input"}
+            extra_volumes={
+                f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": T(
+                    "scenarios.data_science.share:scen.input_path"
+                ).r()
+            }
         )
 
         # Clean the scores.csv & submission.csv.
@@ -97,7 +101,7 @@ def evaluate(
                 score_ret_code = 1
 
         # Check submission file
-        base_check_code = (DIRNAME / "eval_tests" / "submission_format_test.txt").read_text()
+        base_check_code = T(".eval_tests.submission_format_test", ftype="txt").r()
         implementation.inject_files(**{"test/submission_format_test.py": base_check_code})
         # stdout += "----Submission Check 1-----\n"
         submission_check_out, submission_ret_code = implementation.execute_ret_code(
diff --git a/rdagent/components/coder/data_science/pipeline/eval_tests/submission_format_test.txt b/rdagent/components/coder/data_science/pipeline/eval_tests/submission_format_test.txt
@@ -25,13 +25,13 @@ find . | grep -i sample | grep -i submission | grep -v sample_submission.csv | g
 """
 
 # Find sample submission file dynamically
-input_dir = Path("/kaggle/input")
+input_dir = Path("{% include "scenarios.data_science.share:scen.input_path" %}")
 # Look for common variations of sample submission filenames
 sample_submission_files = list(input_dir.glob("*sample_submission*.csv")) + list(
     input_dir.glob("*sampleSubmission*.csv")
 )
 
-assert sample_submission_files, "Error: No sample submission file found in /kaggle/input/"
+assert sample_submission_files, "Error: No sample submission file found in {% include "scenarios.data_science.share:scen.input_path" %}"
 
 # Use first matching file
 sample_submission_name = sample_submission_files[0].name
diff --git a/rdagent/components/coder/data_science/pipeline/prompts.yaml b/rdagent/components/coder/data_science/pipeline/prompts.yaml
@@ -37,7 +37,7 @@ pipeline_coder:
 
 
     ## Guidelines
-    1. Ensure that the dataset is loaded strictly from `/kaggle/input/`, following the exact folder structure described in the **Data Folder Description**, and do not attempt to load data from the current directory (`./`).
+    1. Ensure that the dataset is loaded strictly from `{% include "scenarios.data_science.share:scen.input_path" %}`, following the exact folder structure described in the **Data Folder Description**, and do not attempt to load data from the current directory (`./`).
     2. You should avoid using logging module to output information in your generated code, and instead use the print() function.
     
     ## Exploratory Data Analysis (EDA) part(Required):
diff --git a/rdagent/components/coder/data_science/raw_data_loader/__init__.py b/rdagent/components/coder/data_science/raw_data_loader/__init__.py
@@ -226,7 +226,11 @@ def develop(self, exp):
         new_exp = super().develop(exp)
 
         env = get_ds_env(
-            extra_volumes={f"{DS_RD_SETTING.local_data_path}/{self.scen.competition}": "/kaggle/input"},
+            extra_volumes={
+                f"{DS_RD_SETTING.local_data_path}/{self.scen.competition}": T(
+                    "scenarios.data_science.share:scen.input_path"
+                ).r()
+            },
             running_timeout_period=DS_RD_SETTING.full_timeout,
         )
 
diff --git a/rdagent/components/coder/data_science/raw_data_loader/eval.py b/rdagent/components/coder/data_science/raw_data_loader/eval.py
@@ -46,7 +46,11 @@ def evaluate(
             )
 
         env = get_ds_env(
-            extra_volumes={f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": "/kaggle/input"}
+            extra_volumes={
+                f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": T(
+                    "scenarios.data_science.share:scen.input_path"
+                ).r()
+            }
         )
 
         # TODO: do we need to clean the generated temporary content?
diff --git a/rdagent/components/coder/data_science/raw_data_loader/prompts.yaml b/rdagent/components/coder/data_science/raw_data_loader/prompts.yaml
@@ -48,7 +48,7 @@ spec:
           - `test_ids` (DT): Identifiers for the test data.
         - Docstring Requirements:
           - Describe the purpose of the function.
-          - Specify the data source location (`/kaggle/input/`).
+          - Specify the data source location (`{% include "scenarios.data_science.share:scen.input_path" %}`).
           - Clearly define the structure and type of the output.
           - Inferred data shape to each input and output data variables. To uncertain dimension, use -1.
       2. Notes:
@@ -268,7 +268,7 @@ data_loader_coder:
     {% endif %}
 
     ## Guidelines
-    1. Ensure that the dataset is loaded strictly from `/kaggle/input/`, following the exact folder structure described in the **Data Folder Description**, and do not attempt to load data from the current directory (`./`).
+    1. Ensure that the dataset is loaded strictly from `{% include "scenarios.data_science.share:scen.input_path" %}`, following the exact folder structure described in the **Data Folder Description**, and do not attempt to load data from the current directory (`./`).
     2. You should avoid using logging module to output information in your generated code, and instead use the print() function.
     3. You should use the following cache decorator to cache the results of the function:
     ```python
diff --git a/rdagent/components/coder/data_science/share/eval.py b/rdagent/components/coder/data_science/share/eval.py
@@ -45,7 +45,9 @@ def evaluate(
             )
         env = get_ds_env()
         env.conf.extra_volumes = {
-            f"{DS_RD_SETTING.local_data_path}/{'sample/' if self.data_type == 'sample' else ''}{self.scen.competition}": "/kaggle/input"
+            f"{DS_RD_SETTING.local_data_path}/{'sample/' if self.data_type == 'sample' else ''}{self.scen.competition}": T(
+                "scenarios.data_science.share:scen.input_path"
+            ).r()
         }
 
         # 2) check the result and stdout after reruning the model.
diff --git a/rdagent/components/coder/data_science/share/prompts.yaml b/rdagent/components/coder/data_science/share/prompts.yaml
@@ -55,7 +55,7 @@ dump_model_eval:
 
 docdev:
   system: |-
-    You are a skilled developer and a Kaggle grandmaster. Your task is to create documentation for a data science solution.
+    {% include "scenarios.data_science.share:scen.role" %}  Your task is to create documentation for a data science solution.
 
     You will be given:
     - a list of files in the folder.
diff --git a/rdagent/components/coder/data_science/workflow/eval.py b/rdagent/components/coder/data_science/workflow/eval.py
@@ -55,7 +55,11 @@ def evaluate(
             )
 
         env = get_ds_env(
-            extra_volumes={f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": "/kaggle/input"}
+            extra_volumes={
+                f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": T(
+                    "scenarios.data_science.share:scen.input_path"
+                ).r()
+            }
         )
 
         # # DockerEnv for MLEBench submission validation
@@ -119,7 +123,7 @@ def evaluate(
                 score_ret_code = 1
 
         # Check submission file
-        base_check_code = (DIRNAME / "eval_tests" / "submission_format_test.txt").read_text()
+        base_check_code = T(".eval_tests.submission_format_test", ftype="txt").r()
         implementation.inject_files(**{"test/submission_format_test.py": base_check_code})
         # stdout += "----Submission Check 1-----\n"
         submission_check_out, submission_ret_code = implementation.execute_ret_code(
diff --git a/rdagent/components/coder/data_science/workflow/eval_tests/submission_format_test.txt b/rdagent/components/coder/data_science/workflow/eval_tests/submission_format_test.txt
@@ -22,12 +22,12 @@ find . | grep -i sample | grep -i submission | grep -v sample_submission.csv | g
 """
 
 # Find sample submission file dynamically
-input_dir = Path("/kaggle/input")
+input_dir = Path("{% include "scenarios.data_science.share:scen.input_path" %}")
 # Look for common variations of sample submission filenames
 sample_submission_files = list(input_dir.glob("*sample_submission*.csv")) + \
                          list(input_dir.glob("*sampleSubmission*.csv"))
 
-assert sample_submission_files, "Error: No sample submission file found in /kaggle/input/"
+assert sample_submission_files, "Error: No sample submission file found in {% include "scenarios.data_science.share:scen.input_path" %}"
 
 # Use first matching file
 sample_submission_name = sample_submission_files[0].name
diff --git a/rdagent/scenarios/data_science/dev/prompts.yaml b/rdagent/scenarios/data_science/dev/prompts.yaml
@@ -33,8 +33,10 @@ exp_feedback:
     Step 3: Analyze Experimental Results (if format and evaluation alignment correct)
     - Explicitly confirm or refute the hypothesis with precise data points or performance trends.
     - Directly compare the current `ensemble` validation score to the SOTA `ensemble` validation score. Do not focus on individual models unless anomalies are significant.
-    - If current `ensemble` validation score surpasses SOTA, set `"Replace Best Result": "yes"`; otherwise, set as "no".
+    - If current `ensemble` validation score surpasses SOTA `ensemble` validation score, set `"Replace Best Result": "yes"`; otherwise, set as "no".
     - Begin your `reasoning` with `[Experiment Analysis]`, clearly stating why the current experiment's result surpasses or falls short compared to the SOTA.
+    - NOTES:
+      - The experiments focus on the final ensemble results (Don't reject the results because they are still not perfect; e.g., the ensemble does not improve performance due to having only one model. We can improve that later. We are comparing the current ensemble validation score to the SOTA ensemble validation score to check if the overall performance is improved.)
  
     Provide detailed and constructive feedback structured as follows:
     Example JSON Structure for Result Analysis:
diff --git a/rdagent/scenarios/data_science/dev/runner/eval.py b/rdagent/scenarios/data_science/dev/runner/eval.py
@@ -34,7 +34,11 @@ def evaluate(
     ) -> DSCoSTEEREvalFeedback:
 
         env = get_ds_env(
-            extra_volumes={f"{DS_RD_SETTING.local_data_path}/{self.scen.competition}": "/kaggle/input"},
+            extra_volumes={
+                f"{DS_RD_SETTING.local_data_path}/{self.scen.competition}": T(
+                    "scenarios.data_science.share:scen.input_path"
+                ).r()
+            },
             running_timeout_period=DS_RD_SETTING.full_timeout,
         )
 
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/naive.yaml b/rdagent/scenarios/data_science/proposal/exp_gen/naive.yaml
@@ -1,6 +1,7 @@
 naive_gen:
   system: |-
-    You are a Kaggle Grandmaster and expert ML engineer with deep expertise in statistics, machine learning, and competition optimization.
+    {% include "scenarios.data_science.share:scen.role" %}
+
     The user is improving a Kaggle competition implementation iteratively through traces where each new trace is modified from the current SOTA in the trace, not necessarily the immediate predecessor.
     You will be given a competition scenario, previous SOTA (best) and failed experiments and feedbacks, the current SOTA implementation and feedback, and a list of identified problems.
 
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/prompts.yaml b/rdagent/scenarios/data_science/proposal/exp_gen/prompts.yaml
@@ -155,7 +155,8 @@ task_gen_model: # It is deprecated now, please refer to direct_exp_gen
 
 direct_exp_gen:
   system: |-
-    You are a world-class data scientist and machine learning engineer with deep expertise in statistics, mathematics, and computer science, and also a grandmaster in Kaggle competitions.
+    {% include "scenarios.data_science.share:scen.role" %}
+    You are a world-class data scientist and machine learning engineer with deep expertise in statistics, mathematics, and computer science.
     Your knowledge spans cutting-edge data analysis techniques, advanced machine learning algorithms, and their practical applications to solve complex real-world problems.
     
     The user is working on creating a solution for a Kaggle competition. Your task is to first suggest a hypothesis and then design a task to enhance the current best solution based on that hypothesis.
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/prompts_v2.yaml b/rdagent/scenarios/data_science/proposal/exp_gen/prompts_v2.yaml
@@ -1,6 +1,6 @@
 scenario_problem:
   system: |-
-    You are a Kaggle Grandmaster and expert ML engineer with deep expertise in statistics, machine learning, and competition optimization.
+    {% include "scenarios.data_science.share:scen.role" %}
     You will be given scenario and competition description and the current SOTA implementation and feedback.
     Your task is to analyze the given information and extract the **Scenario Problems** from the given materials.
 
@@ -33,7 +33,7 @@ scenario_problem:
 
 feedback_problem:
   system: |-
-    You are a Kaggle Grandmaster and expert ML engineer with deep expertise in statistics, machine learning, and competition optimization.
+    {% include "scenarios.data_science.share:scen.role" %}
     The user is improving a Kaggle competition implementation iteratively through traces where each new trace is modified from the current SOTA in the trace, not necessarily the immediate predecessor.
     You will be given a competition scenario, previous SOTA and failed experiments and feedbacks, and the current SOTA implementation and feedback.
     Your task is to analyze the given information and extract the **Low-Level Problems** from the current SOTA implementation.
@@ -60,7 +60,7 @@ feedback_problem:
 
 hypothesis_gen:
   system: |-
-    You are a Kaggle Grandmaster and expert ML engineer with deep expertise in statistics, machine learning, and competition optimization.
+    {% include "scenarios.data_science.share:scen.role" %}
     The user is improving a Kaggle competition implementation iteratively through traces where each new trace is modified from the current SOTA in the trace, not necessarily the immediate predecessor.
     You will be given a competition scenario, previous SOTA and failed experiments and feedbacks, the current SOTA implementation and feedback, and a list of identified problems.
     Your role involves two tasks:
@@ -126,7 +126,7 @@ hypothesis_gen:
 
 task_gen:
   system: |-
-    You are a Kaggle Grandmaster and expert ML engineer with deep expertise in statistics, machine learning, and competition optimization.
+    {% include "scenarios.data_science.share:scen.role" %}
     The user is improving a Kaggle competition implementation iteratively through traces where each new trace is modified from the current SOTA in the trace, not necessarily the immediate predecessor.
     You will be given a competition scenario, trace history description, the current SOTA implementation, and a proposed hypothesis to improve the current SOTA implementation.
     
diff --git a/rdagent/scenarios/data_science/scen/__init__.py b/rdagent/scenarios/data_science/scen/__init__.py
@@ -8,12 +8,14 @@
 from rdagent.core.scenario import Scenario
 from rdagent.log import rdagent_logger as logger
 from rdagent.oai.llm_utils import APIBackend
+from rdagent.scenarios.data_science.debug.data import create_debug_data
 from rdagent.scenarios.data_science.scen.utils import (
     describe_data_folder,
     describe_data_folder_v2,
 )
 from rdagent.scenarios.kaggle.kaggle_crawler import (
     crawl_descriptions,
+    download_data,
     leaderboard_scores,
 )
 from rdagent.utils.agent.tpl import T
@@ -23,6 +25,17 @@ class DataScienceScen(Scenario):
     """Data Science Scenario"""
 
     def __init__(self, competition: str) -> None:
+
+        # 1) prepare data
+        if not Path(f"{DS_RD_SETTING.local_data_path}/{competition}").exists():
+            logger.error(f"Please prepare data for competition {competition} first.")
+            raise FileNotFoundError(f"Cannot find {competition} in {DS_RD_SETTING.local_data_path}")
+
+        local_path = DS_RD_SETTING.local_data_path
+        if not Path(f"{local_path}/sample/{competition}").exists():
+            create_debug_data(competition, dataset_path=local_path)
+
+        # 2) collect information of competition.
         self.metric_name: str | None = (
             None  # It is None when initialization. After analysing, we'll assign the metric name
         )
@@ -36,13 +49,15 @@ def __init__(self, competition: str) -> None:
         )  # True indicates higher is better, False indicates lower is better
 
     def _get_description(self):
-        if (fp := Path(f"{DS_RD_SETTING.local_data_path}/{self.competition}.json")).exists():
+        if (fp := Path(f"{DS_RD_SETTING.local_data_path}/{self.competition}/description.md")).exists():
+            return fp.read_text()
+        elif (fp := Path(f"{DS_RD_SETTING.local_data_path}/{self.competition}.json")).exists():
             logger.info(f"Found {self.competition}.json, loading from local file.")
             with fp.open("r") as f:
                 return json.load(f)
         else:
             logger.error(
-                f"Cannot find {self.competition}.json in {DS_RD_SETTING.local_data_path}, please check the file."
+                f"Cannot find '{self.competition}.json' in {DS_RD_SETTING.local_data_path} or 'description.md' file, please check the file."
             )
 
     def _get_direction(self):
@@ -145,6 +160,10 @@ class KaggleScen(DataScienceScen):
           So we start from a simple one....
     """
 
+    def __init__(self, competition: str) -> None:
+        download_data(competition=competition, settings=DS_RD_SETTING, enable_create_debug_data=False)
+        super().__init__(competition)
+
     def _get_description(self):
         return crawl_descriptions(self.competition, DS_RD_SETTING.local_data_path)
 
diff --git a/rdagent/scenarios/data_science/scen/prompts.yaml b/rdagent/scenarios/data_science/scen/prompts.yaml
@@ -43,7 +43,7 @@ competition_description_template:
       "Submission channel number to each sample": "The number of channels in the output for each sample, e.g., 1 for regression, N for N class classification with probabilities, etc. A Integer. If not specified, it is 1."
       "Metric Evaluation Description": "A precise explanation of how the submissions are scored in this competition, including how the metric is calculated and any specific considerations.",
       "Metric Name": "The name of the metric which this competition use for scoring the submission."
-      "Metric direction": True or False as True means bigger metric number is better, False means smaller is better.
+      "Metric Direction": True or False as True means bigger metric number is better, False means smaller is better.
     }
   user: |-
     Competition Description: 
diff --git a/rdagent/scenarios/data_science/share.yaml b/rdagent/scenarios/data_science/share.yaml
@@ -88,6 +88,11 @@ describe: # some template to describe some object
     {% endif %}
 
 
+scen:  # customizable
+  role: |-
+    You are a Kaggle Grandmaster and expert ML engineer with deep expertise in statistics, machine learning, and competition optimization.
+  input_path: "/kaggle/input"
+
 component_description:
   DataLoadSpec: |-
     Loads raw competition data, ensuring proper data types, and providing an exploratory data analysis summary.
diff --git a/rdagent/scenarios/kaggle/kaggle_crawler.py b/rdagent/scenarios/kaggle/kaggle_crawler.py
@@ -108,7 +108,9 @@ def kaggle_description_css_selectors() -> tuple[str, str]:
     return descriptions
 
 
-def download_data(competition: str, settings: ExtendedBaseSettings = KAGGLE_IMPLEMENT_SETTING) -> None:
+def download_data(
+    competition: str, settings: ExtendedBaseSettings = KAGGLE_IMPLEMENT_SETTING, enable_create_debug_data: bool = True
+) -> None:
     local_path = settings.local_data_path
     if settings.if_using_mle_data:
         zipfile_path = f"{local_path}/zip_files"
@@ -177,7 +179,7 @@ def download_data(competition: str, settings: ExtendedBaseSettings = KAGGLE_IMPL
                     unzip_data(sub_zip_file, unzip_target_path=unzip_path)
 
     # sample data
-    if not Path(f"{local_path}/sample/{competition}").exists():
+    if enable_create_debug_data and not Path(f"{local_path}/sample/{competition}").exists():
         create_debug_data(competition, dataset_path=local_path)
 
 
diff --git a/rdagent/utils/agent/tpl.py b/rdagent/utils/agent/tpl.py

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -47,7 +47,11 @@ def evaluate(` |
| `47` | `47` | `)` |
| `48` | `48` |  |
| `49` | `49` | `env = get_ds_env(` |
| `50` |  | `- extra_volumes={f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": "/kaggle/input"}` |
|  | `50` | `+ extra_volumes={` |
|  | `51` | `+ f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": T(` |
|  | `52` | `+ "scenarios.data_science.share:scen.input_path"` |
|  | `53` | `+ ).r()` |
|  | `54` | `+ }` |
| `51` | `55` | `)` |
| `52` | `56` |  |
| `53` | `57` | `fname = "test/ensemble_test.txt"` |
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -43,7 +43,11 @@ def evaluate(` |
| `43` | `43` | `)` |
| `44` | `44` |  |
| `45` | `45` | `env = get_ds_env(` |
| `46` |  | `- extra_volumes={f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": "/kaggle/input"}` |
|  | `46` | `+ extra_volumes={` |
|  | `47` | `+ f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": T(` |
|  | `48` | `+ "scenarios.data_science.share:scen.input_path"` |
|  | `49` | `+ ).r()` |
|  | `50` | `+ }` |
| `47` | `51` | `)` |
| `48` | `52` |  |
| `49` | `53` | `# TODO: do we need to clean the generated temporary content?` |
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -57,7 +57,11 @@ def evaluate(` |
| `57` | `57` | `)` |
| `58` | `58` |  |
| `59` | `59` | `env = get_ds_env(` |
| `60` |  | `- extra_volumes={f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": "/kaggle/input"}` |
|  | `60` | `+ extra_volumes={` |
|  | `61` | `+ f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": T(` |
|  | `62` | `+ "scenarios.data_science.share:scen.input_path"` |
|  | `63` | `+ ).r()` |
|  | `64` | `+ }` |
| `61` | `65` | `)` |
| `62` | `66` |  |
| `63` | `67` | `if_model_removed = False` |
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -46,7 +46,11 @@ def evaluate(` |
| `46` | `46` | `)` |
| `47` | `47` |  |
| `48` | `48` | `env = get_ds_env(` |
| `49` |  | `- extra_volumes={f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": "/kaggle/input"}` |
|  | `49` | `+ extra_volumes={` |
|  | `50` | `+ f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": T(` |
|  | `51` | `+ "scenarios.data_science.share:scen.input_path"` |
|  | `52` | `+ ).r()` |
|  | `53` | `+ }` |
| `50` | `54` | `)` |
| `51` | `55` |  |
| `52` | `56` | `# TODO: do we need to clean the generated temporary content?` |
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -45,7 +45,9 @@ def evaluate(` |
| `45` | `45` | `)` |
| `46` | `46` | `env = get_ds_env()` |
| `47` | `47` | `env.conf.extra_volumes = {` |
| `48` |  | `- f"{DS_RD_SETTING.local_data_path}/{'sample/' if self.data_type == 'sample' else ''}{self.scen.competition}": "/kaggle/input"` |
|  | `48` | `+ f"{DS_RD_SETTING.local_data_path}/{'sample/' if self.data_type == 'sample' else ''}{self.scen.competition}": T(` |
|  | `49` | `+ "scenarios.data_science.share:scen.input_path"` |
|  | `50` | `+ ).r()` |
| `49` | `51` | `}` |
| `50` | `52` |  |
| `51` | `53` | `# 2) check the result and stdout after reruning the model.` |