Skip to content

Commit 1491407

Browse files
author
alan.cl
committed
feat(benchmark): update benchmark task to use latest Falcon GitHub repo questions
1 parent 3c7cfba commit 1491407

File tree

9 files changed

+1372
-633
lines changed

9 files changed

+1372
-633
lines changed

packages/dbgpt-core/src/dbgpt/util/benchmarks/StorageUtil.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ class StorageUtil:
1010

1111
YUQUE_URL_PREFIX = "https://yuque.com"
1212

13+
GITHUB_FALCON_PREFIX = "https://github.com/eosphoros-ai/Falcon"
14+
1315
@staticmethod
1416
def get_file_parse_type(file_path: Optional[str]) -> FileParseTypeEnum:
1517
"""Get file parsing type based on file path
@@ -28,5 +30,7 @@ def get_file_parse_type(file_path: Optional[str]) -> FileParseTypeEnum:
2830

2931
if file_path.strip().startswith(StorageUtil.YUQUE_URL_PREFIX):
3032
return FileParseTypeEnum.YU_QUE
33+
if file_path.strip().startswith(StorageUtil.GITHUB_FALCON_PREFIX):
34+
return FileParseTypeEnum.GITHUB
3135

3236
return FileParseTypeEnum.EXCEL
Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,5 @@
11
from .benchmark_service import BenchmarkService
2-
from .data_compare_service import DataCompareService
3-
from .file_parse_service import FileParseService
4-
from .user_input_execute_service import UserInputExecuteService
52

63
__all__ = [
74
"BenchmarkService",
8-
"FileParseService",
9-
"UserInputExecuteService",
10-
"DataCompareService",
115
]

packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/benchmark_service.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from dbgpt.model.cluster import WorkerManagerFactory
1919
from dbgpt.storage.metadata import BaseDao
2020
from dbgpt.util import PaginationResult, get_or_create_event_loop
21+
from dbgpt.util.benchmarks import StorageUtil
2122
from dbgpt_serve.evaluate.service.benchmark.task.benchmark_agent_task import (
2223
BenchmarkAgentTask,
2324
)
@@ -40,7 +41,6 @@
4041
from ...models.models import ServeDao, ServeEntity
4142
from ..fetchdata.benchmark_data_manager import get_benchmark_manager
4243
from .data_compare_service import DataCompareService
43-
from .ext.excel_file_parse import ExcelFileParseService
4444
from .models import (
4545
BaseInputModel,
4646
BenchmarkDataSets,
@@ -49,7 +49,6 @@
4949
BenchmarkModeTypeEnum,
5050
BenchmarkTaskResult,
5151
ContentTypeEnum,
52-
FileParseTypeEnum,
5352
InputType,
5453
OutputType,
5554
)
@@ -61,10 +60,7 @@
6160

6261
BENCHMARK_SERVICE_COMPONENT_NAME = "dbgpt_serve_evaluate_benchmark_service"
6362

64-
STANDARD_BENCHMARK_FILE_PATH = os.path.join(
65-
BENCHMARK_DATA_ROOT_PATH,
66-
"2025_07_27_public_500_standard_benchmark_question_list.xlsx",
67-
)
63+
STANDARD_BENCHMARK_FILE_PATH = "https://github.com/eosphoros-ai/Falcon"
6864

6965
BENCHMARK_OUTPUT_RESULT_PATH = os.path.join(BENCHMARK_DATA_ROOT_PATH, "result")
7066

@@ -94,11 +90,10 @@ def __init__(
9490
super().__init__(system_app)
9591
self.rag_service = get_rag_service(system_app)
9692
self.prompt_service = get_prompt_service(system_app)
97-
self._file_parse_type = FileParseTypeEnum.EXCEL
93+
self._file_parse_type = StorageUtil.get_file_parse_type(STANDARD_BENCHMARK_FILE_PATH)
9894

99-
fps = ExcelFileParseService()
10095
dcs = DataCompareService()
101-
self.user_input_execute_service = UserInputExecuteService(fps, dcs)
96+
self.user_input_execute_service = UserInputExecuteService(dcs, self._file_parse_type)
10297

10398
self.trigger_executor = ThreadPoolExecutor(
10499
max_workers=5, thread_name_prefix="benchmark-fileWrite"
@@ -289,7 +284,7 @@ async def run_dataset_benchmark(
289284
await manager.load_data()
290285
logger.info(
291286
f"Benchmark dataset loaded from {manager._config.repo_url} "
292-
f"dir={manager._config.data_dir}"
287+
f"dir={manager._config.data_dirs}"
293288
)
294289
except Exception as e:
295290
logger.error(

packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/ext/__init__.py

Whitespace-only changes.
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
import json
2+
import logging
3+
from pathlib import Path
4+
from typing import Any, Dict, List, Optional
5+
6+
import pandas as pd
7+
from openpyxl import Workbook, load_workbook
8+
9+
from dbgpt.util.benchmarks.ExcelUtils import ExcelUtils
10+
11+
from ..file_parse_service import FileParseService
12+
from ..models import (
13+
AnswerExecuteModel,
14+
BaseInputModel,
15+
BenchmarkDataSets,
16+
DataCompareStrategyConfig,
17+
)
18+
19+
logger = logging.getLogger(__name__)
20+
21+
22+
class ExcelFileParseService(FileParseService):
    """Parse benchmark question and standard-answer sets from Excel workbooks.

    Input workbooks use a fixed column layout (see ``parse_input_sets``);
    the standard-answer sheet is keyed by Chinese column headers
    (see ``parse_standard_benchmark_sets``).
    """

    def parse_input_sets(self, path: str) -> BenchmarkDataSets:
        """
        Parse input sets from excel file

        Args:
            path: File location path

        Returns:
            BenchmarkDataSets: Parsed data sets. On a parse error the error
            is logged and whatever was parsed so far (possibly nothing) is
            returned — this is a deliberate best-effort contract.

        Raises:
            RuntimeError: If no input stream could be opened for ``path``.
        """
        input_stream = self.get_input_stream(path)

        if input_stream is None:
            raise RuntimeError(f"file not found! path: {path}")

        input_sets = BenchmarkDataSets()
        workbook = None

        try:
            # data_only=True yields cached formula results instead of formulas.
            workbook = load_workbook(input_stream, data_only=True)
            input_list = []

            # Data lives on the first worksheet only.
            sheet = workbook.worksheets[0]

            # openpyxl rows are 1-based; row 1 is the header, so start at 2.
            for row_num in range(2, sheet.max_row + 1):
                row = sheet[row_num]
                if ExcelUtils.is_row_empty(row):
                    continue

                # Fixed column layout (0-based): 0=serial no,
                # 1=analysis model id, 2=question, 3=self-defined tags,
                # 4=knowledge, 5=LLM output, 8=prompt.
                input_model = BaseInputModel(
                    serial_no=int(
                        ExcelUtils.get_cell_value_as_string(row[0]) or "0"
                    ),
                    analysis_model_id=ExcelUtils.get_cell_value_as_string(row[1]),
                    question=ExcelUtils.get_cell_value_as_string(row[2]),
                    self_define_tags=ExcelUtils.get_cell_value_as_string(row[3]),
                    llm_output=ExcelUtils.get_cell_value_as_string(row[5]),
                    knowledge=ExcelUtils.get_cell_value_as_string(row[4]),
                    prompt=ExcelUtils.get_cell_value_as_string(row[8]),
                )

                input_list.append(input_model)

            input_sets.data_list = input_list
        except Exception as e:
            # Best-effort: log and fall through with a partial/empty result.
            logger.error(f"parse excel error, path: {path}, errorMsg: {e}")
        finally:
            try:
                if workbook is not None:
                    workbook.close()
            except Exception as e:
                logger.error(f"close workbook error, path: {path}, errorMsg: {e}")

        return input_sets

    def parse_standard_benchmark_sets(
        self, standard_excel_path: str
    ) -> List[AnswerExecuteModel]:
        """
        Parse the standard (golden) benchmark answer sheet.

        Columns (by Chinese header): "编号" serial number (mandatory —
        rows without a parseable one are skipped), "用户问题" user question,
        "数据集ID" dataset id, "标准答案SQL" standard answer SQL,
        "是否排序" whether result ordering matters (defaults to True),
        "标准结果" optional expected results as one JSON object per line.

        Args:
            standard_excel_path: Path of the standard-answer Excel file.

        Returns:
            List[AnswerExecuteModel]: One model per valid row.
        """
        df = pd.read_excel(standard_excel_path, sheet_name=0)
        outputs: List[AnswerExecuteModel] = []
        for _, row in df.iterrows():
            # Serial number is mandatory; skip rows where it is missing/bad.
            try:
                serial_no = int(row["编号"])
            except Exception:
                continue

            question = row.get("用户问题")
            analysis_model_id = row.get("数据集ID")

            # Read each optional cell once instead of calling row.get twice.
            raw_llm_output = row.get("标准答案SQL")
            llm_output = None if pd.isna(raw_llm_output) else str(raw_llm_output)

            order_by = True
            raw_order_by = row.get("是否排序")
            if not pd.isna(raw_order_by):
                try:
                    order_by = bool(int(raw_order_by))
                except Exception:
                    # Unparseable flag → keep the default (ordering matters).
                    order_by = True

            std_result: Optional[List[Dict[str, List[str]]]] = None
            raw_std_result = row.get("标准结果")
            if not pd.isna(raw_std_result):
                std_result = self._parse_multi_standard_result(str(raw_std_result))

            strategy_config = DataCompareStrategyConfig(
                strategy="CONTAIN_MATCH",
                order_by=order_by,
                standard_result=std_result,
            )
            outputs.append(
                AnswerExecuteModel(
                    serialNo=serial_no,
                    analysisModelId=analysis_model_id,
                    question=question,
                    llmOutput=llm_output,
                    executeResult=std_result,
                    strategyConfig=strategy_config,
                )
            )
        return outputs

    def _parse_multi_standard_result(
        self, std_result_raw: str
    ) -> Optional[List[Dict[str, List[str]]]]:
        """
        Parse multiple standard results from raw string data.

        Handles multiple results separated by newlines and parses each line
        as a JSON object; lines that fail to parse are logged and skipped.

        Args:
            std_result_raw (str): Raw standard result string with multiple lines

        Returns:
            Optional[List[Dict[str, List[str]]]]: List of parsed dictionaries,
            or None if parsing fails or no valid data
        """
        try:
            std_result_raw = std_result_raw.strip()
            if not std_result_raw:
                return None

            # Multiple results are newline-separated, one JSON dict per line.
            result_list = []
            for line in std_result_raw.split("\n"):
                line = line.strip()
                if not line:
                    continue
                try:
                    result_list.append(json.loads(line))
                except Exception as e:
                    logger.warning(
                        f"Failed to parse line as JSON: {line}, error: {e}"
                    )

            return result_list if result_list else None
        except Exception as e:
            logger.error(f"parse multiple standard results error: {e}")
            return None

0 commit comments

Comments
 (0)