Skip to content

Commit f79f81c

Browse files
authored
feat(flow): Add Chat Data template (#2716)
1 parent 7e7581e commit f79f81c

File tree

7 files changed

+3356
-37
lines changed

7 files changed

+3356
-37
lines changed

packages/dbgpt-app/src/dbgpt_app/component_configs.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -137,18 +137,21 @@ def _initialize_openapi(system_app: SystemApp):
137137

138138

139139
def _initialize_operators():
    """Scan and register all AWEL operators shipped with the app.

    Replaces the previous hand-maintained import list: every module under
    the listed packages is scanned and each ``BaseOperator`` subclass found
    is registered via ``ModelScanner``.

    Returns:
        The scanner's full mapping of registered operator items.
    """
    from dbgpt.core.awel import BaseOperator
    from dbgpt.util.module_utils import ModelScanner, ScannerConfig

    modules = ["dbgpt_app.operators", "dbgpt_serve.agent.resource"]

    scanner = ModelScanner[BaseOperator]()
    for module in modules:
        config = ScannerConfig(
            module_path=module,
            base_class=BaseOperator,
        )
        # scan_and_register imports the module tree and registers matches;
        # its per-module return value is not needed here (the original
        # collected it into a dict that was never read).
        scanner.scan_and_register(config)
    return scanner.get_registered_items()
152155

153156

154157
def _initialize_code_server(system_app: SystemApp):

packages/dbgpt-app/src/dbgpt_app/operators/datasource.py

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@
105105
type=DBResource,
106106
description=_("The datasource to retrieve the context"),
107107
)
108+
108109
_PARAMETER_PROMPT_TEMPLATE = Parameter.build_from(
109110
_("Prompt Template"),
110111
"prompt_template",
@@ -172,7 +173,7 @@
172173
_("SQL result"),
173174
"sql_result",
174175
str,
175-
description=_("The result of the SQL execution"),
176+
description=_("The result of the SQL execution(GPT-Vis format)"),
176177
)
177178

178179
_INPUTS_SQL_DICT_LIST = IOField.build_from(
@@ -189,7 +190,9 @@
189190
class GPTVisMixin:
    async def save_view_message(self, dag_ctx: DAGContext, view: str):
        """Persist *view* as the model-output view on the shared DAG context.

        Overwrites any previously stored view for the same key.
        """
        key = BaseLLM.SHARE_DATA_KEY_MODEL_OUTPUT_VIEW
        await dag_ctx.save_to_share_data(key, view, overwrite=True)
193196

194197

195198
class HODatasourceRetrieverOperator(MapOperator[str, HOContextBody]):
@@ -286,14 +289,36 @@ async def map(self, question: str) -> HOContextBody:
286289
class HODatasourceExecutorOperator(GPTVisMixin, MapOperator[dict, str]):
287290
"""Execute the context from the datasource."""
288291

292+
_share_data_key = "__datasource_executor_result__"
293+
294+
class MarkdownMapper(MapOperator[str, str]):
    # Renders the executor's cached DataFrame as a Markdown table for the
    # "markdown_result" output field.
    async def map(self, context: str) -> str:
        """Convert the result to markdown."""
        # NOTE(review): the *context* argument is ignored; the DataFrame is
        # read from DAG share data saved by HODatasourceExecutorOperator.map.

        from dbgpt.util.pd_utils import df_to_markdown

        df = await self.current_dag_context.get_from_share_data(
            HODatasourceExecutorOperator._share_data_key
        )
        return df_to_markdown(df)
304+
289305
metadata = ViewMetadata(
290306
label=_("Datasource Executor Operator"),
291307
name="higher_order_datasource_executor_operator",
292308
description=_("Execute the context from the datasource."),
293309
category=OperatorCategory.DATABASE,
294310
parameters=[_PARAMETER_DATASOURCE.new()],
295311
inputs=[_INPUTS_SQL_DICT.new()],
296-
outputs=[_OUTPUTS_SQL_RESULT.new()],
312+
outputs=[
313+
_OUTPUTS_SQL_RESULT.new(),
314+
IOField.build_from(
315+
_("Markdown result"),
316+
"markdown_result",
317+
str,
318+
description=_("The markdown result of the SQL execution"),
319+
mappers=[MarkdownMapper],
320+
),
321+
],
297322
tags={"order": TAGS_ORDER_HIGH},
298323
)
299324

@@ -314,8 +339,16 @@ async def map(self, sql_dict: dict) -> str:
314339
sql = sql_dict.get("sql")
315340
if not sql:
316341
return sql_dict.get("thoughts", "No SQL found in the input dictionary.")
342+
343+
thoughts = sql_dict.get("thoughts", "")
344+
317345
data_df = await self._datasource.query_to_df(sql)
346+
# Save the result to share data, for markdown mapper
347+
await self.current_dag_context.save_to_share_data(
348+
HODatasourceExecutorOperator._share_data_key, data_df
349+
)
318350
view = await vis.display(chart=sql_dict, data_df=data_df)
351+
view = thoughts + "\n\n" + view
319352
await self.save_view_message(self.current_dag_context, view)
320353
return view
321354

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
from functools import cache
2+
from typing import Optional
3+
4+
from dbgpt.core import (
5+
ChatPromptTemplate,
6+
HumanPromptTemplate,
7+
LLMClient,
8+
ModelMessage,
9+
SystemPromptTemplate,
10+
)
11+
from dbgpt.core.awel import JoinOperator
12+
from dbgpt.core.awel.flow.base import (
13+
TAGS_ORDER_HIGH,
14+
IOField,
15+
OperatorCategory,
16+
Parameter,
17+
ViewMetadata,
18+
)
19+
from dbgpt.core.interface.llm import ModelRequest
20+
from dbgpt.model.operators import MixinLLMOperator
21+
from dbgpt.util.i18n_utils import _
22+
from dbgpt_app.operators.datasource import GPTVisMixin
23+
24+
# Default system prompt (English). Template variables: {data_report} — the
# Markdown-table query result — and {user_input} — the user's question.
_DEFAULT_PROMPT_EN = """You are a helpful AI assistant.

Please carefully read the data in the Markdown table format below, the data is a
database query result based on the user question. Please analyze and summarize the
data carefully, and provide a summary report in markdown format.

<data-report>
{data_report}
</data-report>

user question:
{user_input}

Please answer in the same language as the user's question.
"""

# Default system prompt (Chinese); same template variables as the English one.
_DEFAULT_PROMPT_ZH = """你是一个有用的AI助手。

请你仔细阅读下面的 Markdown 表格格式的数据,这是一份根据用户问题查询到的数据库的数据,\
你需要根据数据仔细分析和总结,给出一份总结报告,使用 markdown 格式输出。

<data-report>
{data_report}
</data-report>

用户的问题:
{user_input}

请用用户提问的语言回答。
"""

# Default human (user) prompt: echoes the raw user input verbatim.
_DEFAULT_USER_PROMPT = """\
{user_input}
"""
58+
59+
60+
@cache
def _get_default_prompt(language: str) -> ChatPromptTemplate:
    """Build the default chat prompt for the given language.

    Results are memoized per language code via ``functools.cache``.

    Args:
        language: Language code; ``"zh"`` selects the Chinese system prompt,
            anything else falls back to English.

    Returns:
        A ``ChatPromptTemplate`` with a system message (expects the
        ``data_report`` and ``user_input`` variables) and a human message
        (``user_input``).
    """
    # The user prompt is language-independent; only the system prompt varies.
    # (The original duplicated the identical user-prompt assignment in both
    # branches.)
    sys_prompt = _DEFAULT_PROMPT_ZH if language == "zh" else _DEFAULT_PROMPT_EN

    return ChatPromptTemplate(
        messages=[
            SystemPromptTemplate.from_template(sys_prompt),
            HumanPromptTemplate.from_template(_DEFAULT_USER_PROMPT),
        ]
    )
75+
76+
77+
class ReportAnalystOperator(MixinLLMOperator, JoinOperator[str]):
    """Join operator that asks an LLM to analyze a markdown data report.

    Joins a user question with one or more data-report strings, renders them
    into a chat prompt, and returns the model's analysis text.
    """

    metadata = ViewMetadata(
        label=_("Report Analyst"),
        name="report_analyst",
        description=_("Report Analyst"),
        category=OperatorCategory.DATABASE,
        tags={"order": TAGS_ORDER_HIGH},
        parameters=[
            Parameter.build_from(
                _("Prompt Template"),
                "prompt_template",
                ChatPromptTemplate,
                description=_("The prompt template for the conversation."),
                optional=True,
                default=None,
            ),
            Parameter.build_from(
                _("Model Name"),
                "model",
                str,
                optional=True,
                default=None,
                description=_("The model name."),
            ),
            Parameter.build_from(
                _("LLM Client"),
                "llm_client",
                LLMClient,
                optional=True,
                default=None,
                description=_(
                    "The LLM Client, how to connect to the LLM model, if not provided,"
                    " it will use the default client deployed by DB-GPT."
                ),
            ),
        ],
        inputs=[
            IOField.build_from(
                _("User question"),
                "question",
                str,
                description=_("The question of user"),
            ),
            IOField.build_from(
                _("The data report"),
                "data_report",
                str,
                # NOTE(review): description passed positionally here, unlike
                # the keyword style used above — confirm this matches the
                # IOField.build_from signature.
                _("The data report in markdown format."),
                dynamic=True,
            ),
        ],
        outputs=[
            IOField.build_from(
                _("Report Analyst Result"),
                "report_analyst_result",
                str,
                description=_("The report analyst result."),
            )
        ],
    )

    def __init__(
        self,
        prompt_template: Optional[ChatPromptTemplate] = None,
        model: Optional[str] = None,
        llm_client: Optional[LLMClient] = None,
        **kwargs,
    ):
        # Initialize both bases explicitly: JoinOperator wires the combine
        # function; MixinLLMOperator wires the (optional) LLM client.
        JoinOperator.__init__(self, combine_function=self._join_func, **kwargs)
        MixinLLMOperator.__init__(self, llm_client=llm_client, **kwargs)

        # Cache the configured template/model; None means "use defaults".
        # NOTE(review): original comment here said "User must select a history
        # merge mode" — appears copy-pasted from another operator.
        self._prompt_template = prompt_template
        self._model = model

    @property
    def prompt_template(self) -> ChatPromptTemplate:
        """Get the prompt template.

        Falls back to the built-in default prompt, localized by the current
        system language, when no template was configured.
        """
        language = "en"
        if self.system_app:
            language = self.system_app.config.get_current_lang()
        if self._prompt_template is None:
            return _get_default_prompt(language)
        return self._prompt_template

    async def _join_func(self, question: str, data_report: str, *args):
        """Combine the inputs and query the LLM.

        Args:
            question: The user question.
            data_report: The first data report; any additional *string*
                dynamic inputs in ``args`` are appended, newline-separated
                (non-string extras are silently ignored).

        Returns:
            The generated analysis text (including "thinking" content).

        Raises:
            Exception: If the LLM client reports no available models.
        """
        # Merge the primary report with any extra dynamic string inputs.
        dynamic_inputs = [data_report]
        for arg in args:
            if isinstance(arg, str):
                dynamic_inputs.append(arg)
        data_report = "\n".join(dynamic_inputs)
        messages = self.prompt_template.format_messages(
            user_input=question,
            data_report=data_report,
        )
        model_messages = ModelMessage.from_base_messages(messages)
        models = await self.llm_client.models()
        if not models:
            raise Exception("No models available.")
        # Prefer the explicitly configured model; otherwise the first
        # model the client reports.
        model = self._model or models[0].model

        model_request = ModelRequest.build_request(model, messages=model_messages)
        model_output = await self.llm_client.generate(model_request)
        text = model_output.gen_text_with_thinking()

        return text
183+
184+
185+
class StringJoinOperator(GPTVisMixin, JoinOperator[str]):
    """Join operator for strings.

    This operator joins the input strings with a specified separator.
    """

    metadata = ViewMetadata(
        label=_("String Join Operator"),
        name="string_join_operator",
        description=_("Merge multiple inputs into a single string."),
        category=OperatorCategory.COMMON,
        parameters=[
            Parameter.build_from(
                _("Separator"),
                "separator",
                str,
                optional=True,
                default="\n\n",
                description=_("The separator to join the strings."),
            ),
        ],
        inputs=[
            IOField.build_from(
                _("Input Strings"),
                "input_strings",
                str,
                description=_("The input strings to join."),
                dynamic=True,
            ),
        ],
        outputs=[
            IOField.build_from(
                _("Joined String"),
                "joined_string",
                str,
                description=_("The joined string."),
            )
        ],
        tags={"order": TAGS_ORDER_HIGH},
    )

    def __init__(self, separator: str = "\n\n", **kwargs):
        super().__init__(combine_function=self._join_func, **kwargs)
        # Separator inserted between each pair of joined inputs.
        self.separator = separator

    async def _join_func(self, *args) -> str:
        """Join the strings with the separator."""
        view = self.separator.join(args)
        # Also publish the joined string as the model-output view
        # (via GPTVisMixin), so the UI renders it.
        await self.save_view_message(self.current_dag_context, view)
        return view

packages/dbgpt-core/src/dbgpt/util/pd_utils.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,25 @@ def csv_colunm_foramt(val):
2020
return val
2121
except ValueError:
2222
return val
23+
24+
25+
def df_to_markdown(df: pd.DataFrame, index=False) -> str:
    """Convert a pandas DataFrame to a Markdown table.

    Args:
        df: The DataFrame to render.
        index: If True, include the DataFrame index as a leading ``index``
            column.

    Returns:
        The Markdown table: header row, separator row, then one line per
        DataFrame row, joined with newlines.

    Note:
        Cell values are converted with ``str`` and not escaped, so values
        containing ``|`` or newlines will break the table layout.
    """
    # Build the column list once; the index variant just prepends a column.
    # (The original built the whole non-index table first and threw it away
    # when index=True; it also crashed on non-string column names.)
    columns = [str(c) for c in df.columns]
    if index:
        columns = ["index"] + columns

    header = "| " + " | ".join(columns) + " |"
    separator = "| " + " | ".join(["---"] * len(columns)) + " |"

    rows = []
    for idx, row in df.iterrows():
        cells = [str(v) for v in row.values]
        if index:
            cells.insert(0, str(idx))
        rows.append("| " + " | ".join(cells) + " |")

    return "\n".join([header, separator] + rows)

0 commit comments

Comments
 (0)