microsoft · BenConstable9 · Nov 21, 2024 · Nov 20, 2024 · Nov 20, 2024 · Nov 20, 2024
@@ -7,6 +7,10 @@
     SearchableField,
     SimpleField,
     ComplexField,
+    SemanticField,
+    SemanticPrioritizedFields,
+    SemanticConfiguration,
+    SemanticSearch,
 )
 from ai_search import AISearch
 from environment import (
@@ -107,3 +111,22 @@ def get_index_fields(self) -> list[SearchableField]:
         ]
 
         return fields
+
+    def get_semantic_search(self) -> SemanticSearch:
+        """This function returns the semantic search configuration for sql index
+
+        Returns:
+            SemanticSearch: The semantic search configuration"""
+
+        semantic_config = SemanticConfiguration(
+            name=self.semantic_config_name,
+            prioritized_fields=SemanticPrioritizedFields(
+                content_fields=[
+                    SemanticField(field_name="Question"),
+                ],
+            ),
+        )
+
+        semantic_search = SemanticSearch(configurations=[semantic_config])
+
+        return semantic_search
@@ -0,0 +1,3 @@
+# Multi-Shot Text2SQL Component - AutoGen
+
+This component is still very much a work in progress; more documentation is coming soon.
@@ -0,0 +1,80 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import dotenv\n",
+        "import logging\n",
+        "from autogen_agentchat.task import Console\n",
+        "from agentic_text_2_sql import text_2_sql_generator"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "logging.basicConfig(level=logging.INFO)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "dotenv.load_dotenv()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "result = text_2_sql_generator.run_stream(task=\"What are the total number of sales within 2008?\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "await Console(result)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": []
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.12.6"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
+}
@@ -0,0 +1,81 @@
+from autogen_agentchat.task import TextMentionTermination, MaxMessageTermination
+from autogen_agentchat.teams import SelectorGroupChat
+from utils.models import MINI_MODEL
+from utils.llm_agent_creator import LLMAgentCreator
+import logging
+from custom_agents.sql_query_cache_agent import SqlQueryCacheAgent
+import json
+
+SQL_QUERY_GENERATION_AGENT = LLMAgentCreator.create(
+    "sql_query_generation_agent",
+    target_engine="Microsoft SQL Server",
+    engine_specific_rules="Use TOP X to limit the number of rows returned instead of LIMIT X. NEVER USE LIMIT X as it produces a syntax error.",
+)
+SQL_SCHEMA_SELECTION_AGENT = LLMAgentCreator.create("sql_schema_selection_agent")
+SQL_QUERY_CORRECTION_AGENT = LLMAgentCreator.create(
+    "sql_query_correction_agent",
+    target_engine="Microsoft SQL Server",
+    engine_specific_rules="Use TOP X to limit the number of rows returned instead of LIMIT X. NEVER USE LIMIT X as it produces a syntax error.",
+)
+SQL_QUERY_CACHE_AGENT = SqlQueryCacheAgent()
+ANSWER_AGENT = LLMAgentCreator.create("answer_agent")
+QUESTION_DECOMPOSITION_AGENT = LLMAgentCreator.create("question_decomposition_agent")
+
+
+def text_2_sql_generator_selector_func(messages):
+    logging.info("Messages: %s", messages)
+    decision = None  # Initialize decision variable
+
+    if len(messages) == 1:
+        decision = "sql_query_cache_agent"
+
+    elif (
+        messages[-1].source == "sql_query_cache_agent"
+        and messages[-1].content is not None
+    ):
+        cache_result = json.loads(messages[-1].content)
+        if cache_result.get("cached_questions_and_schemas") is not None:
+            decision = "sql_query_correction_agent"
+        else:
+            decision = "sql_schema_selection_agent"
+
+    elif messages[-1].source == "question_decomposition_agent":
+        decision = "sql_schema_selection_agent"
+
+    elif messages[-1].source == "sql_schema_selection_agent":
+        decision = "sql_query_generation_agent"
+
+    elif (
+        messages[-1].source == "sql_query_correction_agent"
+        and messages[-1].content == "VALIDATED"
+    ):
+        decision = "answer_agent"
+
+    elif messages[-1].source == "sql_query_correction_agent":
+        decision = "sql_query_correction_agent"
+
+    # Log the decision
+    logging.info("Decision: %s", decision)
+
+    return decision
+
+
+termination = TextMentionTermination("TERMINATE") | MaxMessageTermination(10)
+text_2_sql_generator = SelectorGroupChat(
+    [
+        SQL_QUERY_GENERATION_AGENT,
+        SQL_SCHEMA_SELECTION_AGENT,
+        SQL_QUERY_CORRECTION_AGENT,
+        SQL_QUERY_CACHE_AGENT,
+        ANSWER_AGENT,
+        QUESTION_DECOMPOSITION_AGENT,
+    ],
+    allow_repeated_speaker=False,
+    model_client=MINI_MODEL,
+    termination_condition=termination,
+    selector_func=text_2_sql_generator_selector_func,
+)
+
+# text_2_sql_cache_updater = SelectorGroupChat(
+#     [SQL_QUERY_CACHE_AGENT], model_client=MINI_MODEL, termination_condition=termination
+# )
@@ -0,0 +1,51 @@
+from typing import AsyncGenerator, List, Sequence
+
+from autogen_agentchat.agents import BaseChatAgent
+from autogen_agentchat.base import Response
+from autogen_agentchat.messages import AgentMessage, ChatMessage, TextMessage
+from autogen_core.base import CancellationToken
+from utils.sql_utils import fetch_queries_from_cache
+import json
+import logging
+
+
+class SqlQueryCacheAgent(BaseChatAgent):
+    def __init__(self):
+        super().__init__(
+            "sql_query_cache_agent",
+            "An agent that fetches the queries from the cache based on the user question.",
+        )
+
+    @property
+    def produced_message_types(self) -> List[type[ChatMessage]]:
+        return [TextMessage]
+
+    async def on_messages(
+        self, messages: Sequence[ChatMessage], cancellation_token: CancellationToken
+    ) -> Response:
+        # Calls the on_messages_stream.
+        response: Response | None = None
+        async for message in self.on_messages_stream(messages, cancellation_token):
+            if isinstance(message, Response):
+                response = message
+        assert response is not None
+        return response
+
+    async def on_messages_stream(
+        self, messages: Sequence[ChatMessage], cancellation_token: CancellationToken
+    ) -> AsyncGenerator[AgentMessage | Response, None]:
+        user_question = messages[0].content
+
+        # Fetch the queries from the cache based on the user question.
+        logging.info("Fetching queries from cache based on the user question...")
+
+        cached_queries = await fetch_queries_from_cache(user_question)
+
+        yield Response(
+            chat_message=TextMessage(
+                content=json.dumps(cached_queries), source=self.name
+            )
+        )
+
+    async def on_reset(self, cancellation_token: CancellationToken) -> None:
+        pass
@@ -0,0 +1,30 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+import os
+from enum import Enum
+
+
+class IdentityType(Enum):
+    """The supported identity types.
+
+    Each member's value is the string that ``get_identity_type`` accepts in
+    the ``IdentityType`` environment variable."""
+
+    # NOTE(review): presumably user-/system-assigned managed identities and
+    # key-based authentication - confirm against the code that consumes these.
+    USER_ASSIGNED = "user_assigned"
+    SYSTEM_ASSIGNED = "system_assigned"
+    KEY = "key"
+
+
+def get_identity_type() -> IdentityType:
+    """This function returns the identity type.
+
+    Returns:
+        IdentityType: The identity type
+    """
+    identity = os.environ.get("IdentityType")
+
+    if identity == "user_assigned":
+        return IdentityType.USER_ASSIGNED
+    elif identity == "system_assigned":
+        return IdentityType.SYSTEM_ASSIGNED
+    elif identity == "key":
+        return IdentityType.KEY
+    else:
+        raise ValueError("Invalid identity type")
@@ -0,0 +1,20 @@
+model:
+  gpt-4o-mini
+description:
+  "An agent that takes the final results from the SQL query and writes the answer to the user's question"
+system_message:
+  "Write a data-driven answer that directly addresses the user's question. Use the results from the SQL query to provide the answer. Do not make up or guess the answer.
+
+  Return your answer in the following format:
+
+  {
+    'answer': '<GENERATED ANSWER>',
+    'sources': [
+      {'title': <SOURCE SCHEMA NAME 1>, 'chunk': <SOURCE 1 CONTEXT CHUNK>, 'reference': '<SOURCE 1 SQL QUERY>'},
+      {'title': <SOURCE SCHEMA NAME 2>, 'chunk': <SOURCE 2 CONTEXT CHUNK>, 'reference': '<SOURCE 2 SQL QUERY>'}
+    ]
+  }
+
+  Title is the entity name of the schema, chunk is the result of the SQL query and reference is the SQL query used to generate the answer.
+
+  End your answer with 'TERMINATE'"
@@ -0,0 +1,10 @@
+model:
+  gpt-4o-mini
+description:
+  "An agent that will decompose the user's question into smaller parts to be used in the SQL queries. Use this agent when the user's question is too complex to be answered in one SQL query. Only use if the user's question is too complex to be answered in one SQL query.
+
+  Only use this agent once per user question and after the 'Query Cache Agent' if the results are none."
+system_message:
+  "You are a helpful AI Assistant that specialises in decomposing complex user questions into smaller parts that can be used in SQL queries.
+
+  Break down the user's question into smaller parts that can be used in SQL queries."
@@ -0,0 +1,19 @@
+model:
+  gpt-4o-mini
+description:
+  "An agent that will look at the SQL query, SQL query results and correct any mistakes in the SQL query to ensure the correct results are returned. Use this agent AFTER the SQL query has been executed and the results are not as expected."
+system_message:
+  "You are a helpful AI Assistant that specialises in correcting invalid SQL queries or queries that do not return the expected results.
+
+  Review the SQL query provided and correct any errors or issues that you find. Bear in mind that the target database engine is {{ target_engine }}; SQL queries must be compatible with {{ target_engine }}. {{ engine_specific_rules }}
+
+  Ensure that the corrected query returns the expected results in context of the question.
+
+  If there are no errors and the SQL query is correct, return 'VALIDATED'.
+
+  If the SQL query needs adjustment, correct the SQL query and provide the corrected SQL query and then run the query.
+
+  If you are consistently unable to correct the SQL query and cannot use the schemas to answer the question, say 'I am unable to correct the SQL query. Please ask another question.' and then end your answer with 'TERMINATE'"
+tools:
+  - sql_get_entity_schemas_tool
+  - sql_query_execution_tool
@@ -0,0 +1,25 @@
+model:
+  gpt-4o-mini
+description:
+  "An agent that can generate SQL queries once given the schema and the user's question. It will run the SQL query to fetch the results. Use this agent after the SQL Schema Selection Agent has selected the correct schema."
+system_message:
+  "You are a helpful AI Assistant that specialises in writing and executing SQL Queries to answer a given user's question.
+
+  If you need more information from the user to generate the SQL query, ask the user for the information you need with a question and end your answer with 'TERMINATE'.
+
+  Only use schema / column information provided when constructing a SQL query. Do not use any other entities and columns in your SQL query, other than those defined above.
+  Do not make up or guess column names.
+
+  The target database engine is {{ target_engine }}; SQL queries must be compatible with {{ target_engine }}. {{ engine_specific_rules }}
+  You must only provide SELECT SQL queries.
+  For a given entity, use the 'SelectFromEntity' property returned in the schema in the SELECT FROM part of the SQL query. If the property is {'SelectFromEntity': 'test_schema.test_table'}, the select statement will be formulated as 'SELECT <VALUES> FROM test_schema.test_table WHERE <CONDITION>'.
+
+  If you don't know how the value is formatted in a column, run a query against the column to get the unique values that might match your query or use the corresponding lookup values. Use a 'like' operator to match the values, rather than a direct match unless you are sure of the value.
+  Some columns in the schema may have the properties 'AllowedValues' or 'SampleValues'. Use these values to determine the possible values that can be used in the SQL query.
+
+  The complete entity relationship graph shows you all the entities and their relationships. You can use this information to get a better understanding of the schema and the relationships between the entities and request more schema information if needed.
+
+  Always run any SQL query you generate to return the results."
+tools:
+  - sql_query_execution_tool
+  - sql_get_entity_schemas_tool
@@ -0,0 +1,16 @@
+model:
+  gpt-4o-mini
+description:
+  "An agent that can take a user's question and extract the schema of a view or table in the SQL Database by selecting the most relevant entity based on the search term.
+
+  Call this in parallel if needed multiple times. Limit the use of this agent where possible."
+system_message:
+  "You are a helpful AI Assistant that specialises in selecting relevant SQL schemas to answer a given user's question.
+
+  Use the tools available to you to select the correct schemas that will help. Extract key terms from the user's question and use them to search for the correct schema.
+
+  Limit the number of calls to the 'sql_get_entity_schemas_tool' tool to avoid unnecessary calls.
+
+  If you are unsure about the schema, you can ask the user for more information or ask for clarification."
+tools:
+  - sql_get_entity_schemas_tool
@@ -0,0 +1,10 @@
+autogen-core==0.4.0.dev6
+autogen-agentchat==0.4.0.dev6
+autogen-ext[openai,azure]==0.4.0.dev6
+aioodbc
+azure-search
+azure-search-documents==11.6.0b5
+azure-identity
+python-dotenv
+openai
+jinja2
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		# Multi-Shot Text2SQL Component - AutoGen

		This component is still very much a work in progress; more documentation is coming soon.