Skip to content
Merged
23 changes: 23 additions & 0 deletions deploy_ai_search/text_2_sql_query_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
SearchableField,
SimpleField,
ComplexField,
SemanticField,
SemanticPrioritizedFields,
SemanticConfiguration,
SemanticSearch,
)
from ai_search import AISearch
from environment import (
Expand Down Expand Up @@ -107,3 +111,22 @@ def get_index_fields(self) -> list[SearchableField]:
]

return fields

def get_semantic_search(self) -> SemanticSearch:
    """Build the semantic search settings for this index.

    A single semantic configuration is produced. It is named after
    ``self.semantic_config_name`` and prioritises the ``Question`` field
    as its only content field.

    Returns:
        SemanticSearch: The semantic search configuration"""
    # Assemble the pieces from the inside out: field -> prioritised fields
    # -> named configuration -> semantic search wrapper.
    question_field = SemanticField(field_name="Question")
    prioritized = SemanticPrioritizedFields(content_fields=[question_field])
    configuration = SemanticConfiguration(
        name=self.semantic_config_name,
        prioritized_fields=prioritized,
    )

    return SemanticSearch(configurations=[configuration])
3 changes: 3 additions & 0 deletions text_2_sql/autogen/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Multi-Shot Text2SQL Component - AutoGen

This component is still very much a work in progress; more documentation is coming soon.
80 changes: 80 additions & 0 deletions text_2_sql/autogen/agentic_text_2_sql.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import dotenv\n",
"import logging\n",
"from autogen_agentchat.task import Console\n",
"from agentic_text_2_sql import text_2_sql_generator"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"logging.basicConfig(level=logging.INFO)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dotenv.load_dotenv()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result = text_2_sql_generator.run_stream(task=\"What are the total number of sales within 2008?\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"await Console(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
81 changes: 81 additions & 0 deletions text_2_sql/autogen/agentic_text_2_sql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from autogen_agentchat.task import TextMentionTermination, MaxMessageTermination
from autogen_agentchat.teams import SelectorGroupChat
from utils.models import MINI_MODEL
from utils.llm_agent_creator import LLMAgentCreator
import logging
from custom_agents.sql_query_cache_agent import SqlQueryCacheAgent
import json

# Engine settings passed to every agent that writes or repairs SQL. Keeping
# them in one mapping stops the generation and correction agents drifting apart.
_SQL_ENGINE_SETTINGS = {
    "target_engine": "Microsoft SQL Server",
    "engine_specific_rules": "Use TOP X to limit the number of rows returned instead of LIMIT X. NEVER USE LIMIT X as it produces a syntax error.",
}

# Agents taking part in the Text2SQL group chat. Each LLM agent is created
# by name through LLMAgentCreator; the cache agent is a custom agent class.
SQL_QUERY_GENERATION_AGENT = LLMAgentCreator.create(
    "sql_query_generation_agent", **_SQL_ENGINE_SETTINGS
)
SQL_SCHEMA_SELECTION_AGENT = LLMAgentCreator.create("sql_schema_selection_agent")
SQL_QUERY_CORRECTION_AGENT = LLMAgentCreator.create(
    "sql_query_correction_agent", **_SQL_ENGINE_SETTINGS
)
SQL_QUERY_CACHE_AGENT = SqlQueryCacheAgent()
ANSWER_AGENT = LLMAgentCreator.create("answer_agent")
QUESTION_DECOMPOSITION_AGENT = LLMAgentCreator.create("question_decomposition_agent")


def _cache_lookup_has_hit(content):
    """Return True when the cache agent's JSON payload contains cached questions."""
    try:
        cache_result = json.loads(content)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; TypeError covers non-text
        # content. An unreadable payload is treated as a cache miss rather
        # than aborting the whole conversation.
        logging.warning("Could not parse cache agent output: %r", content)
        return False

    # A miss may be serialised as "null" (or any non-object JSON value), on
    # which calling .get() would raise AttributeError.
    if not isinstance(cache_result, dict):
        return False

    return cache_result.get("cached_questions_and_schemas") is not None


def text_2_sql_generator_selector_func(messages):
    """Choose the next agent to speak based on the conversation so far.

    Routing rules, driven by the most recent message:

    * Only one message so far: ``sql_query_cache_agent``.
    * Cache agent replied with content: ``sql_query_correction_agent`` on a
      cache hit, otherwise ``sql_schema_selection_agent``.
    * Question decomposition agent replied: ``sql_schema_selection_agent``.
    * Schema selection agent replied: ``sql_query_generation_agent``.
    * Correction agent replied exactly ``VALIDATED``: ``answer_agent``;
      any other correction agent reply: ``sql_query_correction_agent`` again.

    Args:
        messages: Conversation history. Each item must expose ``source``
            and ``content`` attributes.

    Returns:
        The name of the next agent, or ``None`` when no rule applies
        (including an empty history).
    """
    logging.info("Messages: %s", messages)
    decision = None  # Stays None when none of the rules below match.

    if len(messages) == 1:
        decision = "sql_query_cache_agent"

    elif messages:
        last_message = messages[-1]
        source = last_message.source
        content = last_message.content

        if source == "sql_query_cache_agent" and content is not None:
            if _cache_lookup_has_hit(content):
                decision = "sql_query_correction_agent"
            else:
                decision = "sql_schema_selection_agent"

        elif source == "question_decomposition_agent":
            decision = "sql_schema_selection_agent"

        elif source == "sql_schema_selection_agent":
            decision = "sql_query_generation_agent"

        elif source == "sql_query_correction_agent":
            if content == "VALIDATED":
                decision = "answer_agent"
            else:
                decision = "sql_query_correction_agent"

    # Log the decision
    logging.info("Decision: %s", decision)

    return decision


# Termination is the `|` combination of a "TERMINATE" text-mention condition
# and a 10-message cap. The agent prompts under llm_agents/ tell agents to end
# their reply with 'TERMINATE' when the conversation should stop.
termination = TextMentionTermination("TERMINATE") | MaxMessageTermination(10)

# Group chat over all Text2SQL agents. Speaker routing is supplied by
# text_2_sql_generator_selector_func.
# NOTE(review): when the selector returns None, the model client presumably
# chooses the next speaker - confirm against the autogen SelectorGroupChat docs.
text_2_sql_generator = SelectorGroupChat(
    [
        SQL_QUERY_GENERATION_AGENT,
        SQL_SCHEMA_SELECTION_AGENT,
        SQL_QUERY_CORRECTION_AGENT,
        SQL_QUERY_CACHE_AGENT,
        ANSWER_AGENT,
        QUESTION_DECOMPOSITION_AGENT,
    ],
    allow_repeated_speaker=False,
    model_client=MINI_MODEL,
    termination_condition=termination,
    selector_func=text_2_sql_generator_selector_func,
)

# text_2_sql_cache_updater = SelectorGroupChat(
# [SQL_QUERY_CACHE_AGENT], model_client=MINI_MODEL, termination_condition=termination
# )
Empty file.
51 changes: 51 additions & 0 deletions text_2_sql/autogen/custom_agents/sql_query_cache_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from typing import AsyncGenerator, List, Sequence

from autogen_agentchat.agents import BaseChatAgent
from autogen_agentchat.base import Response
from autogen_agentchat.messages import AgentMessage, ChatMessage, TextMessage
from autogen_core.base import CancellationToken
from utils.sql_utils import fetch_queries_from_cache
import json
import logging


class SqlQueryCacheAgent(BaseChatAgent):
    """Chat agent that looks up cached queries for the user's question.

    The agent replies with a single ``TextMessage`` whose content is the
    JSON-encoded result of ``fetch_queries_from_cache``.
    """

    def __init__(self):
        """Register the agent under its fixed name and description."""
        agent_name = "sql_query_cache_agent"
        agent_description = "An agent that fetches the queries from the cache based on the user question."
        super().__init__(agent_name, agent_description)

    @property
    def produced_message_types(self) -> List[type[ChatMessage]]:
        """Message types this agent can emit (text only)."""
        return [TextMessage]

    async def on_messages(
        self, messages: Sequence[ChatMessage], cancellation_token: CancellationToken
    ) -> Response:
        """Drain ``on_messages_stream`` and return the last ``Response`` it yields.

        Args:
            messages: Messages handed to the agent.
            cancellation_token: Token forwarded to the stream.

        Returns:
            Response: The final response produced by the stream.
        """
        final_response: Response | None = None
        stream = self.on_messages_stream(messages, cancellation_token)
        async for item in stream:
            # Keep only Response items; the latest one wins.
            if isinstance(item, Response):
                final_response = item
        assert final_response is not None
        return final_response

    async def on_messages_stream(
        self, messages: Sequence[ChatMessage], cancellation_token: CancellationToken
    ) -> AsyncGenerator[AgentMessage | Response, None]:
        """Look up the cache for the first message and yield the result.

        Args:
            messages: Messages handed to the agent; the content of the
                first one is used as the user question.
            cancellation_token: Not used by this agent.

        Yields:
            Response: A response wrapping a ``TextMessage`` with the
            JSON-serialised cache lookup result.
        """
        question = messages[0].content

        # Fetch the queries from the cache based on the user question.
        logging.info("Fetching queries from cache based on the user question...")
        cache_lookup = await fetch_queries_from_cache(question)

        payload = json.dumps(cache_lookup)
        reply = TextMessage(content=payload, source=self.name)
        yield Response(chat_message=reply)

    async def on_reset(self, cancellation_token: CancellationToken) -> None:
        """Reset hook; this agent has nothing to reset."""
        return None
30 changes: 30 additions & 0 deletions text_2_sql/autogen/environment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import os
from enum import Enum


class IdentityType(Enum):
    """Identity options recognised by ``get_identity_type``.

    Each member's value is the exact string expected in the
    ``IdentityType`` environment variable.
    """

    # Selected by the string "user_assigned".
    USER_ASSIGNED = "user_assigned"
    # Selected by the string "system_assigned".
    SYSTEM_ASSIGNED = "system_assigned"
    # Selected by the string "key".
    KEY = "key"


def get_identity_type() -> IdentityType:
    """Resolve the identity type from the ``IdentityType`` environment variable.

    Returns:
        IdentityType: The member whose value equals the variable's content.

    Raises:
        ValueError: If the variable is unset or matches no member.
    """
    configured = os.environ.get("IdentityType")

    # Compare against each member's value; an unset variable (None) or an
    # unknown string falls through to the error below.
    for candidate in IdentityType:
        if candidate.value == configured:
            return candidate

    raise ValueError("Invalid identity type")
20 changes: 20 additions & 0 deletions text_2_sql/autogen/llm_agents/answer_agent.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
model:
gpt-4o-mini
description:
"An agent that takes the final results from the SQL query and writes the answer to the user's question"
system_message:
"Write a data-driven answer that directly addresses the user's question. Use the results from the SQL query to provide the answer. Do not make up or guess the answer.

Return your answer in the following format:

{
'answer': '<GENERATED ANSWER>',
'sources': [
{'title': <SOURCE SCHEMA NAME 1>, 'chunk': <SOURCE 1 CONTEXT CHUNK>, 'reference': '<SOURCE 1 SQL QUERY>'},
{'title': <SOURCE SCHEMA NAME 2>, 'chunk': <SOURCE 2 CONTEXT CHUNK>, 'reference': '<SOURCE 2 SQL QUERY>'}
]
}

Title is the entity name of the schema, chunk is the result of the SQL query and reference is the SQL query used to generate the answer.

End your answer with 'TERMINATE'"
10 changes: 10 additions & 0 deletions text_2_sql/autogen/llm_agents/question_decomposition_agent.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
model:
gpt-4o-mini
description:
"An agent that will decompose the user's question into smaller parts to be used in the SQL queries. Use this agent when the user's question is too complex to be answered in one SQL query. Only use if the user's question is too complex to be answered in one SQL query.

Only use this agent once per user question and after the 'Query Cache Agent' if the results are none."
system_message:
"You are a helpful AI Assistant that specialises in decomposing complex user questions into smaller parts that can be used in SQL queries.

Break down the user's question into smaller parts that can be used in SQL queries."
19 changes: 19 additions & 0 deletions text_2_sql/autogen/llm_agents/sql_query_correction_agent.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
model:
gpt-4o-mini
description:
"An agent that will look at the SQL query, SQL query results and correct any mistakes in the SQL query to ensure the correct results are returned. Use this agent AFTER the SQL query has been executed and the results are not as expected."
system_message:
"You are a helpful AI Assistant that specialises in correcting invalid SQL queries or queries that do not return the expected results.

Review the SQL query provided and correct any errors or issues that you find. Bear in mind that the target database engine is {{ target_engine }}; SQL queries must be compatible with {{ target_engine }}. {{ engine_specific_rules }}

Ensure that the corrected query returns the expected results in context of the question.

If there are no errors and the SQL query is correct, return 'VALIDATED'.

If the SQL query needs adjustment, correct the SQL query and provide the corrected SQL query and then run the query.

If you are consistently unable to correct the SQL query and cannot use the schemas to answer the question, say 'I am unable to correct the SQL query. Please ask another question.' and then end your answer with 'TERMINATE'"
tools:
- sql_get_entity_schemas_tool
- sql_query_execution_tool
25 changes: 25 additions & 0 deletions text_2_sql/autogen/llm_agents/sql_query_generation_agent.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
model:
gpt-4o-mini
description:
"An agent that can generate SQL queries once given the schema and the user's question. It will run the SQL query to fetch the results. Use this agent after the SQL Schema Selection Agent has selected the correct schema."
system_message:
"You are a helpful AI Assistant that specialises in writing and executing SQL Queries to answer a given user's question.

If you need more information from the user to generate the SQL query, ask the user for the information you need with a question and end your answer with 'TERMINATE'.

Only use schema / column information provided when constructing a SQL query. Do not use any other entities and columns in your SQL query, other than those defined above.
Do not make up or guess column names.

The target database engine is {{ target_engine }}; SQL queries must be compatible with {{ target_engine }}. {{ engine_specific_rules }}
You must only provide SELECT SQL queries.
For a given entity, use the 'SelectFromEntity' property returned in the schema in the SELECT FROM part of the SQL query. If the property is {'SelectFromEntity': 'test_schema.test_table'}, the select statement will be formulated from 'SELECT <VALUES> FROM test_schema.test_table WHERE <CONDITION>'.

If you don't know how the value is formatted in a column, run a query against the column to get the unique values that might match your query or use the corresponding lookup values. Use a 'like' operator to match the values, rather than a direct match unless you are sure of the value.
Some columns in the schema may have the properties 'AllowedValues' or 'SampleValues'. Use these values to determine the possible values that can be used in the SQL query.

The complete entity relationship graph shows you all the entities and their relationships. You can use this information to get a better understanding of the schema and the relationships between the entities and request more schema information if needed.

Always run any SQL query you generate to return the results."
tools:
- sql_query_execution_tool
- sql_get_entity_schemas_tool
16 changes: 16 additions & 0 deletions text_2_sql/autogen/llm_agents/sql_schema_selection_agent.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
model:
gpt-4o-mini
description:
"An agent that can take a user's question and extract the schema of a view or table in the SQL Database by selecting the most relevant entity based on the search term.

Call this in parallel if needed multiple times. Limit the use of this agent where possible."
system_message:
"You are a helpful AI Assistant that specialises in selecting relevant SQL schemas to answer a given user's question.

Use the tools available to you to select the correct schemas that will help. Extract key terms from the user's question and use them to search for the correct schema.

Limit the number of calls to the 'sql_get_entity_schemas_tool' tool to avoid unnecessary calls.

If you are unsure about the schema, you can ask the user for more information or ask for clarification."
tools:
- sql_get_entity_schemas_tool
10 changes: 10 additions & 0 deletions text_2_sql/autogen/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
autogen-core==0.4.0.dev6
autogen-agentchat==0.4.0.dev6
autogen-ext[openai,azure]==0.4.0.dev6
aioodbc
azure-search
azure-search-documents==11.6.0b5
azure-identity
python-dotenv
openai
jinja2
Empty file.
Loading