diff --git a/deploy_ai_search/text_2_sql_schema_store.py b/deploy_ai_search/text_2_sql_schema_store.py
index 5f8c24d..3c7b50c 100644
--- a/deploy_ai_search/text_2_sql_schema_store.py
+++ b/deploy_ai_search/text_2_sql_schema_store.py
@@ -32,7 +32,7 @@ class DatabaseEngine(StrEnum):
     """An enumeration to represent a database engine."""
 
     SNOWFLAKE = "SNOWFLAKE"
-    SQL_SERVER = "SQL_SERVER"
+    TSQL = "TSQL"
     DATABRICKS = "DATABRICKS"
 
 
@@ -69,7 +69,7 @@ def excluded_fields_for_database_engine(self):
         all_engine_specific_fields = ["Warehouse", "Database", "Catalog"]
         if self.database_engine == DatabaseEngine.SNOWFLAKE:
             engine_specific_fields = ["Warehouse", "Database"]
-        elif self.database_engine == DatabaseEngine.SQL_SERVER:
+        elif self.database_engine == DatabaseEngine.TSQL:
             engine_specific_fields = ["Database"]
         elif self.database_engine == DatabaseEngine.DATABRICKS:
             engine_specific_fields = ["Catalog"]
diff --git a/text_2_sql/README.md b/text_2_sql/README.md
index 3e52275..fc25680 100644
--- a/text_2_sql/README.md
+++ b/text_2_sql/README.md
@@ -39,6 +39,7 @@ Three different iterations are presented and code provided for:
  - **Iteration 2:** Injection of a brief description of the available entities is injected into the prompt. This limits the number of tokens used and avoids filling the prompt with confusing schema information.
  - **Iteration 3:** Indexing the entity definitions in a vector database, such as AI Search, and querying it to retrieve the most relevant entities for the key terms from the query.
   - **Iteration 4:** Keeping an index of commonly asked questions and which schema / SQL query they resolve to - this index is generated by the LLM when it encounters a question that has not been previously asked. Additionally, indexing the entity definitions in a vector database, such as AI Search _(same as Iteration 3)_. First querying this index to see if a similar SQL query can be obtained _(if high probability of exact SQL query match, the results can be pre-fetched)_. If not, falling back to the schema index, and querying it to retrieve the most relevant entities for the key terms from the query.
+  - **Iteration 5:** Moves the Iteration 4 approach into a multi-agent approach for improved reasoning and query generation. With separation into agents, different agents can focus on one task only, and provide a better overall flow and response quality. See more details below.
 
 All approaches limit the number of tokens used and avoids filling the prompt with confusing schema information.
 
@@ -48,15 +49,17 @@ To improve the scalability and accuracy in SQL Query generation, the entity rela
 
 For the query cache enabled approach, AI Search is used as a vector based cache, but any other cache that supports vector queries could be used, such as Redis.
 
-### Full Logical Flow for Vector Based Approach
+### Full Logical Flow for Agentic Vector Based Approach
 
-The following diagram shows the logical flow within the Vector Based plugin. In an ideal scenario, the questions will follow the _Pre-Fetched Cache Results Path** which leads to the quickest answer generation. In cases where the question is not known, the plugin will fall back the other paths accordingly and generate the SQL query using the LLM. The cache is then updated with the newly generated query and schemas.
+The following diagram shows the logical flow within the multi-agent system. In an ideal scenario, the questions will follow the _Pre-Fetched Cache Results Path_ which leads to the quickest answer generation. In cases where the question is not known, the group chat selector will fall back to the other agents accordingly and generate the SQL query using the LLMs. The cache is then updated with the newly generated query and schemas.
+
+Unlike the previous approaches, **gpt-4o-mini** can be used as each agent's prompt is small and focuses on a single simple task.
 
 As the query cache is shared between users (no data is stored in the cache), a new user can benefit from the pre-mapped question and schema resolution in the index.
 
 **Database results were deliberately not stored within the cache. Storing them would have removed one of the key benefits of the Text2SQL plugin, the ability to get near-real time information inside a RAG application. Instead, the query is stored so that the most-recent results can be obtained quickly. Additionally, this retains the ability to apply Row or Column Level Security.**
 
-![Vector Based with Query Cache Logical Flow.](./images/Text2SQL%20Query%20Cache.png "Vector Based with Query Cache Logical Flow")
+![Vector Based with Query Cache Logical Flow.](./images/Agentic%20Text2SQL%20Query%20Cache.png "Agentic Vector Based with Query Cache Logical Flow")
 
 ### Caching Strategy
 
@@ -68,20 +71,22 @@ The cache strategy implementation is a simple way to prove that the system works
 - **Always update:** Always add all questions into the cache when they are asked. The sample code in the repository currently implements this approach, but this could lead to poor SQL queries reaching the cache. One of the other caching strategies would be better production version.
 
 ### Comparison of Iterations
-| | Common Text2SQL Approach | Prompt Based Multi-Shot Text2SQL Approach | Vector Based Multi-Shot Text2SQL Approach | Vector Based Multi-Shot Text2SQL Approach With Query Cache |
-|-|-|-|-|-|
+| | Common Text2SQL Approach | Prompt Based Multi-Shot Text2SQL Approach | Vector Based Multi-Shot Text2SQL Approach | Vector Based Multi-Shot Text2SQL Approach With Query Cache | Agentic Vector Based Multi-Shot Text2SQL Approach With Query Cache |
+|-|-|-|-|-|-|
 |**Advantages** | Fast for a limited number of entities. | Significant reduction in token usage. | Significant reduction in token usage. | Significant reduction in token usage.
-| | | | Scales well to multiple entities. | Scales well to multiple entities. |
-| | | | Uses a vector approach to detect the best fitting entity which is faster than using an LLM. Matching is offloaded to AI Search. | Uses a vector approach to detect the best fitting entity which is faster than using an LLM. Matching is offloaded to AI Search. |
-| | | | | Significantly faster to answer similar questions as best fitting entity detection is skipped. Observed tests resulted in almost half the time for final output compared to the previous iteration. |
-| | | | | Significantly faster execution time for known questions. Total execution time can be reduced by skipping the query generation step. |
-|**Disadvantages** | Slows down significantly as the number of entities increases. | Uses LLM to detect the best fitting entity which is slow compared to a vector approach. | AI Search adds additional cost to the solution. | Slower than other approaches for the first time a question with no similar questions in the cache is asked. |
-| | Consumes a significant number of tokens as number of entities increases. | As number of entities increases, token usage will grow but at a lesser rate than Iteration 1. | | AI Search adds additional cost to the solution. |
-| | LLM struggled to differentiate which table to choose with the large amount of information passed. | | |
+| | | | Scales well to multiple entities. | Scales well to multiple entities. | Scales well to multiple entities with small agents. |
+| | | | Uses a vector approach to detect the best fitting entity which is faster than using an LLM. Matching is offloaded to AI Search. | Uses a vector approach to detect the best fitting entity which is faster than using an LLM. Matching is offloaded to AI Search. | Uses a vector approach to detect the best fitting entity which is faster than using an LLM. Matching is offloaded to AI Search. |
+| | | | | Significantly faster to answer similar questions as best fitting entity detection is skipped. Observed tests resulted in almost half the time for final output compared to the previous iteration. | Significantly faster to answer similar questions as best fitting entity detection is skipped. Observed tests resulted in almost half the time for final output compared to the previous iteration. |
+| | | | | Significantly faster execution time for known questions. Total execution time can be reduced by skipping the query generation step. | Significantly faster execution time for known questions. Total execution time can be reduced by skipping the query generation step. |
+| | | | |  | Instruction following and accuracy is improved by decomposing the task into smaller tasks. |
+| | | | |  | Handles query decomposition for complex questions. |
+|**Disadvantages** | Slows down significantly as the number of entities increases. | Uses LLM to detect the best fitting entity which is slow compared to a vector approach. | AI Search adds additional cost to the solution. | Slower than other approaches for the first time a question with no similar questions in the cache is asked. | Slower than other approaches for the first time a question with no similar questions in the cache is asked. |
+| | Consumes a significant number of tokens as number of entities increases. | As number of entities increases, token usage will grow but at a lesser rate than Iteration 1. | | AI Search adds additional cost to the solution. | AI Search and multiple agents add additional cost to the solution. |
+| | LLM struggled to differentiate which table to choose with the large amount of information passed. | | | | |
 |**Code Availability**| | | | |
-| Semantic Kernel | Yes :heavy_check_mark: | Yes :heavy_check_mark: | Yes :heavy_check_mark: | Yes :heavy_check_mark: |
-| LangChain | | | | |
-| AutoGen | | | | | |
+| Semantic Kernel | Yes :heavy_check_mark: | Yes :heavy_check_mark: | Yes :heavy_check_mark: | Yes :heavy_check_mark: | |
+| LangChain | | | | | |
+| AutoGen | | | | | Yes :heavy_check_mark: |
 
 ### Complete Execution Time Comparison for Approaches
 
@@ -247,13 +252,28 @@ The following environmental variables control the behaviour of the Vector Based
 - **Text2Sql__UseQueryCache** - controls whether the query cached index is checked before using the standard schema index.
 - **Text2Sql__PreRunQueryCache** - controls whether the top result from the query cache index (if enabled) is pre-fetched against the data source to include the results in the prompt.
 
+## Agentic Vector Based Approach (Iteration 5)
+
+This approach builds on the Vector Based SQL Plugin approach, but adds an agentic approach to the solution.
+
+This agentic system contains the following agents:
+
+- **Query Cache Agent:** Responsible for checking the cache for previously asked questions.
+- **Query Decomposition Agent:** Responsible for decomposing complex questions into sub-questions that can be answered with SQL.
+- **Schema Selection Agent:** Responsible for extracting key terms from the question and checking the index store for the queries.
+- **SQL Query Generation Agent:** Responsible for using the previously extracted schemas and generated SQL queries to answer the question. This agent can request more schemas if needed. This agent will run the query.
+- **SQL Query Verification Agent:** Responsible for verifying that the SQL query and its results will answer the question.
+- **Answer Generation Agent:** Responsible for taking the database results and generating the final answer for the user.
+
+The combination of these agents allows the system to answer complex questions, whilst staying under the token limits when including the database schemas. The query cache ensures that previously asked questions can be answered quickly to avoid degrading user experience.
+
 ## Code Availability
 
-| | Common Text2SQL Approach | Prompt Based Multi-Shot Text2SQL Approach | Vector Based Multi-Shot Text2SQL Approach | Vector Based Multi-Shot Text2SQL Approach With Query Cache |
-|-|-|-|-|-|
-| Semantic Kernel | Yes :heavy_check_mark: | Yes :heavy_check_mark: | Yes :heavy_check_mark: | Yes :heavy_check_mark: |
-| LangChain | | | | |
-| AutoGen | | | | | |
+| | Common Text2SQL Approach | Prompt Based Multi-Shot Text2SQL Approach | Vector Based Multi-Shot Text2SQL Approach | Vector Based Multi-Shot Text2SQL Approach With Query Cache | Agentic Vector Based Multi-Shot Text2SQL Approach With Query Cache |
+|-|-|-|-|-|-|
+| Semantic Kernel | Yes :heavy_check_mark: | Yes :heavy_check_mark: | Yes :heavy_check_mark: | Yes :heavy_check_mark: | |
+| LangChain | | | | | |
+| AutoGen | | | | | Yes :heavy_check_mark: |
 
 See the relevant directory for the code in the provided framework.
 
diff --git a/text_2_sql/autogen/README.md b/text_2_sql/autogen/README.md
index 4b05a49..1114e0c 100644
--- a/text_2_sql/autogen/README.md
+++ b/text_2_sql/autogen/README.md
@@ -1,3 +1,80 @@
 # Multi-Shot Text2SQL Component - AutoGen
 
-Very much still work in progress, more documentation coming soon.
+The implementation is written for [AutoGen](https://github.com/microsoft/autogen) in Python, although it can easily be adapted for C#.
+
+**Still work in progress, expect a lot of updates shortly**
+
+**The provided AutoGen code only implements Iteration 5 (Agentic Approach)**
+
+## Full Logical Flow for Agentic Vector Based Approach
+
+The following diagram shows the logical flow within the multi-agent system. In an ideal scenario, the questions will follow the _Pre-Fetched Cache Results Path_ which leads to the quickest answer generation. In cases where the question is not known, the group chat selector will fall back to the other agents accordingly and generate the SQL query using the LLMs. The cache is then updated with the newly generated query and schemas.
+
+Unlike the previous approaches, **gpt-4o-mini** can be used as each agent's prompt is small and focuses on a single simple task.
+
+As the query cache is shared between users (no data is stored in the cache), a new user can benefit from the pre-mapped question and schema resolution in the index. There are multiple possible strategies for updating the query cache, see the possible options in the Text2SQL README.
+
+**Database results were deliberately not stored within the cache. Storing them would have removed one of the key benefits of the Text2SQL plugin, the ability to get near-real time information inside a RAG application. Instead, the query is stored so that the most-recent results can be obtained quickly. Additionally, this retains the ability to apply Row or Column Level Security.**
+
+![Vector Based with Query Cache Logical Flow.](../images/Agentic%20Text2SQL%20Query%20Cache.png "Agentic Vector Based with Query Cache Logical Flow")
+
+## Provided Notebooks & Scripts
+
+- `./agentic_text_2_sql.ipynb` provides an example of how to utilise the Agentic Vector Based Text2SQL approach to query the database. The query cache plugin will be enabled or disabled depending on the environmental parameters.
+
+## Agents
+
+This approach builds on the Vector Based SQL Plugin approach, but adds an agentic approach to the solution.
+
+This agentic system contains the following agents:
+
+- **Query Cache Agent:** Responsible for checking the cache for previously asked questions.
+- **Query Decomposition Agent:** Responsible for decomposing complex questions into sub-questions that can be answered with SQL.
+- **Schema Selection Agent:** Responsible for extracting key terms from the question and checking the index store for the queries.
+- **SQL Query Generation Agent:** Responsible for using the previously extracted schemas and generated SQL queries to answer the question. This agent can request more schemas if needed. This agent will run the query.
+- **SQL Query Verification Agent:** Responsible for verifying that the SQL query and its results will answer the question.
+- **Answer Generation Agent:** Responsible for taking the database results and generating the final answer for the user.
+
+The combination of these agents allows the system to answer complex questions, whilst staying under the token limits when including the database schemas. The query cache ensures that previously asked questions can be answered quickly to avoid degrading user experience.
+
+All agents can be found in `/agents/`.
+
+## agentic_text_2_sql.py
+
+This is the main entry point for the agentic system. In here, the `Selector Group Chat` is configured with the termination conditions to orchestrate the agents within the system.
+
+A custom transition selector is used to automatically transition between agents dependent on the last one that was used. In some cases, this choice is delegated to an LLM to decide on the most appropriate action. This mixed approach allows for speed when needed (e.g. always calling Query Cache Agent first), but will allow the system to react dynamically to the events.
+
+## Utils
+
+### ai-search.py
+
+This util file contains helper functions for interacting with AI Search.
+
+### llm_agent_creator.py
+
+This util file creates the agents in the AutoGen framework based on the configuration files.
+
+### models.py
+
+This util file creates the model connections to Azure OpenAI for the agents.
+
+### sql.py
+
+#### get_entity_schema()
+
+This method is called by the AutoGen framework automatically, when instructed to do so by the LLM, to search the AI Search instance with the given text. The LLM is able to pass the key terms from the user query, and retrieve a ranked list of the most suitable entities to answer the question.
+
+The search text passed is vectorised against the entity level **Description** columns. A hybrid Semantic Reranking search is applied against the **EntityName**, **Entity**, **Columns/Name** fields.
+
+#### fetch_queries_from_cache()
+
+The vector based with query cache uses the `fetch_queries_from_cache()` method to fetch the most relevant previous query and injects it into the prompt before the initial LLM call. The use of Auto-Function Calling here is avoided to reduce the response time as the cache index will always be used first.
+
+If the score of the top result is higher than the defined threshold, the query will be executed against the target data source and the results included in the prompt. This allows us to prompt the LLM to evaluate whether it can use these results to answer the question, **without further SQL Query generation** to speed up the process.
+
+#### run_sql_query()
+
+This method is called by the AutoGen framework automatically, when instructed to do so by the LLM, to run a SQL query against the given database. It returns a JSON string containing a row wise dump of the results returned. These results are then interpreted to answer the question.
+
+Additionally, if any of the cache functionality is enabled, this method will update the query cache index based on the SQL query run, and the schemas used in execution.
diff --git a/text_2_sql/autogen/agentic_text_2_sql.ipynb b/text_2_sql/autogen/agentic_text_2_sql.ipynb
index 3a14137..f1f9996 100644
--- a/text_2_sql/autogen/agentic_text_2_sql.ipynb
+++ b/text_2_sql/autogen/agentic_text_2_sql.ipynb
@@ -1,5 +1,25 @@
 {
   "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Copyright (c) Microsoft Corporation.\n",
+        "\n",
+        "Licensed under the MIT License."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "# Text2SQL with AutoGen & Azure OpenAI\n",
+        "\n",
+        "This notebook demonstrates how the AutoGen Agents can be integrated with Azure OpenAI to answer questions from the database based on the schemas provided. \n",
+        "\n",
+        "A multi-shot approach is used for SQL generation for more reliable results and reduced token usage. More details can be found in the README.md."
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": null,
@@ -9,7 +29,7 @@
         "import dotenv\n",
         "import logging\n",
         "from autogen_agentchat.task import Console\n",
-        "from agentic_text_2_sql import text_2_sql_generator"
+        "from agentic_text_2_sql import AgenticText2Sql"
       ]
     },
     {
@@ -30,13 +50,29 @@
         "dotenv.load_dotenv()"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Bot setup"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "agentic_text_2_sql = AgenticText2Sql(target_engine=\"TSQL\", engine_specific_rules=\"Use TOP X to limit the number of rows returned instead of LIMIT X. NEVER USE LIMIT X as it produces a syntax error.\").agentic_flow"
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": [
-        "result = text_2_sql_generator.run_stream(task=\"What are the total number of sales within 2008?\")"
+        "result = agentic_text_2_sql.run_stream(task=\"What are the total number of sales within 2008?\")"
       ]
     },
     {
@@ -72,7 +108,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.12.6"
+      "version": "3.12.7"
     }
   },
   "nbformat": 4,
diff --git a/text_2_sql/autogen/agentic_text_2_sql.py b/text_2_sql/autogen/agentic_text_2_sql.py
index 0c51e66..2320e56 100644
--- a/text_2_sql/autogen/agentic_text_2_sql.py
+++ b/text_2_sql/autogen/agentic_text_2_sql.py
@@ -5,75 +5,133 @@
 from utils.models import MINI_MODEL
 from utils.llm_agent_creator import LLMAgentCreator
 import logging
-from custom_agents.sql_query_cache_agent import SqlQueryCacheAgent
+from agents.custom_agents.sql_query_cache_agent import SqlQueryCacheAgent
 import json
+import os
 
-SQL_QUERY_GENERATION_AGENT = LLMAgentCreator.create(
-    "sql_query_generation_agent",
-    target_engine="Microsoft SQL Server",
-    engine_specific_rules="Use TOP X to limit the number of rows returned instead of LIMIT X. NEVER USE LIMIT X as it produces a syntax error.",
-)
-SQL_SCHEMA_SELECTION_AGENT = LLMAgentCreator.create("sql_schema_selection_agent")
-SQL_QUERY_CORRECTION_AGENT = LLMAgentCreator.create(
-    "sql_query_correction_agent",
-    target_engine="Microsoft SQL Server",
-    engine_specific_rules="Use TOP X to limit the number of rows returned instead of LIMIT X. NEVER USE LIMIT X as it produces a syntax error.",
-)
-SQL_QUERY_CACHE_AGENT = SqlQueryCacheAgent()
-ANSWER_AGENT = LLMAgentCreator.create("answer_agent")
-QUESTION_DECOMPOSITION_AGENT = LLMAgentCreator.create("question_decomposition_agent")
-
-
-def text_2_sql_generator_selector_func(messages):
-    logging.info("Messages: %s", messages)
-    decision = None  # Initialize decision variable
-
-    if len(messages) == 1:
-        decision = "sql_query_cache_agent"
-
-    elif (
-        messages[-1].source == "sql_query_cache_agent"
-        and messages[-1].content is not None
-    ):
-        cache_result = json.loads(messages[-1].content)
-        if cache_result.get("cached_questions_and_schemas") is not None:
+
+class AgenticText2Sql:
+    def __init__(self, target_engine: str, engine_specific_rules: str):
+        self.use_query_cache = False
+        self.pre_run_query_cache = False
+
+        self.target_engine = target_engine
+        self.engine_specific_rules = engine_specific_rules
+
+        self.set_mode()
+
+    def set_mode(self):
+        """Set the mode of the plugin based on the environment variables."""
+        self.use_query_cache = (
+            os.environ.get("Text2Sql__UseQueryCache", "False").lower() == "true"
+        )
+
+        self.pre_run_query_cache = (
+            os.environ.get("Text2Sql__PreRunQueryCache", "False").lower() == "true"
+        )
+
+    @property
+    def agents(self):
+        """Define the agents for the chat."""
+        SQL_QUERY_GENERATION_AGENT = LLMAgentCreator.create(
+            "sql_query_generation_agent",
+            target_engine=self.target_engine,
+            engine_specific_rules=self.engine_specific_rules,
+        )
+        SQL_SCHEMA_SELECTION_AGENT = LLMAgentCreator.create(
+            "sql_schema_selection_agent",
+            use_case="Sales data for a company that specializes in selling products online.",
+        )
+        SQL_QUERY_CORRECTION_AGENT = LLMAgentCreator.create(
+            "sql_query_correction_agent",
+            target_engine=self.target_engine,
+            engine_specific_rules=self.engine_specific_rules,
+        )
+
+        ANSWER_AGENT = LLMAgentCreator.create("answer_agent")
+        QUESTION_DECOMPOSITION_AGENT = LLMAgentCreator.create(
+            "question_decomposition_agent"
+        )
+
+        agents = [
+            SQL_QUERY_GENERATION_AGENT,
+            SQL_SCHEMA_SELECTION_AGENT,
+            SQL_QUERY_CORRECTION_AGENT,
+            ANSWER_AGENT,
+            QUESTION_DECOMPOSITION_AGENT,
+        ]
+
+        if self.use_query_cache:
+            SQL_QUERY_CACHE_AGENT = SqlQueryCacheAgent()
+            agents.append(SQL_QUERY_CACHE_AGENT)
+
+        return agents
+
+    @property
+    def termination_condition(self):
+        """Define the termination condition for the chat."""
+        termination = TextMentionTermination("TERMINATE") | MaxMessageTermination(10)
+        return termination
+
+    @staticmethod
+    def selector(messages):
+        logging.info("Messages: %s", messages)
+        decision = None  # Initialize decision variable
+
+        if len(messages) == 1:
+            decision = "sql_query_cache_agent"
+
+        elif (
+            messages[-1].source == "sql_query_cache_agent"
+            and messages[-1].content is not None
+        ):
+            cache_result = json.loads(messages[-1].content)
+            if cache_result.get("cached_questions_and_schemas") is not None:
+                decision = "sql_query_correction_agent"
+            else:
+                decision = "sql_schema_selection_agent"
+
+        elif messages[-1].source == "sql_query_cache_agent":
+            decision = "question_decomposition_agent"
+
+        elif messages[-1].source == "question_decomposition_agent":
+            decomposition_result = json.loads(messages[-1].content)
+
+            if len(decomposition_result["entities"]) == 1:
+                decision = "sql_schema_selection_agent"
+            else:
+                decision = "parallel_sql_flow_agent"
+
+        elif messages[-1].source == "sql_schema_selection_agent":
+            decision = "sql_query_generation_agent"
+
+        elif (
+            messages[-1].source == "sql_query_correction_agent"
+            and messages[-1].content == "VALIDATED"
+        ):
+            decision = "answer_agent"
+
+        elif messages[-1].source == "sql_query_correction_agent":
             decision = "sql_query_correction_agent"
-        else:
-            decision = "sql_schema_selection_agent"
-
-    elif messages[-1].source == "question_decomposition_agent":
-        decision = "sql_schema_selection_agent"
-
-    elif messages[-1].source == "sql_schema_selection_agent":
-        decision = "sql_query_generation_agent"
-
-    elif (
-        messages[-1].source == "sql_query_correction_agent"
-        and messages[-1].content == "VALIDATED"
-    ):
-        decision = "answer_agent"
-
-    elif messages[-1].source == "sql_query_correction_agent":
-        decision = "sql_query_correction_agent"
-
-    # Log the decision
-    logging.info("Decision: %s", decision)
-
-    return decision
-
-
-termination = TextMentionTermination("TERMINATE") | MaxMessageTermination(10)
-text_2_sql_generator = SelectorGroupChat(
-    [
-        SQL_QUERY_GENERATION_AGENT,
-        SQL_SCHEMA_SELECTION_AGENT,
-        SQL_QUERY_CORRECTION_AGENT,
-        SQL_QUERY_CACHE_AGENT,
-        ANSWER_AGENT,
-        QUESTION_DECOMPOSITION_AGENT,
-    ],
-    allow_repeated_speaker=False,
-    model_client=MINI_MODEL,
-    termination_condition=termination,
-    selector_func=text_2_sql_generator_selector_func,
-)
+
+        # Log the decision
+        logging.info("Decision: %s", decision)
+
+        return decision
+
+    @property
+    def agentic_flow(self):
+        """Run the agentic flow for the given question.
+
+        Args:
+        ----
+            question (str): The question to run the agentic flow on."""
+        agentic_flow = SelectorGroupChat(
+            self.agents,
+            allow_repeated_speaker=False,
+            model_client=MINI_MODEL,
+            termination_condition=self.termination_condition,
+            selector_func=AgenticText2Sql.selector,
+        )
+
+        return agentic_flow
diff --git a/text_2_sql/autogen/custom_agents/__init__.py b/text_2_sql/autogen/agents/__init__.py
similarity index 100%
rename from text_2_sql/autogen/custom_agents/__init__.py
rename to text_2_sql/autogen/agents/__init__.py
diff --git a/text_2_sql/autogen/agents/custom_agents/__init__.py b/text_2_sql/autogen/agents/custom_agents/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/text_2_sql/autogen/custom_agents/sql_query_cache_agent.py b/text_2_sql/autogen/agents/custom_agents/sql_query_cache_agent.py
similarity index 97%
rename from text_2_sql/autogen/custom_agents/sql_query_cache_agent.py
rename to text_2_sql/autogen/agents/custom_agents/sql_query_cache_agent.py
index 97ef9db..054a898 100644
--- a/text_2_sql/autogen/custom_agents/sql_query_cache_agent.py
+++ b/text_2_sql/autogen/agents/custom_agents/sql_query_cache_agent.py
@@ -6,7 +6,7 @@
 from autogen_agentchat.base import Response
 from autogen_agentchat.messages import AgentMessage, ChatMessage, TextMessage
 from autogen_core.base import CancellationToken
-from utils.sql_utils import fetch_queries_from_cache
+from utils.sql import fetch_queries_from_cache
 import json
 import logging
 
diff --git a/text_2_sql/autogen/llm_agents/answer_agent.yaml b/text_2_sql/autogen/agents/llm_agents/answer_agent.yaml
similarity index 100%
rename from text_2_sql/autogen/llm_agents/answer_agent.yaml
rename to text_2_sql/autogen/agents/llm_agents/answer_agent.yaml
diff --git a/text_2_sql/autogen/agents/llm_agents/question_decomposition_agent.yaml b/text_2_sql/autogen/agents/llm_agents/question_decomposition_agent.yaml
new file mode 100644
index 0000000..57e139d
--- /dev/null
+++ b/text_2_sql/autogen/agents/llm_agents/question_decomposition_agent.yaml
@@ -0,0 +1,28 @@
+model:
+  gpt-4o-mini
+description:
+  "An agent that will decompose the user's question into smaller parts to be used in the SQL queries. Use this agent when the user's question is too complex to be answered in one SQL query. Only use if the user's question is too complex to be answered in one SQL query."
+system_message:
+  "You are a helpful AI Assistant that specialises in decomposing complex user questions into smaller parts that can be used in SQL queries.
+
+  If a user's question is actually a combination of multiple questions, break down the user's question into smaller questions that can be used in SQL queries.
+
+  Output Info:
+    Return the decomposed questions to the user in the following format:
+
+    [
+      {
+        'question': '<decomposed_question_1>',
+      },
+      {
+        'question': '<decomposed_question_2>',
+      },
+    ]
+
+    If there is the question doesn't need to be decomposed, just return it in the following format:
+
+    [
+      {
+        'question': '<original_question>',
+      },
+    ]"
diff --git a/text_2_sql/autogen/agents/llm_agents/sql_query_correction_agent.yaml b/text_2_sql/autogen/agents/llm_agents/sql_query_correction_agent.yaml
new file mode 100644
index 0000000..3641472
--- /dev/null
+++ b/text_2_sql/autogen/agents/llm_agents/sql_query_correction_agent.yaml
@@ -0,0 +1,22 @@
+model:
+  gpt-4o-mini
+description:
+  "An agent that will look at the SQL query, SQL query results and correct any mistakes in the SQL query to ensure the correct results are returned. Use this agent AFTER the SQL query has been executed and the results are not as expected."
+system_message:
+  "You are a helpful AI Assistant that specialises in correcting invalid SQL queries or queries that do not return the expected results. You have been provided with a SQL query and the results of the query.
+
+  You must:
+    1. Verify the SQL query provided is syntactically correct and correct it if it is not.
+    2. Check the SQL query results and ensure that the results are as expected in the context of the question. You should verify that these results will actually answer the user's question.
+
+  Important Info:
+    - The target database engine is {{ target_engine }}, SQL queries must be able to run on {{ target_engine }} {{ engine_specific_rules }}
+    - Ensure that the corrected query returns the expected results in context of the question.
+    - If the SQL query needs adjustment, correct the SQL query and provide the corrected SQL query and then run the query.
+
+  Output Info:
+    - If there are no errors and the SQL query is correct, return 'VALIDATED'.
+    - If you are consistently unable to correct the SQL query and cannot use the schemas to answer the question, say 'I am unable to correct the SQL query. Please ask another question.' and then end your answer with 'TERMINATE'"
+tools:
+  - sql_get_entity_schemas_tool
+  - sql_query_execution_tool
diff --git a/text_2_sql/autogen/agents/llm_agents/sql_query_generation_agent.yaml b/text_2_sql/autogen/agents/llm_agents/sql_query_generation_agent.yaml
new file mode 100644
index 0000000..6a9e92c
--- /dev/null
+++ b/text_2_sql/autogen/agents/llm_agents/sql_query_generation_agent.yaml
@@ -0,0 +1,41 @@
+model:
+  gpt-4o-mini
+description:
+  "An agent that can generate SQL queries once given the schema and the user's question. It will run the SQL query to fetch the results. Use this agent after the SQL Schema Selection Agent has selected the correct schema."
+system_message:
+  "You are a helpful AI Assistant that specialises in writing and executing SQL Queries to answer a given user's question.
+
+  You must:
+    1. Use the schema information provided to generate a SQL query that will answer the user's question.
+      If you need additional schema information, you can obtain it using the schema selection tool.
+    2. Validate the SQL query to ensure it is syntactically correct using the validation tool.
+    3. Run the SQL query to fetch the results.
+
+  Important Info:
+    - If you need more information from the user to generate the SQL query, ask the user for the information you need with a question and end your answer with 'TERMINATE'.
+
+    - If you are unsure how the question maps to the columns in the schema, return the possible columns that could be used in the SQL query and ask the user to provide more information to generate the SQL query. End your answer with 'TERMINATE'.
+
+  When generating the SQL query, you MUST follow these rules:
+
+    - Only use schema / column information provided when constructing a SQL query. Do not use any other entities and columns in your SQL query, other than those defined above.
+
+    - Do not make up or guess column names.
+
+    - If multiple tables are involved, use JOIN clauses to join the tables.
+
+    - If you need to filter the results, use the WHERE clause to filter the results. Use a 'like' operator to match the values, rather than a direct match. If needed, perform a pre-lookup on the column to get the unique values that might match your query.
+
+    - You must only provide SELECT SQL queries.
+
+    - For a given entity, use the 'SelectFromEntity' property returned in the schema in the SELECT FROM part of the SQL query. If the property is {'SelectFromEntity': 'test_schema.test_table'}, the select statement will be formulated from 'SELECT <VALUES> FROM test_schema.test_table WHERE <CONDITION>'.
+
+    - The target database engine is {{ target_engine }}, SQL queries must be able to run on {{ target_engine }} {{ engine_specific_rules }}
+
+    - The complete entity relationship graph shows you all the entities and their relationships. You can use this information to get a better understanding of the schema and the relationships between the entities and request more schema information if needed.
+
+    - Always run any SQL query you generate to return the results."
+tools:
+  - sql_query_execution_tool
+  - sql_get_entity_schemas_tool
+  - sql_query_validation_tool
diff --git a/text_2_sql/autogen/agents/llm_agents/sql_schema_selection_agent.yaml b/text_2_sql/autogen/agents/llm_agents/sql_schema_selection_agent.yaml
new file mode 100644
index 0000000..3876faf
--- /dev/null
+++ b/text_2_sql/autogen/agents/llm_agents/sql_schema_selection_agent.yaml
@@ -0,0 +1,30 @@
+model:
+  gpt-4o-mini
+description:
+  "An agent that can take a user's question and extract the schema of a view or table in the SQL Database by selecting the most relevant entity based on the search term.
+
+  Call this in parallel if needed multiple times. Limit the use of this agent where possible."
+system_message:
+  "You are a helpful AI Assistant that specialises in selecting relevant SQL schemas to answer a given user's question.
+
+  The user's question will be related to {{ use_case }}.
+
+  Perform the following steps to select the correct schema:
+
+    1. Extract key terms and entities from the user's question.
+
+    2. Perform entity recognition on these key terms to identify several possible categories they might belong to.
+
+    3. Use the 'sql_get_entity_schemas_tool' tool to search for the correct schema based on the key terms and entities extracted.
+
+  For example:
+    If the user's question is 'Show me the list of employees in the HR department', you would extract the key terms 'employees' and 'HR department'.
+    You would then generate the possible entities these key terms might belong to e.g. 'people', 'employees', 'departments', 'teams'.
+    You would then use the 'sql_get_entity_schemas_tool' tool to search for the correct schema based on these key terms.
+
+  Output Info:
+    - Just return the output of the 'sql_get_entity_schemas_tool' tool.
+    - Concatenate the results of the 'sql_get_entity_schemas_tool' tool if you are calling this agent in parallel.
+    - Do not provide any analysis or additional information to the user."
+tools:
+  - sql_get_entity_schemas_tool
diff --git a/text_2_sql/autogen/llm_agents/question_decomposition_agent.yaml b/text_2_sql/autogen/llm_agents/question_decomposition_agent.yaml
deleted file mode 100644
index 0520028..0000000
--- a/text_2_sql/autogen/llm_agents/question_decomposition_agent.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-model:
-  gpt-4o-mini
-description:
-  "An agent that will decompose the user's question into smaller parts to be used in the SQL queries. Use this agent when the user's question is too complex to be answered in one SQL query. Only use if the user's question is too complex to be answered in one SQL query.
-
-  Only use this agent once per user question and after the 'Query Cache Agent' if the results are none."
-system_message:
-  "You are a helpful AI Assistant that specialises in decomposing complex user questions into smaller parts that can be used in SQL queries.
-
-  Break down the user's question into smaller parts that can be used in SQL queries."
diff --git a/text_2_sql/autogen/llm_agents/sql_query_correction_agent.yaml b/text_2_sql/autogen/llm_agents/sql_query_correction_agent.yaml
deleted file mode 100644
index 684eb77..0000000
--- a/text_2_sql/autogen/llm_agents/sql_query_correction_agent.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-model:
-  gpt-4o-mini
-description:
-  "An agent that will look at the SQL query, SQL query results and correct any mistakes in the SQL query to ensure the correct results are returned. Use this agent AFTER the SQL query has been executed and the results are not as expected."
-system_message:
-  "You are a helpful AI Assistant that specialises in correcting invalid SQL queries or queries that do not return the expected results.
-
-  Review the SQL query provided and correct any errors or issues that you find. Bear in mind that the target database engine is {{ target_engine }}, SQL queries must be able compatible to run on {{ target_engine }} {{ engine_specific_rules }}
-
-  Ensure that the corrected query returns the expected results in context of the question.
-
-  If there are no errors and the SQL query is correct, return 'VALIDATED'.
-
-  If the SQL query needs adjustment, correct the SQL query and provide the corrected SQL query and then run the query.
-
-  If you are consistently unable to correct the SQL query and cannot use the schemas to answer the question. Say 'I am unable to correct the SQL query. Please ask another question.' and then end your answer with 'TERMINATE'"
-tools:
-  - sql_get_entity_schemas_tool
-  - sql_query_execution_tool
diff --git a/text_2_sql/autogen/llm_agents/sql_query_generation_agent.yaml b/text_2_sql/autogen/llm_agents/sql_query_generation_agent.yaml
deleted file mode 100644
index 38497d5..0000000
--- a/text_2_sql/autogen/llm_agents/sql_query_generation_agent.yaml
+++ /dev/null
@@ -1,25 +0,0 @@
-model:
-  gpt-4o-mini
-description:
-  "An agent that can generate SQL queries once given the schema and the user's question. It will run the SQL query to fetch the results. Use this agent after the SQL Schema Selection Agent has selected the correct schema."
-system_message:
-  "You are a helpful AI Assistant that specialises in writing and executing SQL Queries to answer a given user's question.
-
-  If you need more information from the user to generate the SQL query, ask the user for the information you need with a question and end your answer with 'TERMINATE'.
-
-  Only use schema / column information provided when constructing a SQL query. Do not use any other entities and columns in your SQL query, other than those defined above.
-  Do not makeup or guess column names.
-
-  The target database engine is {{ target_engine }}, SQL queries must be able compatible to run on {{ target_engine }} {{ engine_specific_rules }}
-  You must only provide SELECT SQL queries.
-  For a given entity, use the 'SelectFromEntity' property returned in the schema in the SELECT FROM part of the SQL query. If the property is {'SelectFromEntity': 'test_schema.test_table'}, the select statement will be formulated from 'SELECT <VALUES> FROM test_schema.test_table WHERE <CONDITION>.
-
-  If you don't know how the value is formatted in a column, run a query against the column to get the unique values that might match your query or use the corresponding lookup values. Use a 'like' operator to match the values, rather than a direct match unless you are sure of the value.
-  Some columns in the schema may have the properties 'AllowedValues' or 'SampleValues'. Use these values to determine the possible values that can be used in the SQL query.
-
-  The complete entity relationship graph shows you all the entities and their relationships. You can use this information to get a better understanding of the schema and the relationships between the entities and request more schema information if needed.
-
-  Always run any SQL query you generate to return the results."
-tools:
-  - sql_query_execution_tool
-  - sql_get_entity_schemas_tool
diff --git a/text_2_sql/autogen/llm_agents/sql_schema_selection_agent.yaml b/text_2_sql/autogen/llm_agents/sql_schema_selection_agent.yaml
deleted file mode 100644
index 67430c5..0000000
--- a/text_2_sql/autogen/llm_agents/sql_schema_selection_agent.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-model:
-  gpt-4o-mini
-description:
-  "An agent that can take a user's question and extract the schema of a view or table in the SQL Database by selecting the most relevant entity based on the search term.
-
-  Call this in parallel if needed multiple times. Limit the use of this agent where possible."
-system_message:
-  "You are a helpful AI Assistant that specialises in selecting relevant SQL schemas to answer a given user's question.
-
-  Use the tools available to you to select the correct schemas that will help. Extract key terms from the user's question and use them to search for the correct schema.
-
-  Limit the number of calls to the 'sql_get_entity_schemas_tool' tool to avoid unnecessary calls.
-
-  If you are unsure about the schema, you can ask the user for more information or ask for clarification."
-tools:
-  - sql_get_entity_schemas_tool
diff --git a/text_2_sql/autogen/requirements.txt b/text_2_sql/autogen/requirements.txt
index 3edc6fb..a517692 100644
--- a/text_2_sql/autogen/requirements.txt
+++ b/text_2_sql/autogen/requirements.txt
@@ -1,6 +1,6 @@
-autogen-core==0.4.0.dev6
-autogen-agentchat==0.4.0.dev6
-autogen-ext[openai,azure]==0.4.0.dev6
+autogen-core==0.4.0.dev7
+autogen-agentchat==0.4.0.dev7
+autogen-ext[openai,azure]==0.4.0.dev7
 aioodbc
 azure-search
 azure-search-documents==11.6.0b5
@@ -8,3 +8,5 @@ azure-identity
 python-dotenv
 openai
 jinja2
+pyyaml
+sqlglot[rs]
diff --git a/text_2_sql/autogen/utils/ai_search_utils.py b/text_2_sql/autogen/utils/ai_search.py
similarity index 100%
rename from text_2_sql/autogen/utils/ai_search_utils.py
rename to text_2_sql/autogen/utils/ai_search.py
diff --git a/text_2_sql/autogen/utils/llm_agent_creator.py b/text_2_sql/autogen/utils/llm_agent_creator.py
index c9a0d9f..d72c887 100644
--- a/text_2_sql/autogen/utils/llm_agent_creator.py
+++ b/text_2_sql/autogen/utils/llm_agent_creator.py
@@ -3,10 +3,7 @@
 import yaml
 from autogen_core.components.tools import FunctionTool
 from autogen_agentchat.agents import AssistantAgent
-from utils.sql_utils import (
-    query_execution,
-    get_entity_schemas,
-)
+from utils.sql import query_execution, get_entity_schemas, query_validation
 from utils.models import MINI_MODEL
 from jinja2 import Template
 
@@ -14,7 +11,7 @@
 class LLMAgentCreator:
     @classmethod
     def load_agent_file(cls, name):
-        with open(f"llm_agents/{name.lower()}.yaml", "r") as file:
+        with open(f"./agents/llm_agents/{name.lower()}.yaml", "r") as file:
             file = yaml.safe_load(file)
 
         return file
@@ -38,6 +35,11 @@ def get_tool(cls, tool_name):
                 get_entity_schemas,
                 description="Gets the schema of a view or table in the SQL Database by selecting the most relevant entity based on the search term. Extract key terms from the user question and use these as the search term. Several entities may be returned. Only use when the provided schemas in the system prompt are not sufficient to answer the question.",
             )
+        elif tool_name == "sql_query_validation_tool":
+            return FunctionTool(
+                query_validation,
+                description="Validates the SQL query to ensure that it is syntactically correct for the target database engine. Use this BEFORE executing any SQL statement.",
+            )
         else:
             raise ValueError(f"Tool {tool_name} not found")
 
diff --git a/text_2_sql/autogen/utils/models.py b/text_2_sql/autogen/utils/models.py
index 066ed07..1d4edbb 100644
--- a/text_2_sql/autogen/utils/models.py
+++ b/text_2_sql/autogen/utils/models.py
@@ -14,6 +14,7 @@
 # )
 
 MINI_MODEL = AzureOpenAIChatCompletionClient(
+    azure_deployment=os.environ["OpenAI__MiniCompletionDeployment"],
     model=os.environ["OpenAI__MiniCompletionDeployment"],
     api_version="2024-08-01-preview",
     azure_endpoint=os.environ["OpenAI__Endpoint"],
diff --git a/text_2_sql/autogen/utils/sql_utils.py b/text_2_sql/autogen/utils/sql.py
similarity index 86%
rename from text_2_sql/autogen/utils/sql_utils.py
rename to text_2_sql/autogen/utils/sql.py
index ede0000..4f340c1 100644
--- a/text_2_sql/autogen/utils/sql_utils.py
+++ b/text_2_sql/autogen/utils/sql.py
@@ -3,10 +3,11 @@
 import logging
 import os
 import aioodbc
-from typing import Annotated
-from utils.ai_search_utils import run_ai_search_query
+from typing import Annotated, Union
+from utils.ai_search import run_ai_search_query
 import json
 import asyncio
+import sqlglot
 
 USE_QUERY_CACHE = os.environ.get("Text2Sql__UseQueryCache", "False").lower() == "true"
 
@@ -66,7 +67,12 @@ async def get_entity_schemas(
     return json.dumps(schemas, default=str)
 
 
-async def query_execution(sql_query: str) -> list[dict]:
+async def query_execution(
+    sql_query: Annotated[
+        str,
+        "The SQL query to run against the database.",
+    ]
+) -> list[dict]:
     """Run the SQL query against the database.
 
     Args:
@@ -91,6 +97,24 @@ async def query_execution(sql_query: str) -> list[dict]:
     return results
 
 
+async def query_validation(
+    sql_query: Annotated[
+        str,
+        "The SQL query to run against the database.",
+    ]
+) -> Union[bool | list[dict]]:
+    """Validate the SQL query."""
+    try:
+        logging.info("Validating SQL Query: %s", sql_query)
+        sqlglot.transpile(sql_query)
+    except sqlglot.errors.ParseError as e:
+        logging.error("SQL Query is invalid: %s", e.errors)
+        return e.errors
+    else:
+        logging.info("SQL Query is valid.")
+        return True
+
+
 async def fetch_queries_from_cache(question: str) -> str:
     """Fetch the queries from the cache based on the question.
 
diff --git a/text_2_sql/data_dictionary/README.md b/text_2_sql/data_dictionary/README.md
index c8492e9..442c0ff 100644
--- a/text_2_sql/data_dictionary/README.md
+++ b/text_2_sql/data_dictionary/README.md
@@ -101,6 +101,6 @@ The following Databases have pre-built scripts for them:
 
 - **Databricks:** `databricks_data_dictionary_creator.py`
 - **Snowflake:** `snowflake_data_dictionary_creator.py`
-- **SQL Server:** `sql_server_data_dictionary_creator.py`
+- **TSQL:** `tsql_data_dictionary_creator.py`
 
 If there is no pre-built script for your database engine, take one of the above as a starting point and adjust it.
diff --git a/text_2_sql/data_dictionary/data_dictionary_creator.py b/text_2_sql/data_dictionary/data_dictionary_creator.py
index 0da3209..263e984 100644
--- a/text_2_sql/data_dictionary/data_dictionary_creator.py
+++ b/text_2_sql/data_dictionary/data_dictionary_creator.py
@@ -24,7 +24,7 @@ class DatabaseEngine(StrEnum):
     """An enumeration to represent a database engine."""
 
     SNOWFLAKE = "SNOWFLAKE"
-    SQL_SERVER = "SQL_SERVER"
+    TSQL = "TSQL"
     DATABRICKS = "DATABRICKS"
 
 
@@ -657,7 +657,7 @@ def excluded_fields_for_database_engine(self):
         all_engine_specific_fields = ["Warehouse", "Database", "Catalog"]
         if self.database_engine == DatabaseEngine.SNOWFLAKE:
             engine_specific_fields = ["Warehouse", "Database"]
-        elif self.database_engine == DatabaseEngine.SQL_SERVER:
+        elif self.database_engine == DatabaseEngine.TSQL:
             engine_specific_fields = ["Database"]
         elif self.database_engine == DatabaseEngine.DATABRICKS:
             engine_specific_fields = ["Catalog"]
diff --git a/text_2_sql/data_dictionary/sql_sever_data_dictionary_creator.py b/text_2_sql/data_dictionary/tsql_data_dictionary_creator.py
similarity index 96%
rename from text_2_sql/data_dictionary/sql_sever_data_dictionary_creator.py
rename to text_2_sql/data_dictionary/tsql_data_dictionary_creator.py
index 2b421d2..6ce552e 100644
--- a/text_2_sql/data_dictionary/sql_sever_data_dictionary_creator.py
+++ b/text_2_sql/data_dictionary/tsql_data_dictionary_creator.py
@@ -26,7 +26,7 @@ def __init__(
         super().__init__(entities, excluded_entities, excluded_schemas, single_file)
         self.database = os.environ["Text2Sql__DatabaseName"]
 
-        self.database_engine = DatabaseEngine.SQL_SERVER
+        self.database_engine = DatabaseEngine.TSQL
 
     """A class to extract data dictionary information from a SQL Server database."""
 
diff --git a/text_2_sql/images/Agentic Text2SQL Query Cache.png b/text_2_sql/images/Agentic Text2SQL Query Cache.png
new file mode 100644
index 0000000..f1da7ab
Binary files /dev/null and b/text_2_sql/images/Agentic Text2SQL Query Cache.png differ
diff --git a/text_2_sql/images/Text2SQL Approaches.png b/text_2_sql/images/Text2SQL Approaches.png
index b8c7ed6..4699820 100644
Binary files a/text_2_sql/images/Text2SQL Approaches.png and b/text_2_sql/images/Text2SQL Approaches.png differ
diff --git a/text_2_sql/semantic_kernel/README.md b/text_2_sql/semantic_kernel/README.md
index 5749ad0..c98cd60 100644
--- a/text_2_sql/semantic_kernel/README.md
+++ b/text_2_sql/semantic_kernel/README.md
@@ -2,6 +2,18 @@
 
 The implementation is written for [Semantic Kernel](https://github.com/microsoft/semantic-kernel) in Python, although it can easily be adapted for C#.
 
+**The provided Semantic Kernel code implements Iterations 2, 3 & 4.**
+
+## Full Logical Flow for Vector Based Approach
+
+The following diagram shows the logical flow within the Vector Based plugin. In an ideal scenario, the questions will follow the **Pre-Fetched Cache Results Path** which leads to the quickest answer generation. In cases where the question is not known, the plugin will fall back to the other paths accordingly and generate the SQL query using the LLMs.
+
+As the query cache is shared between users (no data is stored in the cache), a new user can benefit from the pre-mapped question and schema resolution in the index. There are multiple possible strategies for updating the query cache, see the possible options in the Text2SQL README.
+
+**Database results were deliberately not stored within the cache. Storing them would have removed one of the key benefits of the Text2SQL plugin, the ability to get near-real time information inside a RAG application. Instead, the query is stored so that the most-recent results can be obtained quickly. Additionally, this retains the ability to apply Row or Column Level Security.**
+
+![Vector Based with Query Cache Logical Flow.](../images/Text2SQL%20Query%20Cache.png "Vector Based with Query Cache Logical Flow")
+
 ## Provided Notebooks & Scripts
 
 - `./rag_with_prompt_based_text_2_sql.ipynb` provides example of how to utilise the Prompt Based Text2SQL plugin to query the database.
@@ -10,6 +22,10 @@ The implementation is written for [Semantic Kernel](https://github.com/microsoft
     - This setup is useful for a production application as the SQL Database is unlikely to be able to answer all the questions a user may ask.
 - `./time_comparison_script.py` provides a utility script for performing time based comparisons between the different approaches.
 
+### ai_search.py
+
+This util file contains helper functions for interacting with AI Search.
+
 ## Plugins
 
 ### prompt_based_sql_plugin.py
@@ -36,7 +52,6 @@ This method is called by the Semantic Kernel framework automatically, when instr
 
 The `./plugins/vector_based_sql_plugin/vector_based_sql_plugin.py` contains 3 key methods to power the Vector Based Text2SQL engine.
 
-
 #### system_prompt()
 
 This method simply returns a pre-made system prompt that contains optimised and working instructions for the LLM. This system prompt for the plugin is added to the main prompt file at runtime.
diff --git a/text_2_sql/semantic_kernel/plugins/vector_based_sql_plugin/vector_based_sql_plugin.py b/text_2_sql/semantic_kernel/plugins/vector_based_sql_plugin/vector_based_sql_plugin.py
index 35c7032..1db29e8 100644
--- a/text_2_sql/semantic_kernel/plugins/vector_based_sql_plugin/vector_based_sql_plugin.py
+++ b/text_2_sql/semantic_kernel/plugins/vector_based_sql_plugin/vector_based_sql_plugin.py
@@ -5,7 +5,7 @@
 import os
 import json
 import logging
-from utils.ai_search_utils import (
+from utils.ai_search import (
     add_entry_to_index,
     run_ai_search_query,
 )
diff --git a/text_2_sql/semantic_kernel/requirements.txt b/text_2_sql/semantic_kernel/requirements.txt
index 8d0503a..86f59f7 100644
--- a/text_2_sql/semantic_kernel/requirements.txt
+++ b/text_2_sql/semantic_kernel/requirements.txt
@@ -9,3 +9,4 @@ numpy
 seaborn
 pydantic
 openai
+pyyaml
diff --git a/text_2_sql/semantic_kernel/utils/ai_search_utils.py b/text_2_sql/semantic_kernel/utils/ai_search.py
similarity index 100%
rename from text_2_sql/semantic_kernel/utils/ai_search_utils.py
rename to text_2_sql/semantic_kernel/utils/ai_search.py