Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions deploy_ai_search/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ The associated scripts in this portion of the repository contains pre-built scri

## Steps for Text2SQL Index Deployment

### Entity Schema Index
### Schema Store Index

1. Update the `.env` file with the associated values. Not all values are required; which ones you need depends on whether you are using System / User Assigned Identities or key-based authentication.
2. Adjust `text_2_sql.py` with any changes to the index / indexer. The `get_skills()` method implements the skills pipeline. Make any adjustments here in the skills needed to enrich the data source.
2. Adjust `text_2_sql_schema_store.py` with any changes to the index / indexer. The `get_skills()` method implements the skills pipeline. Make any adjustments here in the skills needed to enrich the data source.
3. Run `deploy.py` with the following args:

- `index_type text_2_sql`. This selects the `Text2SQLAISearch` sub class.
- `index_type text_2_sql_schema_store`. This selects the `Text2SqlSchemaStoreAISearch` subclass.
- `rebuild`. Whether to delete and rebuild the index.
- `suffix`. Optional parameter that will apply a suffix onto the deployed index and indexer. This is useful if you want to deploy a test version before overwriting the main version.
- `single_data_dictionary`. Optional parameter that controls whether you will be uploading a single data dictionary, or a data dictionary file per entity. By default, this is set to False.
Expand Down
6 changes: 4 additions & 2 deletions deploy_ai_search/ai_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ def __init__(
"""

if not hasattr(self, "indexer_type"):
self.indexer_type = None # Needed to help mypy understand that indexer_type is defined in the child class
# Needed to help mypy understand that indexer_type is defined in the child class
self.indexer_type = None
raise ValueError("indexer_type is not defined in the child class.")

if rebuild is not None:
Expand Down Expand Up @@ -126,13 +127,14 @@ def get_index_fields(self) -> list[SearchableField]:
Returns:
list[SearchableField]: The index fields"""

@abstractmethod
def get_semantic_search(self) -> SemanticSearch:
"""Get the semantic search configuration for the indexer.

Returns:
SemanticSearch: The semantic search configuration"""

return None

def get_skills(self) -> list:
"""Get the skillset for the indexer.

Expand Down
6 changes: 3 additions & 3 deletions deploy_ai_search/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Licensed under the MIT License.
import argparse
from rag_documents import RagDocumentsAISearch
from text_2_sql import Text2SqlAISearch
from text_2_sql_schema_store import Text2SqlSchemaStoreAISearch
from text_2_sql_query_cache import Text2SqlQueryCacheAISearch
import logging

Expand All @@ -20,8 +20,8 @@ def deploy_config(arguments: argparse.Namespace):
rebuild=arguments.rebuild,
enable_page_by_chunking=arguments.enable_page_chunking,
)
elif arguments.index_type == "text_2_sql":
index_config = Text2SqlAISearch(
elif arguments.index_type == "text_2_sql_schema_store":
index_config = Text2SqlSchemaStoreAISearch(
suffix=arguments.suffix,
rebuild=arguments.rebuild,
single_data_dictionary=arguments.single_data_dictionary,
Expand Down
2 changes: 1 addition & 1 deletion deploy_ai_search/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class IndexerType(Enum):
"""The type of the indexer"""

RAG_DOCUMENTS = "rag-documents"
TEXT_2_SQL = "text-2-sql"
TEXT_2_SQL_SCHEMA_STORE = "text-2-sql-schema-store"
TEXT_2_SQL_QUERY_CACHE = "text-2-sql-query-cache"


Expand Down
2 changes: 1 addition & 1 deletion deploy_ai_search/rag_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def get_indexer(self) -> SearchIndexer:
indexer_parameters = IndexingParameters(
batch_size=batch_size,
configuration=IndexingParametersConfiguration(
data_to_extract=BlobIndexerDataToExtract.ALL_METADATA,
data_to_extract=BlobIndexerDataToExtract.STORAGE_METADATA,
query_timeout=None,
execution_environment=execution_environment,
fail_on_unprocessable_document=False,
Expand Down
78 changes: 32 additions & 46 deletions deploy_ai_search/text_2_sql_query_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,6 @@
SearchFieldDataType,
SearchField,
SearchableField,
SemanticField,
SemanticPrioritizedFields,
SemanticConfiguration,
SemanticSearch,
SimpleField,
ComplexField,
)
Expand Down Expand Up @@ -52,42 +48,52 @@ def get_index_fields(self) -> list[SearchableField]:
vector_search_dimensions=self.environment.open_ai_embedding_dimensions,
vector_search_profile_name=self.vector_search_profile_name,
),
SearchableField(
name="Query", type=SearchFieldDataType.String, filterable=True
),
ComplexField(
name="Schemas",
name="SqlQueryDecomposition",
collection=True,
fields=[
SearchableField(
name="Entity",
name="SqlQuery",
type=SearchFieldDataType.String,
filterable=True,
),
ComplexField(
name="Columns",
name="Schemas",
collection=True,
fields=[
SearchableField(
name="Name", type=SearchFieldDataType.String
),
SearchableField(
name="Definition", type=SearchFieldDataType.String
),
SearchableField(
name="Type", type=SearchFieldDataType.String
),
SearchableField(
name="AllowedValues",
name="Entity",
type=SearchFieldDataType.String,
collection=True,
searchable=False,
filterable=True,
),
SearchableField(
name="SampleValues",
type=SearchFieldDataType.String,
ComplexField(
name="Columns",
collection=True,
searchable=False,
fields=[
SearchableField(
name="Name",
type=SearchFieldDataType.String,
),
SearchableField(
name="Definition",
type=SearchFieldDataType.String,
),
SearchableField(
name="DataType", type=SearchFieldDataType.String
),
SearchableField(
name="AllowedValues",
type=SearchFieldDataType.String,
collection=True,
searchable=False,
),
SearchableField(
name="SampleValues",
type=SearchFieldDataType.String,
collection=True,
searchable=False,
),
],
),
],
),
Expand All @@ -101,23 +107,3 @@ def get_index_fields(self) -> list[SearchableField]:
]

return fields

def get_semantic_search(self) -> SemanticSearch:
"""This function returns the semantic search configuration for sql index

Returns:
SemanticSearch: The semantic search configuration"""

semantic_config = SemanticConfiguration(
name=self.semantic_config_name,
prioritized_fields=SemanticPrioritizedFields(
title_field=SemanticField(field_name="Question"),
keywords_fields=[
SemanticField(field_name="Query"),
],
),
)

semantic_search = SemanticSearch(configurations=[semantic_config])

return semantic_search
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
)


class Text2SqlAISearch(AISearch):
class Text2SqlSchemaStoreAISearch(AISearch):
"""This class is used to deploy the sql index."""

def __init__(
Expand All @@ -41,7 +41,7 @@ def __init__(
suffix (str, optional): The suffix for the indexer. Defaults to None. If a suffix is provided, it is assumed to be a test indexer.
rebuild (bool, optional): Whether to rebuild the index. Defaults to False.
"""
self.indexer_type = IndexerType.TEXT_2_SQL
self.indexer_type = IndexerType.TEXT_2_SQL_SCHEMA_STORE
super().__init__(suffix, rebuild)

if single_data_dictionary:
Expand All @@ -62,34 +62,43 @@ def get_index_fields(self) -> list[SearchableField]:
key=True,
analyzer_name="keyword",
),
SearchableField(
name="EntityName", type=SearchFieldDataType.String, filterable=True
),
SearchableField(
name="Entity",
type=SearchFieldDataType.String,
analyzer_name="keyword",
),
SearchableField(
name="EntityName", type=SearchFieldDataType.String, filterable=True
name="Database",
type=SearchFieldDataType.String,
),
SearchableField(
name="Description",
name="Warehouse",
type=SearchFieldDataType.String,
),
SearchableField(
name="Definition",
type=SearchFieldDataType.String,
sortable=False,
filterable=False,
facetable=False,
),
SearchField(
name="DescriptionEmbedding",
name="DefinitionEmbedding",
type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
vector_search_dimensions=self.environment.open_ai_embedding_dimensions,
vector_search_profile_name=self.vector_search_profile_name,
hidden=True,
),
ComplexField(
name="Columns",
collection=True,
fields=[
SearchableField(name="Name", type=SearchFieldDataType.String),
SearchableField(name="Definition", type=SearchFieldDataType.String),
SearchableField(name="Type", type=SearchFieldDataType.String),
SearchableField(name="DataType", type=SearchFieldDataType.String),
SearchableField(
name="AllowedValues",
type=SearchFieldDataType.String,
Expand All @@ -102,6 +111,11 @@ def get_index_fields(self) -> list[SearchableField]:
collection=True,
searchable=False,
),
SearchableField(
name="JoinableEntities",
type=SearchFieldDataType.String,
collection=True,
),
],
),
SearchableField(
Expand All @@ -111,6 +125,40 @@ def get_index_fields(self) -> list[SearchableField]:
hidden=True,
# This is needed to enable semantic searching against the column names as complex field types are not used.
),
SearchableField(
name="ColumnDefinitions",
type=SearchFieldDataType.String,
collection=True,
hidden=True,
# This is needed to enable semantic searching against the column definitions as complex field types are not used.
),
ComplexField(
name="EntityRelationships",
collection=True,
fields=[
SearchableField(
name="ForeignEntity",
type=SearchFieldDataType.String,
),
ComplexField(
name="ForeignKeys",
collection=True,
fields=[
SearchableField(
name="Column", type=SearchFieldDataType.String
),
SearchableField(
name="ForeignColumn", type=SearchFieldDataType.String
),
],
),
],
),
SearchableField(
name="CompleteEntityRelationshipsGraph",
type=SearchFieldDataType.String,
collection=True,
),
SimpleField(
name="DateLastModified",
type=SearchFieldDataType.DateTimeOffset,
Expand All @@ -131,7 +179,8 @@ def get_semantic_search(self) -> SemanticSearch:
prioritized_fields=SemanticPrioritizedFields(
title_field=SemanticField(field_name="EntityName"),
content_fields=[
SemanticField(field_name="Description"),
SemanticField(field_name="Definition"),
SemanticField(field_name="ColumnDefinitions"),
],
keywords_fields=[
SemanticField(field_name="ColumnNames"),
Expand All @@ -151,7 +200,7 @@ def get_skills(self) -> list:
list: The skillsets used in the indexer"""

embedding_skill = self.get_vector_skill(
"/document", "/document/Description", target_name="DescriptionEmbedding"
"/document", "/document/Definition", target_name="DefinitionEmbedding"
)

skills = [embedding_skill]
Expand Down Expand Up @@ -222,12 +271,20 @@ def get_indexer(self) -> SearchIndexer:
target_field_name="EntityName",
),
FieldMapping(
source_field_name="/document/Description",
target_field_name="Description",
source_field_name="/document/Database",
target_field_name="Database",
),
FieldMapping(
source_field_name="/document/DescriptionEmbedding",
target_field_name="DescriptionEmbedding",
source_field_name="/document/Warehouse",
target_field_name="Warehouse",
),
FieldMapping(
source_field_name="/document/Definition",
target_field_name="Definition",
),
FieldMapping(
source_field_name="/document/DefinitionEmbedding",
target_field_name="DefinitionEmbedding",
),
FieldMapping(
source_field_name="/document/Columns",
Expand All @@ -237,6 +294,18 @@ def get_indexer(self) -> SearchIndexer:
source_field_name="/document/Columns/*/Name",
target_field_name="ColumnNames",
),
FieldMapping(
source_field_name="/document/Columns/*/Definition",
target_field_name="ColumnDefinitions",
),
FieldMapping(
source_field_name="/document/EntityRelationships",
target_field_name="EntityRelationships",
),
FieldMapping(
source_field_name="/document/CompleteEntityRelationshipsGraph/*",
target_field_name="CompleteEntityRelationshipsGraph",
),
FieldMapping(
source_field_name="/document/DateLastModified",
target_field_name="DateLastModified",
Expand Down
10 changes: 0 additions & 10 deletions text_2_sql/data_dictionary/.env
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,7 @@ OpenAI__Endpoint=<openAIEndpoint>
OpenAI__ApiKey=<openAIKey if using non managed identity>
OpenAI__ApiVersion=<openAIApiVersion>
Text2Sql__DatabaseEngine=<databaseEngine>
Text2Sql__UseQueryCache=<whether to use the query cache first or not>
Text2Sql__PreRunQueryCache=<whether to pre-run the top result from the query cache or not>
Text2Sql__DatabaseName=<databaseName>
Text2Sql__DatabaseConnectionString=<databaseConnectionString>
AIService__AzureSearchOptions__Endpoint=<searchServiceEndpoint>
AIService__AzureSearchOptions__Key=<searchServiceKey if not using identity>
AIService__AzureSearchOptions__RagDocuments__Index=<ragDocumentsIndexName>
AIService__AzureSearchOptions__Text2Sql__Index=<text2SQLIndexName>
AIService__AzureSearchOptions__Text2SqlQueryCache__Index=<text2SQLIndexName>
AIService__AzureSearchOptions__RagDocuments__SemanticConfig=<ragDocumentsSemanticConfig>
AIService__AzureSearchOptions__Text2Sql__SemanticConfig=<text2SQLSemanticConfig>
AIService__AzureSearchOptions__Text2SqlQueryCache__SemanticConfig=<text2SQLSemanticConfig>
IdentityType=<identityType> # system_assigned or user_assigned or key
ClientId=<clientId if using user assigned identity>
Loading