Make fields searchable in vector DB #15409
Replies: 1 comment 12 replies
-
To make the specified fields ("date", "userid", "contractid", "title", "clauseid", "summary", "keywords") searchable in the vector DB, declare each of them as a `SearchableField` when the index is created.
Here is the updated code:
from azure.search.documents.indexes.models import (
SearchIndex,
SimpleField,
SearchableField,
SearchField,
SearchFieldDataType,
VectorSearch,
VectorSearchAlgorithmKind,
VectorSearchAlgorithmMetric,
HnswAlgorithmConfiguration,
HnswParameters,
ExhaustiveKnnAlgorithmConfiguration,
ExhaustiveKnnParameters,
VectorSearchProfile,
SemanticConfiguration,
SemanticField,
SemanticPrioritizedFields,
SemanticSearch,
)
def _create_index(self, index_name: Optional[str]) -> None:
    """Create the search index, including searchable metadata fields.

    Declares the key, chunk, embedding, metadata-string and doc-id fields
    (named through ``self._field_mapping``), adds one string field per
    metadata key, configures HNSW and exhaustive-KNN vector search plus a
    semantic configuration over the chunk field, and creates the index
    through ``self._index_client``.

    Args:
        index_name: Name passed to ``SearchIndex`` for the new index.
    """
    fields = [
        SimpleField(name=self._field_mapping["id"], type="Edm.String", key=True),
        SearchableField(
            name=self._field_mapping["chunk"],
            type="Edm.String",
            analyzer_name=self._language_analyzer,
        ),
        SearchField(
            name=self._field_mapping["embedding"],
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            searchable=True,
            vector_search_dimensions=self._embedding_dimensionality,
            # NOTE(review): presumably this must name one of the profiles
            # declared in ``vector_search`` below - confirm where it is set.
            vector_search_profile_name=self._vector_profile_name,
        ),
        SearchableField(name=self._field_mapping["metadata"], type="Edm.String"),
        SimpleField(
            name=self._field_mapping["doc_id"], type="Edm.String", filterable=True
        ),
    ]

    # Metadata fields are full-text searchable. ``filterable=True`` is passed
    # explicitly because ``SearchableField`` leaves it off by default, and
    # these fields should stay usable in filters as well as in search.
    metadata_field_names = (
        "date",
        "userid",
        "contractid",
        "title",
        "clauseid",
        "summary",
        "keywords",
    )
    fields.extend(
        SearchableField(name=field_name, type="Edm.String", filterable=True)
        for field_name in metadata_field_names
    )

    vector_search = VectorSearch(
        algorithms=[
            HnswAlgorithmConfiguration(
                name="myHnsw",
                kind=VectorSearchAlgorithmKind.HNSW,
                parameters=HnswParameters(
                    m=4,
                    ef_construction=400,
                    ef_search=500,
                    metric=VectorSearchAlgorithmMetric.COSINE,
                ),
            ),
            ExhaustiveKnnAlgorithmConfiguration(
                name="myExhaustiveKnn",
                kind=VectorSearchAlgorithmKind.EXHAUSTIVE_KNN,
                parameters=ExhaustiveKnnParameters(
                    metric=VectorSearchAlgorithmMetric.COSINE,
                ),
            ),
        ],
        # One profile per algorithm configuration declared above.
        profiles=[
            VectorSearchProfile(
                name="myHnswProfile",
                algorithm_configuration_name="myHnsw",
            ),
            VectorSearchProfile(
                name="myExhaustiveKnnProfile",
                algorithm_configuration_name="myExhaustiveKnn",
            ),
        ],
    )

    # Semantic configuration uses the chunk text as its only content field.
    semantic_config = SemanticConfiguration(
        name="mySemanticConfig",
        prioritized_fields=SemanticPrioritizedFields(
            content_fields=[SemanticField(field_name=self._field_mapping["chunk"])],
        ),
    )
    semantic_search = SemanticSearch(configurations=[semantic_config])

    index = SearchIndex(
        name=index_name,
        fields=fields,
        vector_search=vector_search,
        semantic_search=semantic_search,
    )
    self._index_client.create_index(index)


# This modification ensures that the fields "date", "userid", "contractid", "title", "clauseid", "summary", and "keywords" are searchable in the vector database [1][2].
Beta Was this translation helpful? Give feedback.
-
@dosu
When I create this index, the only searchable fields are chunk and embedding. I want all of the following fields to be searchable as well, but right now they are only retrievable and filterable:
"date": ("date", MetadataIndexFieldType.STRING),
"userid": ("userid", MetadataIndexFieldType.STRING),
"contractid": ("contractid", MetadataIndexFieldType.STRING),
"title": ("title", MetadataIndexFieldType.STRING),
"clauseid": ("clauseid", MetadataIndexFieldType.STRING),
"summary": ("summary", MetadataIndexFieldType.STRING),
"keywords": ("keywords", MetadataIndexFieldType.STRING)
# Each metadata key maps to an (index field name, field type) pair. Here the
# index field name is the same as the key and every field is a string.
metadata_fields = {
    key: (key, MetadataIndexFieldType.STRING)
    for key in (
        "date",
        "userid",
        "contractid",
        "title",
        "clauseid",
        "summary",
        "keywords",
    )
}
# Define the vector store
vector_store = AzureAISearchVectorStore(
    # Client, target index, and index management mode.
    search_or_index_client=index_client,
    index_name=index_name,
    index_management=IndexManagement.CREATE_IF_NOT_EXISTS,
    # Keys of the core index fields.
    id_field_key="id",
    chunk_field_key="chunk",
    embedding_field_key="embedding",
    metadata_string_field_key="metadata",
    doc_id_field_key="doc_id",
    # Metadata keys passed as filterable fields.
    filterable_metadata_field_keys=metadata_fields,
    # Embedding size, text analyzer, and vector search algorithm.
    embedding_dimensionality=1536,
    language_analyzer="en.lucene",
    vector_algorithm_type="exhaustiveKnn",
)
# Function to generate a summary for a clause using the LLM
def summarize_clause(llm, clause_text):
    """Generate a summary for a clause using the LLM.

    Args:
        llm: Object exposing ``complete(prompt)``; the returned object must
            have a ``text`` attribute.
        clause_text: Text of the clause to summarize.
            NOTE(review): the placeholder prompt below does not include it;
            the real prompt text is not shown in this snippet.

    Returns:
        The completion text with leading and trailing whitespace removed.
    """
    # Placeholder prompt kept exactly as posted.
    prompt = "prompt"
    response = llm.complete(prompt)
    return response.text.strip()
# Function to generate keywords for a clause using the LLM
def generate_keywords(llm, clause_text):
    """Generate keywords for a clause using the LLM.

    Args:
        llm: Object exposing ``complete(prompt)``; the returned object must
            have a ``text`` attribute.
        clause_text: Text of the clause to extract keywords from.
            NOTE(review): the placeholder prompt below does not include it;
            the real prompt text is not shown in this snippet.

    Returns:
        The completion text with leading and trailing whitespace removed.
        NOTE(review): the snippet as posted had no ``return``; this mirrors
        ``summarize_clause`` - adjust if keywords should be a list.
    """
    # Placeholder prompt kept exactly as posted.
    prompt = "prompt"
    response = llm.complete(prompt)
    return response.text.strip()
# Create document objects using the clauses from the contract
documents = []
# NOTE(review): the original trailing comment read 'Fixed to use "document"',
# but the key read here is "contract" - confirm which key the data uses.
for i, item in enumerate(contract_data["contract"]):
    clause_text = item["clause"]
    clause_id = f"clause_{i+1}"  # 1-based IDs: clause_1, clause_2, ...
    # NOTE(review): the snippet as posted ends here; nothing is appended to
    # `documents` in the visible code.
# Create the vector store index with the summarized documents
# Wrap the vector store in a storage context.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Set the LLM and embedding model on Settings before building the index.
Settings.llm = llm
Settings.embed_model = embed_model

# Build the index from the documents, stored through the storage context.
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
Beta Was this translation helpful? Give feedback.
All reactions