You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# Indexer definition: reads from data_source, runs the skillset named by
# skillset_name, and writes into the index named by index_name.
indexer = SearchIndexer(
    name=indexer_name,
    description="Indexer to index documents and generate embeddings",
    skillset_name=skillset_name,
    target_index_name=index_name,
    data_source_name=data_source.name,
    # Source-field -> index-field copies. metadata_storage_name goes to "title"
    # so the PDF title can be displayed in the search results.
    field_mappings=[
        FieldMapping(source_field_name=source, target_field_name=target)
        for source, target in (
            ("metadata_storage_name", "title"),
            ("metadata_storage_path", "url"),
            ("metadata_storage_last_modified", "last_modified"),
        )
    ],
    # Enrichment-tree paths -> index fields for the extracted entities.
    output_field_mappings=[
        FieldMapping(source_field_name=source, target_field_name=target)
        for source, target in (
            ("/document/content/people", "people"),
            ("/document/content/skills", "skills"),
        )
    ],
)
The text was updated successfully, but these errors were encountered:
I have tried using the EntityRecognitionSkill to extract entities, combined with the SplitText and Embedding skills.
In the output I am not getting the extracted entities; the people, skills, locations, etc. fields all show null values.
Please find the code below:
# Index schema. chunk_id is the document key and parent_id is the field the
# index projection uses as its parent key.
fields = [
    SearchField(name="parent_id", type=SearchFieldDataType.String, sortable=True, filterable=True, facetable=True),
    # Key field; uses the "keyword" analyzer.
    SearchField(name="chunk_id", type=SearchFieldDataType.String, key=True, sortable=True, filterable=True, facetable=True, analyzer_name="keyword"),
    SearchField(name="chunk", type=SearchFieldDataType.String, sortable=False, filterable=False, facetable=False),
    # Embedding vector: 1536 dimensions, searched with the "myHnswProfile" profile.
    SearchField(name="vector", type=SearchFieldDataType.Collection(SearchFieldDataType.Single), vector_search_dimensions=1536, vector_search_profile_name="myHnswProfile"),
    SearchField(name="title", type=SearchFieldDataType.String),
    SearchField(name="url", type=SearchFieldDataType.String),
    SearchField(name="last_modified", type=SearchFieldDataType.DateTimeOffset),
    # The entity recognition skill emits each entity category as an array of
    # strings, so the receiving fields must be string collections; a plain
    # String field cannot hold the skill output.
    SearchField(name="people", type=SearchFieldDataType.Collection(SearchFieldDataType.String)),
    SearchField(name="skills", type=SearchFieldDataType.Collection(SearchFieldDataType.String)),
]
# Entity extraction over the whole document content. The skill's "persons" and
# "skills" outputs are renamed to "people" and "skills" under the context node.
entity_skill = EntityRecognitionSkill(
    description="Skill Used to detect entities from the document",
    # No trailing slash: outputs are written beneath the context node, and the
    # mappings elsewhere read them from "/document/content/people" and
    # "/document/content/skills".
    context="/document/content",
    categories=[
        "Person",
        "Email",
        "Location",
        "Organization",
        "PhoneNumber",
        "Address",
        "Skill",
    ],
    inputs=[
        InputFieldMappingEntry(name="text", source="/document/content"),
        # NOTE(review): "/document/language" presumably has to be produced by an
        # upstream step (e.g. a language detection skill) - confirm it exists.
        InputFieldMappingEntry(name="languageCode", source="/document/language"),
    ],
    outputs=[
        OutputFieldMappingEntry(name="persons", target_name="people"),
        OutputFieldMappingEntry(name="skills", target_name="skills"),
    ],
)
# Index projections: project each element of /document/pages into the target
# index, keyed back to its parent through parent_id.
index_projections = SearchIndexerIndexProjections(
    selectors=[
        SearchIndexerIndexProjectionSelector(
            target_index_name=index_name,
            parent_key_field_name="parent_id",
            # The "/*" wildcard makes every page (chunk) its own projection
            # context, consistent with the "/document/pages/*/vector" mapping.
            source_context="/document/pages/*",
            mappings=[
                InputFieldMappingEntry(name="chunk", source="/document/pages/*"),
                InputFieldMappingEntry(name="vector", source="/document/pages/*/vector"),
                InputFieldMappingEntry(name="title", source="/document/metadata_storage_name"),
                # Document-level entity outputs, read from the parent document's
                # enrichment paths.
                InputFieldMappingEntry(name="people", source="/document/content/people"),
                InputFieldMappingEntry(name="skills", source="/document/content/skills"),
            ],
        ),
    ],
    parameters=SearchIndexerIndexProjectionsParameters(
        projection_mode=IndexProjectionMode.SKIP_INDEXING_PARENT_DOCUMENTS
    ),
)
# Indexer definition: reads from data_source, runs the skillset named by
# skillset_name, and writes into the index named by index_name.
indexer = SearchIndexer(
    name=indexer_name,
    description="Indexer to index documents and generate embeddings",
    skillset_name=skillset_name,
    target_index_name=index_name,
    data_source_name=data_source.name,
    # Source-field -> index-field copies. metadata_storage_name goes to "title"
    # so the PDF title can be displayed in the search results.
    field_mappings=[
        FieldMapping(source_field_name=source, target_field_name=target)
        for source, target in (
            ("metadata_storage_name", "title"),
            ("metadata_storage_path", "url"),
            ("metadata_storage_last_modified", "last_modified"),
        )
    ],
    # Enrichment-tree paths -> index fields for the extracted entities.
    output_field_mappings=[
        FieldMapping(source_field_name=source, target_field_name=target)
        for source, target in (
            ("/document/content/people", "people"),
            ("/document/content/skills", "skills"),
        )
    ],
)
The text was updated successfully, but these errors were encountered: