From 46259daa72037dad5798d1eb3ccecac7e6d7b0f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20Kopyci=C5=84ski?= Date: Wed, 30 Oct 2024 20:20:45 +0100 Subject: [PATCH] [Security Assistant] Knowledge base switch to use `semantic_text` (#197007) ## Summary - `text_expansion` is deprecated, use `semantic_text` instead - fix KB index entry form field options - explicitly create inference endpoint on KB setup if `assistantKnowledgeBaseByDefault` is true - when upgrading from v1, update the KB ingest pipeline and remove the unnecessary processor, but keep the pipeline for backward compatibility - switch to use `doc` update for KB entries due to the limitations of `semantic_text` https://www.elastic.co/guide/en/elasticsearch/reference/current/semantic-text.html#update-script - split loading Security labs content into smaller chunks --------- Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com> Co-authored-by: Pedro Jaramillo (cherry picked from commit a8c54f2ea4f482285eda15c72c56da35d76a6719) # Conflicts: # packages/kbn-data-stream-adapter/src/field_maps/types.ts --- .../src/field_maps/types.ts | 2 + .../knowledge_base/crud_kb_route.gen.ts | 1 + .../knowledge_base/crud_kb_route.schema.yaml | 2 + .../entries/use_knowledge_base_entries.ts | 3 + .../use_knowledge_base_status.test.tsx | 1 + .../use_knowledge_base_status.tsx | 20 +- .../knowledge_base_settings.tsx | 4 +- .../index.tsx | 14 +- .../index_entry_editor.tsx | 2 +- .../translations.ts | 7 + .../use_knowledge_base_table.tsx | 36 ++- .../setup_knowledge_base_button.tsx | 22 +- .../server/__mocks__/msearch_query.ts | 10 +- .../server/__mocks__/vector_search_query.ts | 10 +- .../anonymization_fields/helpers.ts | 14 +- .../conversations/helpers.ts | 14 +- .../conversations/update_conversation.ts | 2 +- .../create_knowledge_base_entry.ts | 54 +--- .../field_maps_configuration.ts | 8 + .../knowledge_base/helpers.ts | 43 ++- .../knowledge_base/index.ts | 266 ++++++++++++++----
.../knowledge_base/ingest_pipeline.ts | 39 ++- .../knowledge_base/types.ts | 3 + .../prompts/helpers.ts | 14 +- .../server/ai_assistant_service/helpers.ts | 3 + .../server/ai_assistant_service/index.ts | 14 +- .../lib/data_stream/documents_data_writer.ts | 17 +- .../content_loaders/security_labs_loader.ts | 33 ++- .../server/routes/knowledge_base/constants.ts | 1 + .../get_knowledge_base_status.test.ts | 2 + .../get_knowledge_base_status.ts | 11 +- .../plugins/elastic_assistant/tsconfig.json | 3 +- .../configs/ess.config.ts | 5 + 33 files changed, 485 insertions(+), 195 deletions(-) diff --git a/packages/kbn-data-stream-adapter/src/field_maps/types.ts b/packages/kbn-data-stream-adapter/src/field_maps/types.ts index 4f42a6c6b686..7c8cbbcce385 100644 --- a/packages/kbn-data-stream-adapter/src/field_maps/types.ts +++ b/packages/kbn-data-stream-adapter/src/field_maps/types.ts @@ -53,5 +53,7 @@ export interface FieldMap { scaling_factor?: number; dynamic?: boolean | 'strict'; properties?: Record; + inference_id?: string; + copy_to?: string; }; } diff --git a/x-pack/packages/kbn-elastic-assistant-common/impl/schemas/knowledge_base/crud_kb_route.gen.ts b/x-pack/packages/kbn-elastic-assistant-common/impl/schemas/knowledge_base/crud_kb_route.gen.ts index fd599f5798cd..aad215021da8 100644 --- a/x-pack/packages/kbn-elastic-assistant-common/impl/schemas/knowledge_base/crud_kb_route.gen.ts +++ b/x-pack/packages/kbn-elastic-assistant-common/impl/schemas/knowledge_base/crud_kb_route.gen.ts @@ -81,4 +81,5 @@ export const ReadKnowledgeBaseResponse = z.object({ is_setup_in_progress: z.boolean().optional(), pipeline_exists: z.boolean().optional(), security_labs_exists: z.boolean().optional(), + user_data_exists: z.boolean().optional(), }); diff --git a/x-pack/packages/kbn-elastic-assistant-common/impl/schemas/knowledge_base/crud_kb_route.schema.yaml b/x-pack/packages/kbn-elastic-assistant-common/impl/schemas/knowledge_base/crud_kb_route.schema.yaml index a61e98602ab4..0e0f1e926791 
100644 --- a/x-pack/packages/kbn-elastic-assistant-common/impl/schemas/knowledge_base/crud_kb_route.schema.yaml +++ b/x-pack/packages/kbn-elastic-assistant-common/impl/schemas/knowledge_base/crud_kb_route.schema.yaml @@ -78,6 +78,8 @@ paths: type: boolean security_labs_exists: type: boolean + user_data_exists: + type: boolean 400: description: Generic Error content: diff --git a/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/entries/use_knowledge_base_entries.ts b/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/entries/use_knowledge_base_entries.ts index b41119779b21..0775ed2d27a3 100644 --- a/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/entries/use_knowledge_base_entries.ts +++ b/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/entries/use_knowledge_base_entries.ts @@ -24,6 +24,7 @@ export interface UseKnowledgeBaseEntriesParams { signal?: AbortSignal | undefined; toasts?: IToasts; enabled?: boolean; // For disabling if FF is off + isRefetching?: boolean; // For enabling polling } const defaultQuery: FindKnowledgeBaseEntriesRequestQuery = { @@ -56,6 +57,7 @@ export const useKnowledgeBaseEntries = ({ signal, toasts, enabled = false, + isRefetching = false, }: UseKnowledgeBaseEntriesParams) => useQuery( KNOWLEDGE_BASE_ENTRY_QUERY_KEY, @@ -73,6 +75,7 @@ export const useKnowledgeBaseEntries = ({ enabled, keepPreviousData: true, initialData: { page: 1, perPage: 100, total: 0, data: [] }, + refetchInterval: isRefetching ? 
30000 : false, onError: (error: IHttpFetchError) => { if (error.name !== 'AbortError') { toasts?.addError(error, { diff --git a/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/use_knowledge_base_status.test.tsx b/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/use_knowledge_base_status.test.tsx index 80ce3d27d8dc..83073b5770ba 100644 --- a/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/use_knowledge_base_status.test.tsx +++ b/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/use_knowledge_base_status.test.tsx @@ -34,6 +34,7 @@ const statusResponse = { elser_exists: true, index_exists: true, pipeline_exists: true, + security_labs_exists: true, }; const http = { diff --git a/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/use_knowledge_base_status.tsx b/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/use_knowledge_base_status.tsx index 75e78f2a0694..45c6d011b46d 100644 --- a/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/use_knowledge_base_status.tsx +++ b/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/use_knowledge_base_status.tsx @@ -45,6 +45,8 @@ export const useKnowledgeBaseStatus = ({ { retry: false, keepPreviousData: true, + // Polling interval for Knowledge Base setup in progress + refetchInterval: (data) => (data?.is_setup_in_progress ? 30000 : false), // Deprecated, hoist to `queryCache` w/in `QueryClient. 
See: https://stackoverflow.com/a/76961109 onError: (error: IHttpFetchError) => { if (error.name !== 'AbortError') { @@ -86,12 +88,12 @@ export const useInvalidateKnowledgeBaseStatus = () => { * * @param kbStatus ReadKnowledgeBaseResponse */ -export const isKnowledgeBaseSetup = (kbStatus: ReadKnowledgeBaseResponse | undefined): boolean => { - return ( - (kbStatus?.elser_exists && - kbStatus?.security_labs_exists && - kbStatus?.index_exists && - kbStatus?.pipeline_exists) ?? - false - ); -}; +export const isKnowledgeBaseSetup = (kbStatus: ReadKnowledgeBaseResponse | undefined): boolean => + (kbStatus?.elser_exists && + kbStatus?.index_exists && + kbStatus?.pipeline_exists && + // Allows to use UI while importing Security Labs docs + (kbStatus?.security_labs_exists || + kbStatus?.is_setup_in_progress || + kbStatus?.user_data_exists)) ?? + false; diff --git a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings.tsx b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings.tsx index a46ba652574f..7041bf909601 100644 --- a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings.tsx +++ b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings.tsx @@ -53,9 +53,9 @@ export const KnowledgeBaseSettings: React.FC = React.memo( const isSecurityLabsEnabled = kbStatus?.security_labs_exists ?? false; const isKnowledgeBaseSetup = (isElserEnabled && - isSecurityLabsEnabled && kbStatus?.index_exists && - kbStatus?.pipeline_exists) ?? + kbStatus?.pipeline_exists && + (isSecurityLabsEnabled || kbStatus?.user_data_exists)) ?? false; const isSetupInProgress = kbStatus?.is_setup_in_progress ?? false; const isSetupAvailable = kbStatus?.is_setup_available ?? 
false; diff --git a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/index.tsx b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/index.tsx index 54ea159ff058..bc2d60941679 100644 --- a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/index.tsx +++ b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/index.tsx @@ -160,6 +160,7 @@ export const KnowledgeBaseSettingsManagement: React.FC = React.memo(({ d http, toasts, enabled: enableKnowledgeBaseByDefault, + isRefetching: kbStatus?.is_setup_in_progress, }); // Flyout Save/Cancel Actions @@ -190,13 +191,15 @@ export const KnowledgeBaseSettingsManagement: React.FC = React.memo(({ d indices.push(entry.index); } }); - return dataViews.getExistingIndices(indices); + + return indices.length ? dataViews.getExistingIndices(indices) : Promise.resolve([]); }, [entries.data]); const { getColumns } = useKnowledgeBaseTable(); const columns = useMemo( () => getColumns({ + isKbSetupInProgress: kbStatus?.is_setup_in_progress ?? 
false, existingIndices, isDeleteEnabled: (entry: KnowledgeBaseEntryResponse) => { return ( @@ -219,7 +222,14 @@ export const KnowledgeBaseSettingsManagement: React.FC = React.memo(({ d openFlyout(); }, }), - [entries.data, existingIndices, getColumns, hasManageGlobalKnowledgeBase, openFlyout] + [ + entries.data, + existingIndices, + getColumns, + hasManageGlobalKnowledgeBase, + kbStatus?.is_setup_in_progress, + openFlyout, + ] ); // Refresh button diff --git a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/index_entry_editor.tsx b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/index_entry_editor.tsx index ff61c61ed742..dfc3cd008668 100644 --- a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/index_entry_editor.tsx +++ b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/index_entry_editor.tsx @@ -117,7 +117,7 @@ export const IndexEntryEditor: React.FC = React.memo( dataViews.getFieldsForWildcard({ pattern: entry?.index ?? '', }), - [] + [entry?.index] ); const fieldOptions = useMemo( diff --git a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/translations.ts b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/translations.ts index b311f373c214..98af0eabea6b 100644 --- a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/translations.ts +++ b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/translations.ts @@ -372,3 +372,10 @@ export const MISSING_INDEX_TOOLTIP_CONTENT = i18n.translate( 'The index assigned to this knowledge base entry is unavailable. Check the permissions on the configured index, or that the index has not been deleted. 
You can update the index to be used for this knowledge entry, or delete the entry entirely.', } ); + +export const SECURITY_LABS_NOT_FULLY_LOADED = i18n.translate( + 'xpack.elasticAssistant.assistant.settings.knowledgeBaseSettingsManagement.securityLabsNotFullyLoadedTooltipContent', + { + defaultMessage: 'Security Labs content is not fully loaded. Click to reload.', + } +); diff --git a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/use_knowledge_base_table.tsx b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/use_knowledge_base_table.tsx index 7180be139c28..cbdf97f116f7 100644 --- a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/use_knowledge_base_table.tsx +++ b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/use_knowledge_base_table.tsx @@ -11,6 +11,7 @@ import { EuiBasicTableColumn, EuiIcon, EuiText, + EuiLoadingSpinner, EuiToolTip, } from '@elastic/eui'; import { css } from '@emotion/react'; @@ -29,11 +30,16 @@ import * as i18n from './translations'; import { BadgesColumn } from '../../assistant/common/components/assistant_settings_management/badges'; import { useInlineActions } from '../../assistant/common/components/assistant_settings_management/inline_actions'; import { isSystemEntry } from './helpers'; +import { SetupKnowledgeBaseButton } from '../setup_knowledge_base_button'; const AuthorColumn = ({ entry }: { entry: KnowledgeBaseEntryResponse }) => { const { userProfileService } = useAssistantContext(); const userProfile = useAsync(async () => { + if (isSystemEntry(entry) || entry.createdBy === 'unknown') { + return; + } + const profile = await userProfileService?.bulkGet<{ avatar: UserProfileAvatarData }>({ uids: new Set([entry.createdBy]), dataPath: 'avatar', @@ -45,7 +51,7 @@ const AuthorColumn = ({ entry }: { entry: KnowledgeBaseEntryResponse }) => { () => 
userProfile?.value?.username ?? 'Unknown', [userProfile?.value?.username] ); - const userAvatar = userProfile.value?.avatar; + const userAvatar = userProfile?.value?.avatar; const badgeItem = isSystemEntry(entry) ? 'Elastic' : userName; const userImage = isSystemEntry(entry) ? ( { isEditEnabled, onDeleteActionClicked, onEditActionClicked, + isKbSetupInProgress, }: { existingIndices?: string[]; isDeleteEnabled: (entry: KnowledgeBaseEntryResponse) => boolean; isEditEnabled: (entry: KnowledgeBaseEntryResponse) => boolean; onDeleteActionClicked: (entry: KnowledgeBaseEntryResponse) => void; onEditActionClicked: (entry: KnowledgeBaseEntryResponse) => void; + isKbSetupInProgress: boolean; }): Array> => { return [ { @@ -180,11 +188,27 @@ export const useKnowledgeBaseTable = () => { { name: i18n.COLUMN_ENTRIES, render: (entry: KnowledgeBaseEntryResponse) => { - return isSystemEntry(entry) - ? entry.text - : entry.type === DocumentEntryType.value - ? '1' - : '-'; + return isSystemEntry(entry) ? ( + <> + {`${entry.text}`} + {isKbSetupInProgress ? ( + + ) : ( + + + + )} + + ) : entry.type === DocumentEntryType.value ? 
( + '1' + ) : ( + '-' + ); }, }, { diff --git a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/setup_knowledge_base_button.tsx b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/setup_knowledge_base_button.tsx index d697fc7120d0..948e45232028 100644 --- a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/setup_knowledge_base_button.tsx +++ b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/setup_knowledge_base_button.tsx @@ -6,15 +6,16 @@ */ import React, { useCallback } from 'react'; -import { EuiButton, EuiButtonEmpty, EuiToolTip } from '@elastic/eui'; +import { EuiButton, EuiButtonIcon, EuiButtonEmpty, EuiToolTip } from '@elastic/eui'; import { i18n } from '@kbn/i18n'; +import { css } from '@emotion/react'; import { useAssistantContext } from '../..'; import { useSetupKnowledgeBase } from '../assistant/api/knowledge_base/use_setup_knowledge_base'; import { useKnowledgeBaseStatus } from '../assistant/api/knowledge_base/use_knowledge_base_status'; interface Props { - display?: 'mini'; + display?: 'mini' | 'refresh'; } /** @@ -48,6 +49,23 @@ export const SetupKnowledgeBaseButton: React.FC = React.memo(({ display } }) : undefined; + if (display === 'refresh') { + return ( + + ); + } + return ( {display === 'mini' ? ( diff --git a/x-pack/plugins/elastic_assistant/server/__mocks__/msearch_query.ts b/x-pack/plugins/elastic_assistant/server/__mocks__/msearch_query.ts index e411dfaa2f1e..ae5adcfab61a 100644 --- a/x-pack/plugins/elastic_assistant/server/__mocks__/msearch_query.ts +++ b/x-pack/plugins/elastic_assistant/server/__mocks__/msearch_query.ts @@ -34,12 +34,10 @@ export const mSearchQueryBody: MsearchQueryBody = { ], must: [ { - text_expansion: { - 'vector.tokens': { - model_id: '.elser_model_2', - model_text: - 'Generate an ESQL query that will count the number of connections made to external IP addresses, broken down by user. 
If the count is greater than 100 for a specific user, add a new field called "follow_up" that contains a value of "true", otherwise, it should contain "false". The user names should also be enriched with their respective group names.', - }, + semantic: { + field: 'semantic_text', + query: + 'Generate an ESQL query that will count the number of connections made to external IP addresses, broken down by user. If the count is greater than 100 for a specific user, add a new field called "follow_up" that contains a value of "true", otherwise, it should contain "false". The user names should also be enriched with their respective group names.', }, }, ], diff --git a/x-pack/plugins/elastic_assistant/server/__mocks__/vector_search_query.ts b/x-pack/plugins/elastic_assistant/server/__mocks__/vector_search_query.ts index 30fbd0ad2c58..04263c5d242b 100644 --- a/x-pack/plugins/elastic_assistant/server/__mocks__/vector_search_query.ts +++ b/x-pack/plugins/elastic_assistant/server/__mocks__/vector_search_query.ts @@ -26,12 +26,10 @@ export const mockVectorSearchQuery: QueryDslQueryContainer = { ], must: [ { - text_expansion: { - 'vector.tokens': { - model_id: '.elser_model_2', - model_text: - 'Generate an ES|QL query that will count the number of connections made to external IP addresses, broken down by user. If the count is greater than 100 for a specific user, add a new field called "follow_up" that contains a value of "true", otherwise, it should contain "false". The user names should also be enriched with their respective group names.', - }, + semantic: { + field: 'semantic_text', + query: + 'Generate an ES|QL query that will count the number of connections made to external IP addresses, broken down by user. If the count is greater than 100 for a specific user, add a new field called "follow_up" that contains a value of "true", otherwise, it should contain "false". 
The user names should also be enriched with their respective group names.', }, }, ], diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/anonymization_fields/helpers.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/anonymization_fields/helpers.ts index 9a4a3b6e1c0c..0f577df4e56e 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/anonymization_fields/helpers.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/anonymization_fields/helpers.ts @@ -99,7 +99,8 @@ export const getUpdateScript = ({ isPatch?: boolean; }) => { return { - source: ` + script: { + source: ` if (params.assignEmpty == true || params.containsKey('allowed')) { ctx._source.allowed = params.allowed; } @@ -108,11 +109,12 @@ export const getUpdateScript = ({ } ctx._source.updated_at = params.updated_at; `, - lang: 'painless', - params: { - ...anonymizationField, // when assigning undefined in painless, it will remove property and wil set it to null - // for patch we don't want to remove unspecified value in payload - assignEmpty: !(isPatch ?? true), + lang: 'painless', + params: { + ...anonymizationField, // when assigning undefined in painless, it will remove property and wil set it to null + // for patch we don't want to remove unspecified value in payload + assignEmpty: !(isPatch ?? 
true), + }, }, }; }; diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/conversations/helpers.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/conversations/helpers.ts index 9e52b4a7414a..bdd1107942cc 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/conversations/helpers.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/conversations/helpers.ts @@ -15,7 +15,8 @@ export const getUpdateScript = ({ isPatch?: boolean; }) => { return { - source: ` + script: { + source: ` if (params.assignEmpty == true || params.containsKey('api_config')) { if (ctx._source.api_config != null) { if (params.assignEmpty == true || params.api_config.containsKey('connector_id')) { @@ -70,11 +71,12 @@ export const getUpdateScript = ({ } ctx._source.updated_at = params.updated_at; `, - lang: 'painless', - params: { - ...conversation, // when assigning undefined in painless, it will remove property and wil set it to null - // for patch we don't want to remove unspecified value in payload - assignEmpty: !(isPatch ?? true), + lang: 'painless', + params: { + ...conversation, // when assigning undefined in painless, it will remove property and wil set it to null + // for patch we don't want to remove unspecified value in payload + assignEmpty: !(isPatch ?? 
true), + }, }, }; }; diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/conversations/update_conversation.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/conversations/update_conversation.ts index 807fea2decd9..7e9ee336f6fe 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/conversations/update_conversation.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/conversations/update_conversation.ts @@ -76,7 +76,7 @@ export const updateConversation = async ({ }, }, refresh: true, - script: getUpdateScript({ conversation: params, isPatch }), + script: getUpdateScript({ conversation: params, isPatch }).script, }); if (response.failures && response.failures.length > 0) { diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/create_knowledge_base_entry.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/create_knowledge_base_entry.ts index 23f73501b105..09bb5b291ef9 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/create_knowledge_base_entry.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/create_knowledge_base_entry.ts @@ -139,55 +139,11 @@ export const getUpdateScript = ({ entry: UpdateKnowledgeBaseEntrySchema; isPatch?: boolean; }) => { + // Cannot use script for updating documents with semantic_text fields return { - source: ` - if (params.assignEmpty == true || params.containsKey('name')) { - ctx._source.name = params.name; - } - if (params.assignEmpty == true || params.containsKey('type')) { - ctx._source.type = params.type; - } - if (params.assignEmpty == true || params.containsKey('users')) { - ctx._source.users = params.users; - } - if (params.assignEmpty == true || params.containsKey('query_description')) { - ctx._source.query_description = params.query_description; - } - if (params.assignEmpty == true || 
params.containsKey('input_schema')) { - ctx._source.input_schema = params.input_schema; - } - if (params.assignEmpty == true || params.containsKey('output_fields')) { - ctx._source.output_fields = params.output_fields; - } - if (params.assignEmpty == true || params.containsKey('kb_resource')) { - ctx._source.kb_resource = params.kb_resource; - } - if (params.assignEmpty == true || params.containsKey('required')) { - ctx._source.required = params.required; - } - if (params.assignEmpty == true || params.containsKey('source')) { - ctx._source.source = params.source; - } - if (params.assignEmpty == true || params.containsKey('text')) { - ctx._source.text = params.text; - } - if (params.assignEmpty == true || params.containsKey('description')) { - ctx._source.description = params.description; - } - if (params.assignEmpty == true || params.containsKey('field')) { - ctx._source.field = params.field; - } - if (params.assignEmpty == true || params.containsKey('index')) { - ctx._source.index = params.index; - } - ctx._source.updated_at = params.updated_at; - ctx._source.updated_by = params.updated_by; - `, - lang: 'painless', - params: { - ...entry, // when assigning undefined in painless, it will remove property and wil set it to null - // for patch we don't want to remove unspecified value in payload - assignEmpty: !(isPatch ?? true), + doc: { + ...entry, + semantic_text: entry.text, }, }; }; @@ -247,7 +203,7 @@ export const transformToCreateSchema = ({ required: entry.required ?? 
false, source: entry.source, text: entry.text, - vector: undefined, + semantic_text: entry.text, }; }; diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/field_maps_configuration.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/field_maps_configuration.ts index 0712664bbfee..348efb5a18f7 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/field_maps_configuration.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/field_maps_configuration.ts @@ -6,6 +6,8 @@ */ import { FieldMap } from '@kbn/data-stream-adapter'; +export const ASSISTANT_ELSER_INFERENCE_ID = 'elastic-security-ai-assistant-elser2'; + export const knowledgeBaseFieldMap: FieldMap = { '@timestamp': { type: 'date', @@ -169,6 +171,12 @@ export const knowledgeBaseFieldMapV2: FieldMap = { required: false, }, // Embeddings field + semantic_text: { + type: 'semantic_text', + array: false, + required: false, + inference_id: ASSISTANT_ELSER_INFERENCE_ID, + }, vector: { type: 'object', array: false, diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/helpers.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/helpers.ts index 59816b0b0c26..a19b3f094508 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/helpers.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/helpers.ts @@ -46,7 +46,7 @@ export const getKBVectorSearchQuery = ({ filter?: QueryDslQueryContainer | undefined; kbResource?: string | undefined; modelId: string; - query: string; + query?: string; required?: boolean | undefined; user: AuthenticatedUser; v2KnowledgeBaseEnabled: boolean; @@ -114,20 +114,37 @@ export const getKBVectorSearchQuery = ({ ], }; - return { - bool: { - must: [ - { - text_expansion: { - 'vector.tokens': { - model_id: 
modelId, - model_text: query, - }, + let semanticTextFilter: + | Array<{ semantic: { field: string; query: string } }> + | Array<{ + text_expansion: { 'vector.tokens': { model_id: string; model_text: string } }; + }> = []; + + if (v2KnowledgeBaseEnabled && query) { + semanticTextFilter = [ + { + semantic: { + field: 'semantic_text', + query, + }, + }, + ]; + } else if (!v2KnowledgeBaseEnabled) { + semanticTextFilter = [ + { + text_expansion: { + 'vector.tokens': { + model_id: modelId, + model_text: query as string, }, }, - ...requiredFilter, - ...resourceFilter, - ], + }, + ]; + } + + return { + bool: { + must: [...semanticTextFilter, ...requiredFilter, ...resourceFilter], ...userFilter, filter, minimum_should_match: 1, diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/index.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/index.ts index 64e7b00089c0..f985095661f3 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/index.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/index.ts @@ -8,6 +8,7 @@ import { MlTrainedModelDeploymentNodesStats, MlTrainedModelStats, + SearchTotalHits, } from '@elastic/elasticsearch/lib/api/types'; import type { MlPluginSetup } from '@kbn/ml-plugin/server'; import type { KibanaRequest } from '@kbn/core-http-server'; @@ -25,6 +26,8 @@ import pRetry from 'p-retry'; import { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; import { StructuredTool } from '@langchain/core/tools'; import { ElasticsearchClient } from '@kbn/core/server'; +import { IndexPatternsFetcher } from '@kbn/data-views-plugin/server'; +import { map } from 'lodash'; import { AIAssistantDataClient, AIAssistantDataClientParams } from '..'; import { AssistantToolParams, GetElser } from '../../types'; import { @@ -38,6 +41,7 @@ import { transformESSearchToKnowledgeBaseEntry } from 
'./transforms'; import { ESQL_DOCS_LOADED_QUERY, SECURITY_LABS_RESOURCE, + USER_RESOURCE, } from '../../routes/knowledge_base/constants'; import { getKBVectorSearchQuery, @@ -45,7 +49,11 @@ import { isModelAlreadyExistsError, } from './helpers'; import { getKBUserFilter } from '../../routes/knowledge_base/entries/utils'; -import { loadSecurityLabs } from '../../lib/langchain/content_loaders/security_labs_loader'; +import { + loadSecurityLabs, + getSecurityLabsDocsCount, +} from '../../lib/langchain/content_loaders/security_labs_loader'; +import { ASSISTANT_ELSER_INFERENCE_ID } from './field_maps_configuration'; /** * Params for when creating KbDataClient in Request Context Factory. Useful if needing to modify @@ -169,30 +177,83 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient { this.options.logger.debug(`Checking if ELSER model '${elserId}' is deployed...`); try { - const esClient = await this.options.elasticsearchClientPromise; - const getResponse = await esClient.ml.getTrainedModelsStats({ - model_id: elserId, - }); + if (this.isV2KnowledgeBaseEnabled) { + return await this.isInferenceEndpointExists(); + } else { + const esClient = await this.options.elasticsearchClientPromise; + const getResponse = await esClient.ml.getTrainedModelsStats({ + model_id: elserId, + }); - // For standardized way of checking deployment status see: https://github.com/elastic/elasticsearch/issues/106986 - const isReadyESS = (stats: MlTrainedModelStats) => - stats.deployment_stats?.state === 'started' && - stats.deployment_stats?.allocation_status.state === 'fully_allocated'; + // For standardized way of checking deployment status see: https://github.com/elastic/elasticsearch/issues/106986 + const isReadyESS = (stats: MlTrainedModelStats) => + stats.deployment_stats?.state === 'started' && + stats.deployment_stats?.allocation_status.state === 'fully_allocated'; - const isReadyServerless = (stats: MlTrainedModelStats) => - (stats.deployment_stats?.nodes as 
unknown as MlTrainedModelDeploymentNodesStats[]).some( - (node) => node.routing_state.routing_state === 'started' - ); + const isReadyServerless = (stats: MlTrainedModelStats) => + (stats.deployment_stats?.nodes as unknown as MlTrainedModelDeploymentNodesStats[])?.some( + (node) => node.routing_state.routing_state === 'started' + ); - return getResponse.trained_model_stats.some( - (stats) => isReadyESS(stats) || isReadyServerless(stats) - ); + return getResponse.trained_model_stats?.some( + (stats) => isReadyESS(stats) || isReadyServerless(stats) + ); + } } catch (e) { + this.options.logger.debug(`Error checking if ELSER model '${elserId}' is deployed: ${e}`); // Returns 404 if it doesn't exist return false; } }; + public isInferenceEndpointExists = async (): Promise => { + try { + const esClient = await this.options.elasticsearchClientPromise; + + return !!(await esClient.inference.get({ + inference_id: ASSISTANT_ELSER_INFERENCE_ID, + task_type: 'sparse_embedding', + })); + } catch (error) { + this.options.logger.debug( + `Error checking if Inference endpoint ${ASSISTANT_ELSER_INFERENCE_ID} exists: ${error}` + ); + return false; + } + }; + + public createInferenceEndpoint = async () => { + const elserId = await this.options.getElserId(); + this.options.logger.debug(`Deploying ELSER model '${elserId}'...`); + try { + const esClient = await this.options.elasticsearchClientPromise; + if (this.isV2KnowledgeBaseEnabled) { + await esClient.inference.put({ + task_type: 'sparse_embedding', + inference_id: ASSISTANT_ELSER_INFERENCE_ID, + inference_config: { + service: 'elasticsearch', + service_settings: { + adaptive_allocations: { + enabled: true, + min_number_of_allocations: 0, + max_number_of_allocations: 8, + }, + num_threads: 1, + model_id: elserId, + }, + task_settings: {}, + }, + }); + } + } catch (error) { + this.options.logger.error( + `Error creating inference endpoint for ELSER model '${elserId}':\n${error}` + ); + throw new Error(`Error creating inference 
endpoint for ELSER model '${elserId}':\n${error}`); + } + }; + /** * Downloads and deploys recommended ELSER (if not already), then loads ES|QL docs * @@ -238,8 +299,22 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient { `Removed ${legacyESQL?.total} ESQL knowledge base docs from knowledge base data stream: ${this.indexTemplateAndPattern.alias}.` ); } + // Delete any existing Security Labs content + const securityLabsDocs = await esClient.deleteByQuery({ + index: this.indexTemplateAndPattern.alias, + query: { + bool: { + must: [{ terms: { kb_resource: [SECURITY_LABS_RESOURCE] } }], + }, + }, + }); + if (securityLabsDocs?.total) { + this.options.logger.info( + `Removed ${securityLabsDocs?.total} Security Labs knowledge base docs from knowledge base data stream: ${this.indexTemplateAndPattern.alias}.` + ); + } } catch (e) { - this.options.logger.info('No legacy ESQL knowledge base docs to delete'); + this.options.logger.info('No legacy ESQL or Security Labs knowledge base docs to delete'); } } @@ -259,19 +334,34 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient { this.options.logger.debug(`ELSER model '${elserId}' is already installed`); } - const isDeployed = await this.isModelDeployed(); - if (!isDeployed) { - await this.deployModel(); - await pRetry( - async () => - (await this.isModelDeployed()) - ? Promise.resolve() - : Promise.reject(new Error('Model not deployed')), - { minTimeout: 2000, retries: 10 } - ); - this.options.logger.debug(`ELSER model '${elserId}' successfully deployed!`); + if (!this.isV2KnowledgeBaseEnabled) { + const isDeployed = await this.isModelDeployed(); + if (!isDeployed) { + await this.deployModel(); + await pRetry( + async () => + (await this.isModelDeployed()) + ? 
Promise.resolve() + : Promise.reject(new Error('Model not deployed')), + { minTimeout: 2000, retries: 10 } + ); + this.options.logger.debug(`ELSER model '${elserId}' successfully deployed!`); + } else { + this.options.logger.debug(`ELSER model '${elserId}' is already deployed`); + } } else { - this.options.logger.debug(`ELSER model '${elserId}' is already deployed`); + const inferenceExists = await this.isInferenceEndpointExists(); + if (!inferenceExists) { + await this.createInferenceEndpoint(); + + this.options.logger.debug( + `Inference endpoint for ELSER model '${elserId}' successfully deployed!` + ); + } else { + this.options.logger.debug( + `Inference endpoint for ELSER model '${elserId}' is already deployed` + ); + } } this.options.logger.debug(`Checking if Knowledge Base docs have been loaded...`); @@ -289,8 +379,9 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient { this.options.setIsKBSetupInProgress(false); this.options.logger.error(`Error setting up Knowledge Base: ${e.message}`); throw new Error(`Error setting up Knowledge Base: ${e.message}`); + } finally { + this.options.setIsKBSetupInProgress(false); } - this.options.setIsKBSetupInProgress(false); }; /** @@ -385,15 +476,87 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient { }; /** - * Returns if Security Labs KB docs have been loaded + * Returns if user's KB docs exists + */ + + public isUserDataExists = async (): Promise => { + const user = this.options.currentUser; + if (user == null) { + throw new Error( + 'Authenticated user not found! Ensure kbDataClient was initialized from a request.' 
+ ); + } + + const esClient = await this.options.elasticsearchClientPromise; + const modelId = await this.options.getElserId(); + + try { + const vectorSearchQuery = getKBVectorSearchQuery({ + kbResource: USER_RESOURCE, + required: false, + user, + modelId, + v2KnowledgeBaseEnabled: this.options.v2KnowledgeBaseEnabled, + }); + + const result = await esClient.search({ + index: this.indexTemplateAndPattern.alias, + size: 0, + query: vectorSearchQuery, + track_total_hits: true, + }); + + return !!(result.hits?.total as SearchTotalHits).value; + } catch (e) { + this.options.logger.debug(`Error checking if user's KB docs exist: ${e.message}`); + return false; + } + }; + + /** + * Returns if allSecurity Labs KB docs have been loaded */ public isSecurityLabsDocsLoaded = async (): Promise => { - const securityLabsDocs = await this.getKnowledgeBaseDocumentEntries({ - query: '', - kbResource: SECURITY_LABS_RESOURCE, - required: false, - }); - return securityLabsDocs.length > 0; + const user = this.options.currentUser; + if (user == null) { + throw new Error( + 'Authenticated user not found! Ensure kbDataClient was initialized from a request.' 
+ ); + } + + const expectedDocsCount = await getSecurityLabsDocsCount({ logger: this.options.logger }); + + const esClient = await this.options.elasticsearchClientPromise; + const modelId = await this.options.getElserId(); + + try { + const vectorSearchQuery = getKBVectorSearchQuery({ + kbResource: SECURITY_LABS_RESOURCE, + required: false, + user, + modelId, + v2KnowledgeBaseEnabled: this.options.v2KnowledgeBaseEnabled, + }); + + const result = await esClient.search({ + index: this.indexTemplateAndPattern.alias, + size: 0, + query: vectorSearchQuery, + track_total_hits: true, + }); + + const existingDocs = (result.hits?.total as SearchTotalHits).value; + + if (existingDocs !== expectedDocsCount) { + this.options.logger.debug( + `Security Labs docs are not loaded, existing docs: ${existingDocs}, expected docs: ${expectedDocsCount}` + ); + } + return existingDocs === expectedDocsCount; + } catch (e) { + this.options.logger.info(`Error checking if Security Labs docs are loaded: ${e.message}`); + return false; + } }; /** @@ -423,10 +586,10 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient { const vectorSearchQuery = getKBVectorSearchQuery({ filter, kbResource, - modelId, query, required, user, + modelId, v2KnowledgeBaseEnabled: this.options.v2KnowledgeBaseEnabled, }); @@ -576,7 +739,9 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient { } try { - const elserId = await this.options.getElserId(); + const elserId = this.isV2KnowledgeBaseEnabled + ? 
ASSISTANT_ELSER_INFERENCE_ID + : await this.options.getElserId(); const userFilter = getKBUserFilter(user); const results = await this.findDocuments({ // Note: This is a magic number to set some upward bound as to not blow the context with too @@ -595,14 +760,21 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient { if (results) { const entries = transformESSearchToKnowledgeBaseEntry(results.data) as IndexEntry[]; - return entries.map((indexEntry) => { - return getStructuredToolForIndexEntry({ - indexEntry, - esClient, - logger: this.options.logger, - elserId, - }); - }); + const indexPatternFetcher = new IndexPatternsFetcher(esClient); + const existingIndices = await indexPatternFetcher.getExistingIndices(map(entries, 'index')); + return ( + entries + // Filter out any IndexEntries that don't have an existing index + .filter((entry) => existingIndices.includes(entry.index)) + .map((indexEntry) => { + return getStructuredToolForIndexEntry({ + indexEntry, + esClient, + logger: this.options.logger, + elserId, + }); + }) + ); } } catch (e) { this.options.logger.error(`kbDataClient.getAssistantTools() - Failed to fetch IndexEntries`); diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/ingest_pipeline.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/ingest_pipeline.ts index e11840b94e66..8f459848af42 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/ingest_pipeline.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/ingest_pipeline.ts @@ -5,22 +5,31 @@ * 2.0. 
*/ -// TODO: Ensure old pipeline is updated/replaced -export const knowledgeBaseIngestPipeline = ({ id, modelId }: { id: string; modelId: string }) => ({ +export const knowledgeBaseIngestPipeline = ({ + id, + modelId, + v2KnowledgeBaseEnabled, +}: { + id: string; + modelId: string; + v2KnowledgeBaseEnabled: boolean; +}) => ({ id, description: 'Embedding pipeline for Elastic AI Assistant ELSER Knowledge Base', - processors: [ - { - inference: { - if: 'ctx?.text != null', - model_id: modelId, - input_output: [ - { - input_field: 'text', - output_field: 'vector.tokens', + processors: !v2KnowledgeBaseEnabled + ? [ + { + inference: { + if: 'ctx?.text != null', + model_id: modelId, + input_output: [ + { + input_field: 'text', + output_field: 'vector.tokens', + }, + ], }, - ], - }, - }, - ], + }, + ] + : [], }); diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/types.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/types.ts index 3de1a15d79b2..443d03941ccd 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/types.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/types.ts @@ -27,6 +27,7 @@ export interface EsDocumentEntry { required: boolean; source: string; text: string; + semantic_text?: string; vector?: { tokens: Record; model_id: string; @@ -99,6 +100,7 @@ export interface UpdateKnowledgeBaseEntrySchema { required?: boolean; source?: string; text?: string; + semantic_text?: string; vector?: { tokens: Record; model_id: string; @@ -135,6 +137,7 @@ export interface CreateKnowledgeBaseEntrySchema { required?: boolean; source?: string; text?: string; + semantic_text?: string; vector?: { tokens: Record; model_id: string; diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/prompts/helpers.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/prompts/helpers.ts index 
a4534972c847..eb71270127b2 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/prompts/helpers.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/prompts/helpers.ts @@ -143,7 +143,8 @@ export const getUpdateScript = ({ isPatch?: boolean; }) => { return { - source: ` + script: { + source: ` if (params.assignEmpty == true || params.containsKey('content')) { ctx._source.content = params.content; } @@ -158,11 +159,12 @@ export const getUpdateScript = ({ } ctx._source.updated_at = params.updated_at; `, - lang: 'painless', - params: { - ...prompt, // when assigning undefined in painless, it will remove property and wil set it to null - // for patch we don't want to remove unspecified value in payload - assignEmpty: !(isPatch ?? true), + lang: 'painless', + params: { + ...prompt, // when assigning undefined in painless, it will remove property and wil set it to null + // for patch we don't want to remove unspecified value in payload + assignEmpty: !(isPatch ?? 
true), + }, }, }; }; diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_service/helpers.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_service/helpers.ts index 07da93032071..93338174364f 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_service/helpers.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_service/helpers.ts @@ -54,6 +54,7 @@ interface CreatePipelineParams { esClient: ElasticsearchClient; id: string; modelId: string; + v2KnowledgeBaseEnabled: boolean; } /** @@ -70,12 +71,14 @@ export const createPipeline = async ({ esClient, id, modelId, + v2KnowledgeBaseEnabled, }: CreatePipelineParams): Promise => { try { const response = await esClient.ingest.putPipeline( knowledgeBaseIngestPipeline({ id, modelId, + v2KnowledgeBaseEnabled, }) ); diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_service/index.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_service/index.ts index bfdf8b96f44b..a7b54dd5ca4b 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_service/index.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_service/index.ts @@ -97,7 +97,7 @@ export class AIAssistantService { this.knowledgeBaseDataStream = this.createDataStream({ resource: 'knowledgeBase', kibanaVersion: options.kibanaVersion, - fieldMap: knowledgeBaseFieldMap, // TODO: use V2 if FF is enabled + fieldMap: knowledgeBaseFieldMap, }); this.promptsDataStream = this.createDataStream({ resource: 'prompts', @@ -151,7 +151,9 @@ export class AIAssistantService { name: this.resourceNames.indexTemplate[resource], componentTemplateRefs: [this.resourceNames.componentTemplate[resource]], // Apply `default_pipeline` if pipeline exists for resource - ...(resource in this.resourceNames.pipelines + ...(resource in this.resourceNames.pipelines && + // Remove this param and initialization when the `assistantKnowledgeBaseByDefault` feature flag is removed + !(resource === 'knowledgeBase' && 
this.v2KnowledgeBaseEnabled) ? { template: { settings: { @@ -202,7 +204,12 @@ export class AIAssistantService { id: this.resourceNames.pipelines.knowledgeBase, }); // TODO: When FF is removed, ensure pipeline is re-created for those upgrading - if (!pipelineCreated || this.v2KnowledgeBaseEnabled) { + if ( + // Install for v1 + (!this.v2KnowledgeBaseEnabled && !pipelineCreated) || + // Upgrade from v1 to v2 + (pipelineCreated && this.v2KnowledgeBaseEnabled) + ) { this.options.logger.debug( `Installing ingest pipeline - ${this.resourceNames.pipelines.knowledgeBase}` ); @@ -210,6 +217,7 @@ export class AIAssistantService { esClient, id: this.resourceNames.pipelines.knowledgeBase, modelId: await this.getElserId(), + v2KnowledgeBaseEnabled: this.v2KnowledgeBaseEnabled, }); this.options.logger.debug(`Installed ingest pipeline: ${response}`); diff --git a/x-pack/plugins/elastic_assistant/server/lib/data_stream/documents_data_writer.ts b/x-pack/plugins/elastic_assistant/server/lib/data_stream/documents_data_writer.ts index 32b579fdeb71..08892038a58b 100644 --- a/x-pack/plugins/elastic_assistant/server/lib/data_stream/documents_data_writer.ts +++ b/x-pack/plugins/elastic_assistant/server/lib/data_stream/documents_data_writer.ts @@ -34,7 +34,10 @@ interface BulkParams { documentsToCreate?: TCreateParams[]; documentsToUpdate?: TUpdateParams[]; documentsToDelete?: string[]; - getUpdateScript?: (document: TUpdateParams, updatedAt: string) => Script; + getUpdateScript?: ( + document: TUpdateParams, + updatedAt: string + ) => { script?: Script; doc?: TUpdateParams }; authenticatedUser?: AuthenticatedUser; } @@ -73,7 +76,7 @@ export class DocumentsDataWriter implements DocumentsDataWriter { body: await this.buildBulkOperations(params), }, { - // Increasing timout to 2min as KB docs were failing to load after 30s + // Increasing timeout to 2min as KB docs were failing to load after 30s requestTimeout: 120000, } ); @@ -151,7 +154,10 @@ export class DocumentsDataWriter implements 
DocumentsDataWriter { private getUpdateDocumentsQuery = async ( documentsToUpdate: TUpdateParams[], - getUpdateScript: (document: TUpdateParams, updatedAt: string) => Script, + getUpdateScript: ( + document: TUpdateParams, + updatedAt: string + ) => { script?: Script; doc?: TUpdateParams }, authenticatedUser?: AuthenticatedUser ) => { const updatedAt = new Date().toISOString(); @@ -196,10 +202,7 @@ export class DocumentsDataWriter implements DocumentsDataWriter { _source: true, }, }, - { - script: getUpdateScript(document, updatedAt), - upsert: { counter: 1 }, - }, + getUpdateScript(document, updatedAt), ]); }; diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/security_labs_loader.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/security_labs_loader.ts index 10566b3e5a1d..f37e20df2bd9 100644 --- a/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/security_labs_loader.ts +++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/security_labs_loader.ts @@ -5,13 +5,14 @@ * 2.0. 
*/ +import globby from 'globby'; import { Logger } from '@kbn/core/server'; import { DirectoryLoader } from 'langchain/document_loaders/fs/directory'; import { TextLoader } from 'langchain/document_loaders/fs/text'; import { resolve } from 'path'; import { Document } from 'langchain/document'; import { Metadata } from '@kbn/elastic-assistant-common'; - +import pMap from 'p-map'; import { addRequiredKbResourceMetadata } from './add_required_kb_resource_metadata'; import { SECURITY_LABS_RESOURCE } from '../../../routes/knowledge_base/constants'; import { AIAssistantKnowledgeBaseDataClient } from '../../../ai_assistant_data_clients/knowledge_base'; @@ -42,10 +43,22 @@ export const loadSecurityLabs = async ( logger.info(`Loading ${docs.length} Security Labs docs into the Knowledge Base`); - const response = await kbDataClient.addKnowledgeBaseDocuments({ - documents: docs, - global: true, - }); + /** + * Ingest Security Labs docs into the Knowledge Base one by one to avoid blocking + * Inference Endpoint for too long + */ + + const response = ( + await pMap( + docs, + (singleDoc) => + kbDataClient.addKnowledgeBaseDocuments({ + documents: [singleDoc], + global: true, + }), + { concurrency: 1 } + ) + ).flat(); logger.info(`Loaded ${response?.length ?? 
0} Security Labs docs into the Knowledge Base`); @@ -55,3 +68,13 @@ export const loadSecurityLabs = async ( return false; } }; + +export const getSecurityLabsDocsCount = async ({ logger }: { logger: Logger }): Promise => { + try { + return (await globby(`${resolve(__dirname, '../../../knowledge_base/security_labs')}/**/*.md`)) + ?.length; + } catch (e) { + logger.error(`Failed to get Security Labs source docs count\n${e}`); + return 0; + } +}; diff --git a/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/constants.ts b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/constants.ts index 89970611df0e..8bf17027e751 100644 --- a/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/constants.ts +++ b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/constants.ts @@ -12,3 +12,4 @@ export const KNOWLEDGE_BASE_INGEST_PIPELINE = '.kibana-elastic-ai-assistant-kb-i export const ESQL_DOCS_LOADED_QUERY = 'You can chain processing commands, separated by a pipe character: `|`.'; export const SECURITY_LABS_RESOURCE = 'security_labs'; +export const USER_RESOURCE = 'user'; diff --git a/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.test.ts b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.test.ts index 6244599a2af2..b30e5ac3653a 100644 --- a/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.test.ts +++ b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.test.ts @@ -38,6 +38,7 @@ describe('Get Knowledge Base Status Route', () => { isModelDeployed: jest.fn().mockResolvedValue(true), isSetupInProgress: false, isSecurityLabsDocsLoaded: jest.fn().mockResolvedValue(true), + isUserDataExists: jest.fn().mockResolvedValue(true), }); getKnowledgeBaseStatusRoute(server.router); @@ -58,6 +59,7 @@ describe('Get Knowledge Base Status Route', () => { is_setup_available: true, pipeline_exists: 
true, security_labs_exists: true, + user_data_exists: true, }); }); }); diff --git a/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.ts b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.ts index 833e674b68ff..f278cd469ac0 100644 --- a/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.ts +++ b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.ts @@ -74,11 +74,18 @@ export const getKnowledgeBaseStatusRoute = (router: ElasticAssistantPluginRouter }; if (indexExists && isModelDeployed) { - const securityLabsExists = await kbDataClient.isSecurityLabsDocsLoaded(); + const securityLabsExists = v2KnowledgeBaseEnabled + ? await kbDataClient.isSecurityLabsDocsLoaded() + : true; + const userDataExists = v2KnowledgeBaseEnabled + ? await kbDataClient.isUserDataExists() + : true; + return response.ok({ body: { ...body, - security_labs_exists: v2KnowledgeBaseEnabled ? 
securityLabsExists : true, + security_labs_exists: securityLabsExists, + user_data_exists: userDataExists, }, }); } diff --git a/x-pack/plugins/elastic_assistant/tsconfig.json b/x-pack/plugins/elastic_assistant/tsconfig.json index 747a58ed930d..d3436f28a1d3 100644 --- a/x-pack/plugins/elastic_assistant/tsconfig.json +++ b/x-pack/plugins/elastic_assistant/tsconfig.json @@ -48,7 +48,8 @@ "@kbn/apm-utils", "@kbn/std", "@kbn/zod", - "@kbn/inference-plugin" + "@kbn/inference-plugin", + "@kbn/data-views-plugin" ], "exclude": [ "target/**/*", diff --git a/x-pack/test/security_solution_api_integration/test_suites/genai/knowledge_base/entries/trial_license_complete_tier/configs/ess.config.ts b/x-pack/test/security_solution_api_integration/test_suites/genai/knowledge_base/entries/trial_license_complete_tier/configs/ess.config.ts index 55860215ebfc..7954db769a6d 100644 --- a/x-pack/test/security_solution_api_integration/test_suites/genai/knowledge_base/entries/trial_license_complete_tier/configs/ess.config.ts +++ b/x-pack/test/security_solution_api_integration/test_suites/genai/knowledge_base/entries/trial_license_complete_tier/configs/ess.config.ts @@ -48,6 +48,11 @@ export default async function ({ readConfigFile }: FtrConfigProviderContext) { esTestCluster: { ...functionalConfig.get('esTestCluster'), ssl: false, + esJavaOpts: '-Xms4g -Xmx4g', + }, + mochaOpts: { + ...functionalConfig.get('mochaOpts'), + timeout: 360000 * 2, }, }; }