From 126d62fbf342af18f2ca015c0fdfa1e167a42f4e Mon Sep 17 00:00:00 2001 From: Walter Rafelsberger Date: Thu, 14 Mar 2024 11:11:49 +0100 Subject: [PATCH 1/3] use field caps include_empty_fields --- .../queries/fetch_index_info.ts | 48 +++++++--------- .../queries/get_random_docs_request.test.ts | 56 ------------------- .../queries/get_random_docs_request.ts | 35 ------------ 3 files changed, 19 insertions(+), 120 deletions(-) delete mode 100644 x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/get_random_docs_request.test.ts delete mode 100644 x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/get_random_docs_request.ts diff --git a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_index_info.ts b/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_index_info.ts index d8601691264f6..9311af1b91953 100644 --- a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_index_info.ts +++ b/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_index_info.ts @@ -12,7 +12,6 @@ import type { ElasticsearchClient } from '@kbn/core/server'; import type { AiopsLogRateAnalysisSchema } from '../../../../common/api/log_rate_analysis/schema'; -import { getRandomDocsRequest } from './get_random_docs_request'; import { getTotalDocCountRequest } from './get_total_doc_count_request'; // TODO Consolidate with duplicate `fetchPValues` in @@ -26,6 +25,8 @@ const SUPPORTED_ES_FIELD_TYPES = [ const SUPPORTED_ES_FIELD_TYPES_TEXT = [ES_FIELD_TYPES.TEXT, ES_FIELD_TYPES.MATCH_ONLY_TEXT]; +const IGNORE_FIELD_NAMES = ['_tier']; + interface IndexInfo { fieldCandidates: string[]; textFieldCandidates: string[]; @@ -46,14 +47,14 @@ export const fetchIndexInfo = async ( { index, fields: '*', + // @ts-expect-error include_empty_fields missing from FieldCapsRequest + include_empty_fields: false, }, { signal: abortSignal, maxRetries: 0 } ); const allFieldNames: string[] = []; - const finalFieldCandidates: Set = new Set([]); - const finalTextFieldCandidates: Set = new Set([]); const acceptableFields: Set = new Set(); const acceptableTextFields: Set = new Set(); @@ -64,11 +65,11 @@ export const fetchIndexInfo = async ( const isTextField = fieldTypes.some((type) => SUPPORTED_ES_FIELD_TYPES_TEXT.includes(type)); // Check if fieldName is something we can aggregate on - if (isSupportedType && isAggregatable) { + if (isSupportedType && isAggregatable && !IGNORE_FIELD_NAMES.includes(key)) { acceptableFields.add(key); } - if (isTextField) { + if (isTextField && !IGNORE_FIELD_NAMES.includes(key)) { acceptableTextFields.add(key); } @@ -84,46 +85,35 @@ export const fetchIndexInfo = async ( } ); - // Only the deviation window will be used to identify field candidates and sample probability based on total doc count. - const respDeviationRandomDocs = await esClient.search( - getRandomDocsRequest({ ...params, start: params.deviationMin, end: params.deviationMax }), + // Get the total doc count for the baseline time range + const respDeviationTotalDocCount = await esClient.search( + getTotalDocCountRequest({ ...params, start: params.deviationMin, end: params.deviationMax }), { signal: abortSignal, maxRetries: 0, } ); - const sampledDocs = respDeviationRandomDocs.hits.hits.map((d) => d.fields ?? {}); const textFieldCandidatesOverridesWithKeywordPostfix = textFieldCandidatesOverrides.map( (d) => `${d}.keyword` ); - // Get all field names for each returned doc and flatten it - // to a list of unique field names used across all docs - // and filter by list of acceptable fields. - [...new Set(sampledDocs.map(Object.keys).flat(1))].forEach((field) => { - if ( - acceptableFields.has(field) && - !textFieldCandidatesOverridesWithKeywordPostfix.includes(field) - ) { - finalFieldCandidates.add(field); - } - if ( - acceptableTextFields.has(field) && - (!allFieldNames.includes(`${field}.keyword`) || textFieldCandidatesOverrides.includes(field)) - ) { - finalTextFieldCandidates.add(field); - } - }); + const fieldCandidates: string[] = [...acceptableFields].filter( + (field) => !textFieldCandidatesOverridesWithKeywordPostfix.includes(field) + ); + const textFieldCandidates: string[] = [...acceptableTextFields].filter( + (field) => + !allFieldNames.includes(`${field}.keyword`) || textFieldCandidatesOverrides.includes(field) + ); const baselineTotalDocCount = (respBaselineTotalDocCount.hits.total as estypes.SearchTotalHits) .value; - const deviationTotalDocCount = (respDeviationRandomDocs.hits.total as estypes.SearchTotalHits) + const deviationTotalDocCount = (respDeviationTotalDocCount.hits.total as estypes.SearchTotalHits) .value; return { - fieldCandidates: [...finalFieldCandidates], - textFieldCandidates: [...finalTextFieldCandidates], + fieldCandidates, + textFieldCandidates, baselineTotalDocCount, deviationTotalDocCount, zeroDocsFallback: baselineTotalDocCount === 0 || deviationTotalDocCount === 0, diff --git a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/get_random_docs_request.test.ts b/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/get_random_docs_request.test.ts deleted file mode 100644 index 6e68c789142c5..0000000000000 --- a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/get_random_docs_request.test.ts +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -import { paramsSearchQueryMock } from './__mocks__/params_search_query'; - -import { getRandomDocsRequest } from './get_random_docs_request'; - -describe('getRandomDocsRequest', () => { - it('returns the most basic request body for a sample of random documents', () => { - const req = getRandomDocsRequest(paramsSearchQueryMock); - - expect(req).toEqual({ - body: { - _source: false, - fields: ['*'], - query: { - function_score: { - query: { - bool: { - filter: [ - { - bool: { - filter: [], - minimum_should_match: 1, - must_not: [], - should: [{ term: { 'the-term': { value: 'the-value' } } }], - }, - }, - { - range: { - 'the-time-field-name': { - format: 'epoch_millis', - gte: 0, - lte: 50, - }, - }, - }, - ], - }, - }, - random_score: {}, - }, - }, - size: 1000, - track_total_hits: true, - }, - index: paramsSearchQueryMock.index, - ignore_throttled: undefined, - ignore_unavailable: true, - }); - }); -}); diff --git a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/get_random_docs_request.ts b/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/get_random_docs_request.ts deleted file mode 100644 index 7c1abdfd52667..0000000000000 --- a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/get_random_docs_request.ts +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; - -import type { AiopsLogRateAnalysisSchema } from '../../../../common/api/log_rate_analysis/schema'; - -import { getQueryWithParams } from './get_query_with_params'; -import { getRequestBase } from './get_request_base'; - -const POPULATED_DOC_COUNT_SAMPLE_SIZE = 1000; - -export const getRandomDocsRequest = ( - params: AiopsLogRateAnalysisSchema -): estypes.SearchRequest => ({ - ...getRequestBase(params), - body: { - fields: ['*'], - _source: false, - query: { - function_score: { - query: getQueryWithParams({ params }), - // @ts-ignore - random_score: {}, - }, - }, - size: POPULATED_DOC_COUNT_SAMPLE_SIZE, - // Used to determine sample probability for follow up queries - track_total_hits: true, - }, -}); From 8c8c87b281565c98ceeabd650fbcd74b76fc6cf9 Mon Sep 17 00:00:00 2001 From: Walter Rafelsberger Date: Thu, 14 Mar 2024 11:15:41 +0100 Subject: [PATCH 2/3] fix comment --- .../server/routes/log_rate_analysis/queries/fetch_index_info.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_index_info.ts b/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_index_info.ts index 9311af1b91953..8ecea26d7fb33 100644 --- a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_index_info.ts +++ b/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_index_info.ts @@ -85,7 +85,7 @@ export const fetchIndexInfo = async ( } ); - // Get the total doc count for the baseline time range + // Get the total doc count for the deviation time range const respDeviationTotalDocCount = await esClient.search( getTotalDocCountRequest({ ...params, start: params.deviationMin, end: params.deviationMax }), { From a23f47eadd35be9f29eaef6b80d11f7805d4557f Mon Sep 17 00:00:00 2001 From: Walter Rafelsberger Date: Thu, 14 Mar 2024 14:29:51 +0100 Subject: [PATCH 3/3] fix jest test --- .../routes/log_rate_analysis/queries/fetch_index_info.test.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_index_info.test.ts b/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_index_info.test.ts index 38377a9e0d32c..9cda77afd1c42 100644 --- a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_index_info.test.ts +++ b/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_index_info.test.ts @@ -24,9 +24,6 @@ describe('fetch_index_info', () => { myKeywordFieldName: { keyword: { aggregatable: true } }, // Should not end up as a field candidate, it's a keyword but non-aggregatable myKeywordFieldNameToBeIgnored: { keyword: { aggregatable: false } }, - // Should not end up as a field candidate, based on this field caps result it would be - // but it will not be part of the mocked search result so will count as unpopulated. - myUnpopulatedKeywordFieldName: { keyword: { aggregatable: true } }, // Should not end up as a field candidate since fields of type number will not be considered myNumericFieldName: { number: {} }, },