From a88e7d2dc84da05c2c9ab8b4a3a81e3d537bfe42 Mon Sep 17 00:00:00 2001 From: Kibana Machine <42973632+kibanamachine@users.noreply.github.com> Date: Tue, 10 Oct 2023 14:41:13 -0400 Subject: [PATCH] [8.11] [ML] AIOps: Functional/API integration tests for text field support for log rate analysis (#168177) (#168516) # Backport This will backport the following commits from `main` to `8.11`: - [[ML] AIOps: Functional/API integration tests for text field support for log rate analysis (#168177)](https://github.com/elastic/kibana/pull/168177) ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sqren/backport) Co-authored-by: Walter Rafelsberger --- .../filtered_frequent_item_sets.ts | 4 +- ...final_significant_term_groups_textfield.ts | 129 ++++++++++++++++++ .../artificial_logs/frequent_item_sets.ts | 4 +- .../significant_log_patterns.ts | 24 ++++ x-pack/plugins/aiops/common/constants.ts | 14 +- x-pack/plugins/aiops/common/types.ts | 8 +- .../aiops/server/routes/log_rate_analysis.ts | 45 +++--- .../server/routes/queries/fetch_categories.ts | 6 +- .../queries/fetch_frequent_item_sets.ts | 20 +-- .../queries/fetch_significant_categories.ts | 39 ++++-- .../fetch_significant_term_p_values.ts | 7 +- .../fetch_terms_2_categories_counts.ts | 72 ++++++++-- .../routes/queries/get_query_with_params.ts | 3 + .../queries/get_significant_term_groups.ts | 4 +- .../queries/get_simple_hierarchical_tree.ts | 12 +- .../server/routes/queries/get_value_counts.ts | 4 +- .../routes/queries/get_values_descending.ts | 4 +- .../aiops/log_rate_analysis_groups_only.ts | 25 +++- .../api_integration/apis/aiops/test_data.ts | 104 +++++++++++++- .../test/api_integration/apis/aiops/types.ts | 4 +- .../apps/aiops/log_rate_analysis.ts | 2 +- ...data.ts => log_rate_analysis_test_data.ts} | 104 ++++++++++---- x-pack/test/functional/apps/aiops/types.ts | 4 +- .../apps/ml/data_visualizer/data_drift.ts | 2 +- .../aiops/log_rate_analysis_data_generator.ts | 101 ++++++++++---- 25 
files changed, 603 insertions(+), 142 deletions(-) create mode 100644 x-pack/plugins/aiops/common/__mocks__/artificial_logs/final_significant_term_groups_textfield.ts create mode 100644 x-pack/plugins/aiops/common/__mocks__/artificial_logs/significant_log_patterns.ts rename x-pack/test/functional/apps/aiops/{test_data.ts => log_rate_analysis_test_data.ts} (70%) diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/filtered_frequent_item_sets.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/filtered_frequent_item_sets.ts index 5f3d8ce759e19..89e9c1fb141ab 100644 --- a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/filtered_frequent_item_sets.ts +++ b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/filtered_frequent_item_sets.ts @@ -5,9 +5,9 @@ * 2.0. */ -import type { ItemsetResult } from '../../types'; +import type { ItemSet } from '../../types'; -export const filteredFrequentItemSets: ItemsetResult[] = [ +export const filteredFrequentItemSets: ItemSet[] = [ { set: { response_code: '500', url: 'home.php' }, size: 2, diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/final_significant_term_groups_textfield.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/final_significant_term_groups_textfield.ts new file mode 100644 index 0000000000000..f959d9408c418 --- /dev/null +++ b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/final_significant_term_groups_textfield.ts @@ -0,0 +1,129 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { SignificantTermGroup } from '@kbn/ml-agg-utils'; + +export const finalSignificantTermGroupsTextfield: SignificantTermGroup[] = [ + { + docCount: 636, + group: [ + { + docCount: 792, + duplicate: 2, + fieldName: 'url', + fieldValue: 'home.php', + key: 'url:home.php', + pValue: 0.00974308761016614, + type: 'keyword', + }, + { + docCount: 636, + duplicate: 2, + fieldName: 'user', + fieldValue: 'Peter', + key: 'user:Peter', + pValue: 0.00974308761016614, + type: 'keyword', + }, + ], + id: '2091742187', + pValue: 0.00974308761016614, + }, + { + docCount: 634, + group: [ + { + docCount: 1266, + duplicate: 2, + fieldName: 'response_code', + fieldValue: '500', + key: 'response_code:500', + pValue: 0.012783309213417932, + type: 'keyword', + }, + { + docCount: 792, + duplicate: 2, + fieldName: 'url', + fieldValue: 'home.php', + key: 'url:home.php', + pValue: 0.00974308761016614, + type: 'keyword', + }, + { + docCount: 634, + duplicate: 2, + fieldName: 'message', + fieldValue: 'an unexpected error occured', + key: 'an unexpected error occured', + pValue: 0.00974308761016614, + type: 'log_pattern', + }, + ], + id: '1528268618', + pValue: 0.00974308761016614, + }, + { + docCount: 632, + group: [ + { + docCount: 1266, + duplicate: 2, + fieldName: 'response_code', + fieldValue: '500', + key: 'response_code:500', + pValue: 0.012783309213417932, + type: 'keyword', + }, + { + docCount: 790, + duplicate: 2, + fieldName: 'url', + fieldValue: 'login.php', + key: 'url:login.php', + pValue: 0.012783309213417932, + type: 'keyword', + }, + { + docCount: 632, + duplicate: 2, + fieldName: 'message', + fieldValue: 'an unexpected error occured', + key: 'an unexpected error occured', + pValue: 0.012783309213417932, + type: 'log_pattern', + }, + ], + id: '2619569380', + pValue: 0.012783309213417932, + }, + { + docCount: 632, + group: [ + { + docCount: 790, + duplicate: 2, + fieldName: 'url', + fieldValue: 'login.php', + key: 'url:login.php', + pValue: 0.012783309213417932, 
+ type: 'keyword', + }, + { + docCount: 632, + duplicate: 2, + fieldName: 'user', + fieldValue: 'Peter', + key: 'user:Peter', + pValue: 0.012783309213417932, + type: 'keyword', + }, + ], + id: '1937394803', + pValue: 0.012783309213417932, + }, +]; diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/frequent_item_sets.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/frequent_item_sets.ts index 3a744a0a3a578..b354bb00f7b2c 100644 --- a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/frequent_item_sets.ts +++ b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/frequent_item_sets.ts @@ -5,9 +5,9 @@ * 2.0. */ -import type { ItemsetResult } from '../../types'; +import type { ItemSet } from '../../types'; -export const frequentItemSets: ItemsetResult[] = [ +export const frequentItemSets: ItemSet[] = [ { set: { response_code: '500', url: 'home.php' }, size: 2, diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/significant_log_patterns.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/significant_log_patterns.ts new file mode 100644 index 0000000000000..ab3ebe02dc536 --- /dev/null +++ b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/significant_log_patterns.ts @@ -0,0 +1,24 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { SignificantTerm } from '@kbn/ml-agg-utils'; + +export const significantLogPatterns: SignificantTerm[] = [ + { + bg_count: 0, + doc_count: 1266, + fieldName: 'message', + fieldValue: 'an unexpected error occured', + key: 'an unexpected error occured', + normalizedScore: 0, + pValue: 0.000001, + score: -13.815510557964274, + total_bg_count: 1975, + total_doc_count: 4669, + type: 'log_pattern', + }, +]; diff --git a/x-pack/plugins/aiops/common/constants.ts b/x-pack/plugins/aiops/common/constants.ts index 226082e6041b9..47bdee0d5e6c6 100644 --- a/x-pack/plugins/aiops/common/constants.ts +++ b/x-pack/plugins/aiops/common/constants.ts @@ -5,10 +5,16 @@ * 2.0. */ -/** - * The p-value threshold to be used for statistically significant items. - */ -export const LOG_RATE_ANALYSIS_P_VALUE_THRESHOLD = 0.02; +export const LOG_RATE_ANALYSIS_SETTINGS = { + /** + * The p-value threshold to be used for statistically significant items. + */ + P_VALUE_THRESHOLD: 0.02, + /** + * The minimum support value to be used for the frequent item sets aggregation. + */ + FREQUENT_ITEMS_SETS_MINIMUM_SUPPORT: 0.001, +} as const; /** * For the technical preview of Log Rate Analysis we use a hard coded seed. 
diff --git a/x-pack/plugins/aiops/common/types.ts b/x-pack/plugins/aiops/common/types.ts index b46dd587838b4..4b26e30c76a72 100644 --- a/x-pack/plugins/aiops/common/types.ts +++ b/x-pack/plugins/aiops/common/types.ts @@ -14,7 +14,7 @@ export interface SignificantTermDuplicateGroup { export type FieldValuePairCounts = Record>; -export interface ItemsetResult { +export interface ItemSet { set: Record; size: number; maxPValue: number; @@ -23,6 +23,12 @@ export interface ItemsetResult { total_doc_count: number; } +export interface FetchFrequentItemSetsResponse { + fields: string[]; + itemSets: ItemSet[]; + totalDocCount: number; +} + interface SimpleHierarchicalTreeNodeSet extends FieldValuePair { key: string; type: SignificantTermType; diff --git a/x-pack/plugins/aiops/server/routes/log_rate_analysis.ts b/x-pack/plugins/aiops/server/routes/log_rate_analysis.ts index 7a0d1be0d7585..7576faa22ec27 100644 --- a/x-pack/plugins/aiops/server/routes/log_rate_analysis.ts +++ b/x-pack/plugins/aiops/server/routes/log_rate_analysis.ts @@ -506,7 +506,7 @@ export const defineLogRateAnalysisRoute = ( ); try { - const { fields, df } = await fetchFrequentItemSets( + const { fields, itemSets } = await fetchFrequentItemSets( client, request.body.index, JSON.parse(request.body.searchQuery) as estypes.QueryDslQueryContainer, @@ -520,23 +520,26 @@ export const defineLogRateAnalysisRoute = ( abortSignal ); - if (significantCategories.length > 0) { - const { fields: significantCategoriesFields, df: significantCategoriesDf } = - await fetchTerms2CategoriesCounts( - client, - request.body, - JSON.parse(request.body.searchQuery) as estypes.QueryDslQueryContainer, - significantTerms, - significantCategories, - request.body.deviationMin, - request.body.deviationMax, - logger, - pushError, - abortSignal - ); + if (significantCategories.length > 0 && significantTerms.length > 0) { + const { + fields: significantCategoriesFields, + itemSets: significantCategoriesItemSets, + } = await 
fetchTerms2CategoriesCounts( + client, + request.body, + JSON.parse(request.body.searchQuery) as estypes.QueryDslQueryContainer, + significantTerms, + itemSets, + significantCategories, + request.body.deviationMin, + request.body.deviationMax, + logger, + pushError, + abortSignal + ); fields.push(...significantCategoriesFields); - df.push(...significantCategoriesDf); + itemSets.push(...significantCategoriesItemSets); } if (shouldStop) { @@ -545,9 +548,9 @@ export const defineLogRateAnalysisRoute = ( return; } - if (fields.length > 0 && df.length > 0) { + if (fields.length > 0 && itemSets.length > 0) { const significantTermGroups = getSignificantTermGroups( - df, + itemSets, [...significantTerms, ...significantCategories], fields ); @@ -757,7 +760,11 @@ export const defineLogRateAnalysisRoute = ( } // histograms for text field patterns - if (overallTimeSeries !== undefined && significantCategories.length > 0) { + if ( + overallTimeSeries !== undefined && + significantCategories.length > 0 && + !request.body.overrides?.regroupOnly + ) { const significantCategoriesHistogramQueries = significantCategories.map((d) => { const histogramQuery = getHistogramQuery(request.body); const categoryQuery = getCategoryQuery(d.fieldName, [ diff --git a/x-pack/plugins/aiops/server/routes/queries/fetch_categories.ts b/x-pack/plugins/aiops/server/routes/queries/fetch_categories.ts index dd72e21990150..b58e438e3882a 100644 --- a/x-pack/plugins/aiops/server/routes/queries/fetch_categories.ts +++ b/x-pack/plugins/aiops/server/routes/queries/fetch_categories.ts @@ -33,11 +33,14 @@ export const getCategoryRequest = ( fieldName: string, from: number | undefined, to: number | undefined, + filter: estypes.QueryDslQueryContainer, { wrap }: RandomSamplerWrapper ): estypes.SearchRequest => { const { index, timeFieldName } = params; const query = getQueryWithParams({ params, + termFilters: undefined, + filter, }); const { params: request } = createCategoryRequest( index, @@ -63,6 +66,7 @@ export 
const fetchCategories = async ( fieldNames: string[], from: number | undefined, to: number | undefined, + filter: estypes.QueryDslQueryContainer, logger: Logger, // The default value of 1 means no sampling will be used sampleProbability: number = 1, @@ -78,7 +82,7 @@ export const fetchCategories = async ( const settledPromises = await Promise.allSettled( fieldNames.map((fieldName) => { - const request = getCategoryRequest(params, fieldName, from, to, randomSamplerWrapper); + const request = getCategoryRequest(params, fieldName, from, to, filter, randomSamplerWrapper); return esClient.search(request, { signal: abortSignal, maxRetries: 0, diff --git a/x-pack/plugins/aiops/server/routes/queries/fetch_frequent_item_sets.ts b/x-pack/plugins/aiops/server/routes/queries/fetch_frequent_item_sets.ts index ccb237314c125..d73c3742e8e66 100644 --- a/x-pack/plugins/aiops/server/routes/queries/fetch_frequent_item_sets.ts +++ b/x-pack/plugins/aiops/server/routes/queries/fetch_frequent_item_sets.ts @@ -15,8 +15,12 @@ import type { Logger } from '@kbn/logging'; import { type SignificantTerm } from '@kbn/ml-agg-utils'; import { createRandomSamplerWrapper } from '@kbn/ml-random-sampler-utils'; -import { RANDOM_SAMPLER_SEED } from '../../../common/constants'; -import type { SignificantTermDuplicateGroup, ItemsetResult } from '../../../common/types'; +import { RANDOM_SAMPLER_SEED, LOG_RATE_ANALYSIS_SETTINGS } from '../../../common/constants'; +import type { + SignificantTermDuplicateGroup, + ItemSet, + FetchFrequentItemSetsResponse, +} from '../../../common/types'; interface FrequentItemSetsAggregation extends estypes.AggregationsSamplerAggregation { fi: { @@ -74,7 +78,7 @@ export async function fetchFrequentItemSets( sampleProbability: number = 1, emitError: (m: string) => void, abortSignal?: AbortSignal -) { +): Promise { // Sort significant terms by ascending p-value, necessary to apply the field limit correctly. 
const sortedSignificantTerms = significantTerms.slice().sort((a, b) => { return (a.pValue ?? 0) - (b.pValue ?? 0); @@ -103,7 +107,7 @@ export async function fetchFrequentItemSets( frequent_item_sets: { minimum_set_size: 2, size: 200, - minimum_support: 0.001, + minimum_support: LOG_RATE_ANALYSIS_SETTINGS.FREQUENT_ITEMS_SETS_MINIMUM_SUPPORT, fields: getFrequentItemSetsAggFields(sortedSignificantTerms), }, }, @@ -138,7 +142,7 @@ export async function fetchFrequentItemSets( emitError(`Failed to fetch frequent_item_sets.`); return { fields: [], - df: [], + itemSets: [], totalDocCount: 0, }; } @@ -158,10 +162,10 @@ export async function fetchFrequentItemSets( const fiss = frequentItemSets.fi.buckets; fiss.length = maximum; - const results: ItemsetResult[] = []; + const results: ItemSet[] = []; fiss.forEach((fis) => { - const result: ItemsetResult = { + const result: ItemSet = { set: {}, size: 0, maxPValue: 0, @@ -203,7 +207,7 @@ export async function fetchFrequentItemSets( return { fields: uniqueFields, - df: results, + itemSets: results, totalDocCount: totalDocCountFi, }; } diff --git a/x-pack/plugins/aiops/server/routes/queries/fetch_significant_categories.ts b/x-pack/plugins/aiops/server/routes/queries/fetch_significant_categories.ts index 84e99f820bfb4..d8bd92f04e6a6 100644 --- a/x-pack/plugins/aiops/server/routes/queries/fetch_significant_categories.ts +++ b/x-pack/plugins/aiops/server/routes/queries/fetch_significant_categories.ts @@ -14,7 +14,7 @@ import { type SignificantTerm, SIGNIFICANT_TERM_TYPE } from '@kbn/ml-agg-utils'; import type { Category } from '../../../common/api/log_categorization/types'; import type { AiopsLogRateAnalysisSchema } from '../../../common/api/log_rate_analysis'; -import { LOG_RATE_ANALYSIS_P_VALUE_THRESHOLD } from '../../../common/constants'; +import { LOG_RATE_ANALYSIS_SETTINGS } from '../../../common/constants'; import { fetchCategories } from './fetch_categories'; import { fetchCategoryCounts } from './fetch_category_counts'; @@ 
-42,16 +42,39 @@ export const fetchSignificantCategories = async ( emitError: (m: string) => void, abortSignal?: AbortSignal ) => { - // To make sure we have the same categories for both baseline and deviation, - // we do an initial query that spans across baseline start and deviation end. - // We could update this to query the exact baseline AND deviation range, but - // wanted to avoid the refactor here and it should be good enough for a start. + // Filter that includes docs from both the baseline and deviation time range. + const baselineOrDeviationFilter = { + bool: { + should: [ + { + range: { + [params.timeFieldName]: { + gte: params.baselineMin, + lte: params.baselineMax, + format: 'epoch_millis', + }, + }, + }, + { + range: { + [params.timeFieldName]: { + gte: params.deviationMin, + lte: params.deviationMax, + format: 'epoch_millis', + }, + }, + }, + ], + }, + }; + const categoriesOverall = await fetchCategories( esClient, params, fieldNames, - params.baselineMin, - params.deviationMax, + undefined, + undefined, + baselineOrDeviationFilter, logger, sampleProbability, emitError, @@ -117,7 +140,7 @@ export const fetchSignificantCategories = async ( const pValue = criticalTableLookup(chiSquared, 1); const score = Math.log(pValue); - if (pValue <= LOG_RATE_ANALYSIS_P_VALUE_THRESHOLD && observed > expected) { + if (pValue <= LOG_RATE_ANALYSIS_SETTINGS.P_VALUE_THRESHOLD && observed > expected) { significantCategories.push({ key, fieldName, diff --git a/x-pack/plugins/aiops/server/routes/queries/fetch_significant_term_p_values.ts b/x-pack/plugins/aiops/server/routes/queries/fetch_significant_term_p_values.ts index 85a21e6870a03..ec1500092168f 100644 --- a/x-pack/plugins/aiops/server/routes/queries/fetch_significant_term_p_values.ts +++ b/x-pack/plugins/aiops/server/routes/queries/fetch_significant_term_p_values.ts @@ -15,10 +15,7 @@ import { type RandomSamplerWrapper, } from '@kbn/ml-random-sampler-utils'; -import { - LOG_RATE_ANALYSIS_P_VALUE_THRESHOLD, - 
RANDOM_SAMPLER_SEED, -} from '../../../common/constants'; +import { LOG_RATE_ANALYSIS_SETTINGS, RANDOM_SAMPLER_SEED } from '../../../common/constants'; import type { AiopsLogRateAnalysisSchema } from '../../../common/api/log_rate_analysis'; import { isRequestAbortedError } from '../../lib/is_request_aborted_error'; @@ -168,7 +165,7 @@ export const fetchSignificantTermPValues = async ( for (const bucket of overallResult.buckets) { const pValue = Math.exp(-bucket.score); - if (typeof pValue === 'number' && pValue < LOG_RATE_ANALYSIS_P_VALUE_THRESHOLD) { + if (typeof pValue === 'number' && pValue < LOG_RATE_ANALYSIS_SETTINGS.P_VALUE_THRESHOLD) { result.push({ key: `${fieldName}:${String(bucket.key)}`, type: SIGNIFICANT_TERM_TYPE.KEYWORD, diff --git a/x-pack/plugins/aiops/server/routes/queries/fetch_terms_2_categories_counts.ts b/x-pack/plugins/aiops/server/routes/queries/fetch_terms_2_categories_counts.ts index 1fdeaef5e18c3..a88090d3ab059 100644 --- a/x-pack/plugins/aiops/server/routes/queries/fetch_terms_2_categories_counts.ts +++ b/x-pack/plugins/aiops/server/routes/queries/fetch_terms_2_categories_counts.ts @@ -11,13 +11,14 @@ import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server'; import type { Logger } from '@kbn/logging'; -import { type SignificantTerm } from '@kbn/ml-agg-utils'; +import type { FieldValuePair, SignificantTerm } from '@kbn/ml-agg-utils'; import { isPopulatedObject } from '@kbn/ml-is-populated-object'; import type { AiopsLogRateAnalysisSchema } from '../../../common/api/log_rate_analysis'; -import type { ItemsetResult } from '../../../common/types'; +import type { FetchFrequentItemSetsResponse, ItemSet } from '../../../common/types'; import { getCategoryQuery } from '../../../common/api/log_categorization/get_category_query'; import type { Category } from '../../../common/api/log_categorization/types'; +import { LOG_RATE_ANALYSIS_SETTINGS } from 
'../../../common/constants'; import { isRequestAbortedError } from '../../lib/is_request_aborted_error'; @@ -26,9 +27,9 @@ import { getQueryWithParams } from './get_query_with_params'; const isMsearchResponseItem = (arg: unknown): arg is estypes.MsearchMultiSearchItem => isPopulatedObject(arg, ['hits']); -export const getTerm2CategoryCountRequest = ( +const getTerm2CategoryCountRequest = ( params: AiopsLogRateAnalysisSchema, - significantTerm: SignificantTerm, + fieldValuePairs: FieldValuePair[], categoryFieldName: string, category: Category, from: number | undefined, @@ -41,7 +42,9 @@ export const getTerm2CategoryCountRequest = ( const categoryQuery = getCategoryQuery(categoryFieldName, [category]); if (Array.isArray(query.bool?.filter)) { - query.bool?.filter?.push({ term: { [significantTerm.fieldName]: significantTerm.fieldValue } }); + for (const { fieldName, fieldValue } of fieldValuePairs) { + query.bool?.filter?.push({ term: { [fieldName]: fieldValue } }); + } query.bool?.filter?.push(categoryQuery); query.bool?.filter?.push({ range: { @@ -66,28 +69,29 @@ export async function fetchTerms2CategoriesCounts( params: AiopsLogRateAnalysisSchema, searchQuery: estypes.QueryDslQueryContainer, significantTerms: SignificantTerm[], + itemSets: ItemSet[], significantCategories: SignificantTerm[], from: number, to: number, logger: Logger, emitError: (m: string) => void, abortSignal?: AbortSignal -) { +): Promise { const searches: Array< | estypes.MsearchMultisearchBody | { index: string; } > = []; - const results: ItemsetResult[] = []; + const results: ItemSet[] = []; - significantTerms.forEach((term) => { - significantCategories.forEach((category) => { + significantCategories.forEach((category) => { + significantTerms.forEach((term) => { searches.push({ index: params.index }); searches.push( getTerm2CategoryCountRequest( params, - term, + [{ fieldName: term.fieldName, fieldValue: term.fieldValue }], category.fieldName, { key: `${category.key}`, count: 
category.doc_count, examples: [] }, from, @@ -102,8 +106,36 @@ export async function fetchTerms2CategoriesCounts( size: 2, maxPValue: Math.max(term.pValue ?? 1, category.pValue ?? 1), doc_count: 0, - support: 1, - total_doc_count: 0, + support: 0, + total_doc_count: Math.max(term.total_doc_count, category.total_doc_count), + }); + }); + + itemSets.forEach((itemSet) => { + searches.push({ index: params.index }); + searches.push( + getTerm2CategoryCountRequest( + params, + Object.entries(itemSet.set).map(([fieldName, fieldValue]) => ({ + fieldName, + fieldValue, + })), + category.fieldName, + { key: `${category.key}`, count: category.doc_count, examples: [] }, + from, + to + ) as estypes.MsearchMultisearchBody + ); + results.push({ + set: { + ...itemSet.set, + [category.fieldName]: category.fieldValue, + }, + size: Object.keys(itemSet.set).length + 1, + maxPValue: Math.max(itemSet.maxPValue ?? 1, category.pValue ?? 1), + doc_count: 0, + support: 0, + total_doc_count: Math.max(itemSet.total_doc_count, category.total_doc_count), }); }); }); @@ -127,7 +159,7 @@ export async function fetchTerms2CategoriesCounts( } return { fields: [], - df: [], + itemSets: [], totalDocCount: 0, }; } @@ -136,15 +168,25 @@ export async function fetchTerms2CategoriesCounts( return { fields: uniq(significantCategories.map((c) => c.fieldName)), - df: results + itemSets: results .map((result, i) => { const resp = mSearchResponses[i]; if (isMsearchResponseItem(resp)) { result.doc_count = (resp.hits.total as estypes.SearchTotalHits).value ?? 0; + if (result.total_doc_count > 0) { + // Replicates how the `frequent_item_sets` aggregation calculates + // the support value by dividing the number of documents containing + // the item set by the total number of documents. 
+ result.support = result.doc_count / result.total_doc_count; + } } return result; }) - .filter((d) => d.doc_count > 0), + .filter( + (d) => + d.doc_count > 0 && + d.support > LOG_RATE_ANALYSIS_SETTINGS.FREQUENT_ITEMS_SETS_MINIMUM_SUPPORT + ), totalDocCount: 0, }; } diff --git a/x-pack/plugins/aiops/server/routes/queries/get_query_with_params.ts b/x-pack/plugins/aiops/server/routes/queries/get_query_with_params.ts index 6c95085b379be..c9d15d6b89232 100644 --- a/x-pack/plugins/aiops/server/routes/queries/get_query_with_params.ts +++ b/x-pack/plugins/aiops/server/routes/queries/get_query_with_params.ts @@ -20,10 +20,12 @@ export const getTermsQuery = ({ fieldName, fieldValue }: FieldValuePair) => { interface QueryParams { params: AiopsLogRateAnalysisSchema; termFilters?: FieldValuePair[]; + filter?: estypes.QueryDslQueryContainer; } export const getQueryWithParams = ({ params, termFilters, + filter, }: QueryParams): estypes.QueryDslQueryContainer => { const searchQuery = JSON.parse(params.searchQuery) as estypes.QueryDslQueryContainer; return { @@ -32,6 +34,7 @@ export const getQueryWithParams = ({ searchQuery, ...getFilters(params), ...(Array.isArray(termFilters) ? termFilters.map(getTermsQuery) : []), + ...(filter ? 
[filter] : []), ] as estypes.QueryDslQueryContainer[], }, }; diff --git a/x-pack/plugins/aiops/server/routes/queries/get_significant_term_groups.ts b/x-pack/plugins/aiops/server/routes/queries/get_significant_term_groups.ts index 74951bf7aa1d9..33337603bd04e 100644 --- a/x-pack/plugins/aiops/server/routes/queries/get_significant_term_groups.ts +++ b/x-pack/plugins/aiops/server/routes/queries/get_significant_term_groups.ts @@ -15,10 +15,10 @@ import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree'; import { getSimpleHierarchicalTreeLeaves } from './get_simple_hierarchical_tree_leaves'; import { getMissingSignificantTerms } from './get_missing_significant_terms'; import { transformSignificantTermToGroup } from './transform_significant_term_to_group'; -import type { ItemsetResult } from '../../../common/types'; +import type { ItemSet } from '../../../common/types'; export function getSignificantTermGroups( - itemsets: ItemsetResult[], + itemsets: ItemSet[], significantTerms: SignificantTerm[], fields: string[] ): SignificantTermGroup[] { diff --git a/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.ts b/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.ts index 2462878798322..fc445fd88f1a6 100644 --- a/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.ts +++ b/x-pack/plugins/aiops/server/routes/queries/get_simple_hierarchical_tree.ts @@ -7,7 +7,7 @@ import type { SignificantTerm } from '@kbn/ml-agg-utils'; -import type { ItemsetResult, SimpleHierarchicalTreeNode } from '../../../common/types'; +import type { ItemSet, SimpleHierarchicalTreeNode } from '../../../common/types'; import { getValueCounts } from './get_value_counts'; import { getValuesDescending } from './get_values_descending'; @@ -54,7 +54,7 @@ function dfDepthFirstSearch( parentLabel: string, field: string, value: string, - iss: ItemsetResult[], + iss: ItemSet[], collapseRedundant: boolean, displayOther: boolean ) { @@ 
-178,18 +178,18 @@ function dfDepthFirstSearch( * By default (fields==None), the field search order is dependent on the highest count itemsets. */ export function getSimpleHierarchicalTree( - df: ItemsetResult[], + itemSets: ItemSet[], collapseRedundant: boolean, displayOther: boolean, significantTerms: SignificantTerm[], fields: string[] = [] ) { - const totalDocCount = Math.max(...df.map((d) => d.total_doc_count)); + const totalDocCount = Math.max(...itemSets.map((d) => d.total_doc_count)); const newRoot = NewNodeFactory(''); for (const field of fields) { - for (const value of getValuesDescending(df, field)) { + for (const value of getValuesDescending(itemSets, field)) { dfDepthFirstSearch( significantTerms, fields, @@ -198,7 +198,7 @@ export function getSimpleHierarchicalTree( '', field, value, - df, + itemSets, collapseRedundant, displayOther ); diff --git a/x-pack/plugins/aiops/server/routes/queries/get_value_counts.ts b/x-pack/plugins/aiops/server/routes/queries/get_value_counts.ts index b287d49494d78..42f022db5dccf 100644 --- a/x-pack/plugins/aiops/server/routes/queries/get_value_counts.ts +++ b/x-pack/plugins/aiops/server/routes/queries/get_value_counts.ts @@ -5,9 +5,9 @@ * 2.0. */ -import type { ItemsetResult } from '../../../common/types'; +import type { ItemSet } from '../../../common/types'; -export function getValueCounts(df: ItemsetResult[], field: string) { +export function getValueCounts(df: ItemSet[], field: string) { return df.reduce>((p, c) => { if (c.set[field] === undefined) { return p; diff --git a/x-pack/plugins/aiops/server/routes/queries/get_values_descending.ts b/x-pack/plugins/aiops/server/routes/queries/get_values_descending.ts index 8429ca4fcae75..bad62b3056ace 100644 --- a/x-pack/plugins/aiops/server/routes/queries/get_values_descending.ts +++ b/x-pack/plugins/aiops/server/routes/queries/get_values_descending.ts @@ -5,11 +5,11 @@ * 2.0. 
*/ -import type { ItemsetResult } from '../../../common/types'; +import type { ItemSet } from '../../../common/types'; import { getValueCounts } from './get_value_counts'; -export function getValuesDescending(df: ItemsetResult[], field: string): string[] { +export function getValuesDescending(df: ItemSet[], field: string): string[] { const valueCounts = getValueCounts(df, field); const keys = Object.keys(valueCounts); diff --git a/x-pack/test/api_integration/apis/aiops/log_rate_analysis_groups_only.ts b/x-pack/test/api_integration/apis/aiops/log_rate_analysis_groups_only.ts index 4bdd0dc0f8a94..e504f2250986f 100644 --- a/x-pack/test/api_integration/apis/aiops/log_rate_analysis_groups_only.ts +++ b/x-pack/test/api_integration/apis/aiops/log_rate_analysis_groups_only.ts @@ -63,11 +63,17 @@ export default ({ getService }: FtrProviderContext) => { const addSignificantTermsActions = data.filter( (d) => d.type === testData.expected.significantTermFilter ); - expect(addSignificantTermsActions.length).to.be(0); + expect(addSignificantTermsActions.length).to.eql( + 0, + `Expected significant terms actions to be 0, got ${addSignificantTermsActions.length}` + ); const histogramActions = data.filter((d) => d.type === testData.expected.histogramFilter); // for each significant term we should get a histogram - expect(histogramActions.length).to.be(0); + expect(histogramActions.length).to.eql( + 0, + `Expected histogram actions to be 0, got ${histogramActions.length}` + ); const groupActions = data.filter((d) => d.type === testData.expected.groupFilter); const groups = groupActions.flatMap((d) => d.payload); @@ -188,21 +194,26 @@ export default ({ getService }: FtrProviderContext) => { } // If streaming works correctly we should receive more than one chunk. 
- expect(chunkCounter).to.be.greaterThan(1); + expect(chunkCounter).to.be.greaterThan( + 1, + `Expected 'chunkCounter' to be greater than 1, got ${chunkCounter} with the following data: ${JSON.stringify( + data + )}.` + ); await assertAnalysisResult(data); } } - it('should return group only in chunks with streaming with compression with flushFix', async () => { + it('should return group only in chunks with streaming with compression with flushFix', async () => { await requestWithStreaming({ ...testData.requestBody, overrides }); }); - it('should return group only in chunks with streaming with compression without flushFix', async () => { + it('should return group only in chunks with streaming with compression without flushFix', async () => { await requestWithStreaming({ ...testData.requestBody, overrides, flushFix: false }); }); - it('should return group only in chunks with streaming without compression with flushFix', async () => { + it('should return group only in chunks with streaming without compression with flushFix', async () => { await requestWithStreaming({ ...testData.requestBody, overrides, @@ -210,7 +221,7 @@ export default ({ getService }: FtrProviderContext) => { }); }); - it('should return group only in chunks with streaming without compression without flushFix', async () => { + it('should return group only in chunks with streaming without compression without flushFix', async () => { await requestWithStreaming({ ...testData.requestBody, overrides, diff --git a/x-pack/test/api_integration/apis/aiops/test_data.ts b/x-pack/test/api_integration/apis/aiops/test_data.ts index 9ec8b69a3ca5d..184925310940e 100644 --- a/x-pack/test/api_integration/apis/aiops/test_data.ts +++ b/x-pack/test/api_integration/apis/aiops/test_data.ts @@ -9,7 +9,9 @@ // This makes sure should the assertions for the integration tests need to be updated, // that also the jest unit tests use mocks that are not outdated. 
import { significantTerms as artificialLogSignificantTerms } from '@kbn/aiops-plugin/common/__mocks__/artificial_logs/significant_terms'; +import { significantLogPatterns as artificialLogSignificantLogPatterns } from '@kbn/aiops-plugin/common/__mocks__/artificial_logs/significant_log_patterns'; import { finalSignificantTermGroups as artificialLogsSignificantTermGroups } from '@kbn/aiops-plugin/common/__mocks__/artificial_logs/final_significant_term_groups'; +import { finalSignificantTermGroupsTextfield as artificialLogsSignificantTermGroupsTextfield } from '@kbn/aiops-plugin/common/__mocks__/artificial_logs/final_significant_term_groups_textfield'; import type { TestData } from './types'; @@ -74,14 +76,14 @@ export const logRateAnalysisTestData: TestData[] = [ }, }, { - testName: 'artificial_logs_with_spike', - dataGenerator: 'artificial_logs_with_spike', + testName: 'artificial_logs_with_spike_notextfield', + dataGenerator: 'artificial_logs_with_spike_notextfield', requestBody: { start: 1668760018793, end: 1668931954793, searchQuery: '{"match_all":{}}', timeFieldName: '@timestamp', - index: 'artificial_logs_with_spike', + index: 'artificial_logs_with_spike_notextfield', baselineMin: 1668769200000, baselineMax: 1668837600000, deviationMin: 1668855600000, @@ -105,4 +107,100 @@ export const logRateAnalysisTestData: TestData[] = [ histogramLength: 20, }, }, + { + testName: 'artificial_logs_with_spike_textfield', + dataGenerator: 'artificial_logs_with_spike_textfield', + requestBody: { + start: 1668760018793, + end: 1668931954793, + searchQuery: '{"match_all":{}}', + timeFieldName: '@timestamp', + index: 'artificial_logs_with_spike_textfield', + baselineMin: 1668769200000, + baselineMax: 1668837600000, + deviationMin: 1668855600000, + deviationMax: 1668924000000, + grouping: true, + }, + expected: { + chunksLength: 30, + chunksLengthGroupOnly: 11, + actionsLength: 29, + actionsLengthGroupOnly: 10, + noIndexChunksLength: 4, + noIndexActionsLength: 3, + 
significantTermFilter: 'add_significant_terms', + groupFilter: 'add_significant_terms_group', + groupHistogramFilter: 'add_significant_terms_group_histogram', + histogramFilter: 'add_significant_terms_histogram', + errorFilter: 'add_error', + significantTerms: [...artificialLogSignificantTerms, ...artificialLogSignificantLogPatterns], + groups: artificialLogsSignificantTermGroupsTextfield, + histogramLength: 20, + }, + }, + { + testName: 'artificial_logs_with_dip_notextfield', + dataGenerator: 'artificial_logs_with_dip_notextfield', + requestBody: { + start: 1668760018793, + end: 1668931954793, + searchQuery: '{"match_all":{}}', + timeFieldName: '@timestamp', + index: 'artificial_logs_with_dip_notextfield', + baselineMin: 1668855600000, + baselineMax: 1668924000000, + deviationMin: 1668769200000, + deviationMax: 1668837600000, + grouping: true, + }, + expected: { + chunksLength: 27, + chunksLengthGroupOnly: 11, + actionsLength: 26, + actionsLengthGroupOnly: 10, + noIndexChunksLength: 4, + noIndexActionsLength: 3, + significantTermFilter: 'add_significant_terms', + groupFilter: 'add_significant_terms_group', + groupHistogramFilter: 'add_significant_terms_group_histogram', + histogramFilter: 'add_significant_terms_histogram', + errorFilter: 'add_error', + significantTerms: artificialLogSignificantTerms, + groups: artificialLogsSignificantTermGroups, + histogramLength: 20, + }, + }, + { + testName: 'artificial_logs_with_dip_textfield', + dataGenerator: 'artificial_logs_with_dip_textfield', + requestBody: { + start: 1668760018793, + end: 1668931954793, + searchQuery: '{"match_all":{}}', + timeFieldName: '@timestamp', + index: 'artificial_logs_with_dip_textfield', + baselineMin: 1668855600000, + baselineMax: 1668924000000, + deviationMin: 1668769200000, + deviationMax: 1668837600000, + grouping: true, + }, + expected: { + chunksLength: 30, + chunksLengthGroupOnly: 11, + actionsLength: 29, + actionsLengthGroupOnly: 10, + noIndexChunksLength: 4, + noIndexActionsLength: 3, 
+ significantTermFilter: 'add_significant_terms', + groupFilter: 'add_significant_terms_group', + groupHistogramFilter: 'add_significant_terms_group_histogram', + histogramFilter: 'add_significant_terms_histogram', + errorFilter: 'add_error', + significantTerms: [...artificialLogSignificantTerms, ...artificialLogSignificantLogPatterns], + groups: artificialLogsSignificantTermGroupsTextfield, + histogramLength: 20, + }, + }, ]; diff --git a/x-pack/test/api_integration/apis/aiops/types.ts b/x-pack/test/api_integration/apis/aiops/types.ts index 67ef9ea19a9da..c4e9eb8191108 100644 --- a/x-pack/test/api_integration/apis/aiops/types.ts +++ b/x-pack/test/api_integration/apis/aiops/types.ts @@ -8,10 +8,12 @@ import type { AiopsApiLogRateAnalysis } from '@kbn/aiops-plugin/common/api'; import type { SignificantTerm, SignificantTermGroup } from '@kbn/ml-agg-utils'; +import type { LogRateAnalysisDataGenerator } from '../../../functional/services/aiops/log_rate_analysis_data_generator'; + export interface TestData { testName: string; esArchive?: string; - dataGenerator?: string; + dataGenerator?: LogRateAnalysisDataGenerator; requestBody: AiopsApiLogRateAnalysis['body']; expected: { chunksLength: number; diff --git a/x-pack/test/functional/apps/aiops/log_rate_analysis.ts b/x-pack/test/functional/apps/aiops/log_rate_analysis.ts index 72323d4fef37c..8e33b4b1c8e4a 100644 --- a/x-pack/test/functional/apps/aiops/log_rate_analysis.ts +++ b/x-pack/test/functional/apps/aiops/log_rate_analysis.ts @@ -11,7 +11,7 @@ import expect from '@kbn/expect'; import type { FtrProviderContext } from '../../ftr_provider_context'; import { isTestDataExpectedWithSampleProbability, type TestData } from './types'; -import { logRateAnalysisTestData } from './test_data'; +import { logRateAnalysisTestData } from './log_rate_analysis_test_data'; export default function ({ getPageObjects, getService }: FtrProviderContext) { const PageObjects = getPageObjects(['common', 'console', 'header', 'home', 
'security']); diff --git a/x-pack/test/functional/apps/aiops/test_data.ts b/x-pack/test/functional/apps/aiops/log_rate_analysis_test_data.ts similarity index 70% rename from x-pack/test/functional/apps/aiops/test_data.ts rename to x-pack/test/functional/apps/aiops/log_rate_analysis_test_data.ts index adf1447b1f346..3fb1e00b95201 100644 --- a/x-pack/test/functional/apps/aiops/test_data.ts +++ b/x-pack/test/functional/apps/aiops/log_rate_analysis_test_data.ts @@ -176,12 +176,19 @@ const DAY_MS = 86400000; const DEVIATION_TS = REFERENCE_TS - DAY_MS * 2; const BASELINE_TS = DEVIATION_TS - DAY_MS * 1; -const getArtificialLogDataViewTestData = (analysisType: LogRateAnalysisType): TestData => ({ - suiteTitle: `artificial logs with ${analysisType}`, +const getArtificialLogDataViewTestData = ( + analysisType: LogRateAnalysisType, + textField: boolean +): TestData => ({ + suiteTitle: `artificial logs with ${analysisType} and ${ + textField ? 'text field' : 'no text field' + }`, analysisType, - dataGenerator: `artificial_logs_with_${analysisType}`, + dataGenerator: `artificial_logs_with_${analysisType}_${textField ? 'textfield' : 'notextfield'}`, isSavedSearch: false, - sourceIndexOrSavedSearch: `artificial_logs_with_${analysisType}`, + sourceIndexOrSavedSearch: `artificial_logs_with_${analysisType}_${ + textField ? 'textfield' : 'notextfield' + }`, brushBaselineTargetTimestamp: BASELINE_TS + DAY_MS / 2, brushDeviationTargetTimestamp: DEVIATION_TS + DAY_MS / 2, brushIntervalFactor: 10, @@ -191,14 +198,24 @@ const getArtificialLogDataViewTestData = (analysisType: LogRateAnalysisType): Te expected: { totalDocCountFormatted: '8,400', analysisGroupsTable: [ - { - group: 'response_code: 500url: home.php', - docCount: '792', - }, - { - group: 'url: login.phpresponse_code: 500', - docCount: '790', - }, + textField + ? 
{ + group: 'message: an unexpected error occuredurl: home.phpresponse_code: 500', + docCount: '634', + } + : { + group: 'response_code: 500url: home.php', + docCount: '792', + }, + textField + ? { + group: 'message: an unexpected error occuredurl: login.phpresponse_code: 500', + docCount: '632', + } + : { + group: 'url: login.phpresponse_code: 500', + docCount: '790', + }, { docCount: '636', group: 'user: Peterurl: home.php', @@ -208,11 +225,40 @@ const getArtificialLogDataViewTestData = (analysisType: LogRateAnalysisType): Te group: 'user: Peterurl: login.php', }, ], - filteredAnalysisGroupsTable: [ - { group: '* url: home.phpresponse_code: 500', docCount: '792' }, - { group: '* url: login.phpresponse_code: 500', docCount: '790' }, - ], + filteredAnalysisGroupsTable: textField + ? [ + { + group: '* url: home.phpmessage: an unexpected error occuredresponse_code: 500', + docCount: '634', + }, + { + group: '* url: login.phpmessage: an unexpected error occuredresponse_code: 500', + docCount: '632', + }, + ] + : [ + { group: '* url: home.phpresponse_code: 500', docCount: '792' }, + { group: '* url: login.phpresponse_code: 500', docCount: '790' }, + ], analysisTable: [ + ...(textField + ? [ + { + fieldName: 'message', + fieldValue: 'an unexpected error occured', + logRate: 'Chart type:bar chart', + pValue: '0.00000100', + impact: 'Medium', + }, + { + fieldName: 'response_code', + fieldValue: '500', + logRate: 'Chart type:bar chart', + pValue: '3.61e-12', + impact: 'High', + }, + ] + : []), { fieldName: 'url', fieldValue: 'home.php', @@ -220,15 +266,19 @@ const getArtificialLogDataViewTestData = (analysisType: LogRateAnalysisType): Te logRate: 'Chart type:bar chart', pValue: '0.00974', }, - { - fieldName: 'user', - fieldValue: 'Peter', - impact: 'High', - logRate: 'Chart type:bar chart', - pValue: '2.63e-21', - }, + ...(textField + ? 
[] + : [ + { + fieldName: 'user', + fieldValue: 'Peter', + impact: 'High', + logRate: 'Chart type:bar chart', + pValue: '2.63e-21', + }, + ]), ], - fieldSelectorPopover: ['response_code', 'url', 'user'], + fieldSelectorPopover: [...(textField ? ['message'] : []), 'response_code', 'url', 'user'], }, }); @@ -236,6 +286,8 @@ export const logRateAnalysisTestData: TestData[] = [ kibanaLogsDataViewTestData, farequoteDataViewTestData, farequoteDataViewTestDataWithQuery, - getArtificialLogDataViewTestData(LOG_RATE_ANALYSIS_TYPE.SPIKE), - getArtificialLogDataViewTestData(LOG_RATE_ANALYSIS_TYPE.DIP), + getArtificialLogDataViewTestData(LOG_RATE_ANALYSIS_TYPE.SPIKE, false), + getArtificialLogDataViewTestData(LOG_RATE_ANALYSIS_TYPE.SPIKE, true), + getArtificialLogDataViewTestData(LOG_RATE_ANALYSIS_TYPE.DIP, false), + getArtificialLogDataViewTestData(LOG_RATE_ANALYSIS_TYPE.DIP, true), ]; diff --git a/x-pack/test/functional/apps/aiops/types.ts b/x-pack/test/functional/apps/aiops/types.ts index 0832d9d921615..45f376cb670e1 100644 --- a/x-pack/test/functional/apps/aiops/types.ts +++ b/x-pack/test/functional/apps/aiops/types.ts @@ -8,6 +8,8 @@ import type { LogRateAnalysisType } from '@kbn/aiops-utils'; import { isPopulatedObject } from '@kbn/ml-is-populated-object'; +import { LogRateAnalysisDataGenerator } from '../../services/aiops/log_rate_analysis_data_generator'; + interface TestDataTableActionLogPatternAnalysis { type: 'LogPatternAnalysis'; tableRowId: string; @@ -46,7 +48,7 @@ interface TestDataExpectedWithoutSampleProbability { export interface TestData { suiteTitle: string; analysisType: LogRateAnalysisType; - dataGenerator: string; + dataGenerator: LogRateAnalysisDataGenerator; isSavedSearch?: boolean; sourceIndexOrSavedSearch: string; rowsPerPage?: 10 | 25 | 50; diff --git a/x-pack/test/functional/apps/ml/data_visualizer/data_drift.ts b/x-pack/test/functional/apps/ml/data_visualizer/data_drift.ts index 2eb579c1720d8..a0cd70db3655a 100644 --- 
a/x-pack/test/functional/apps/ml/data_visualizer/data_drift.ts +++ b/x-pack/test/functional/apps/ml/data_visualizer/data_drift.ts @@ -6,7 +6,7 @@ */ import { FtrProviderContext } from '../../../ftr_provider_context'; -import { farequoteDataViewTestDataWithQuery } from '../../aiops/test_data'; +import { farequoteDataViewTestDataWithQuery } from '../../aiops/log_rate_analysis_test_data'; import { TestData } from '../../aiops/types'; export default function ({ getService, getPageObjects }: FtrProviderContext) { diff --git a/x-pack/test/functional/services/aiops/log_rate_analysis_data_generator.ts b/x-pack/test/functional/services/aiops/log_rate_analysis_data_generator.ts index a628c730fdf76..48028b2ddbd1a 100644 --- a/x-pack/test/functional/services/aiops/log_rate_analysis_data_generator.ts +++ b/x-pack/test/functional/services/aiops/log_rate_analysis_data_generator.ts @@ -11,6 +11,17 @@ import { LOG_RATE_ANALYSIS_TYPE } from '@kbn/aiops-utils'; import { FtrProviderContext } from '../../ftr_provider_context'; +const LOG_RATE_ANALYSYS_DATA_GENERATOR = { + KIBANA_SAMPLE_DATA_LOGS: 'kibana_sample_data_logs', + FAREQUOTE_WITH_SPIKE: 'farequote_with_spike', + ARTIFICIAL_LOGS_WITH_SPIKE_NOTEXTFIELD: 'artificial_logs_with_spike_notextfield', + ARTIFICIAL_LOGS_WITH_SPIKE_TEXTFIELD: 'artificial_logs_with_spike_textfield', + ARTIFICIAL_LOGS_WITH_DIP_NOTEXTFIELD: 'artificial_logs_with_dip_notextfield', + ARTIFICIAL_LOGS_WITH_DIP_TEXTFIELD: 'artificial_logs_with_dip_textfield', +} as const; +export type LogRateAnalysisDataGenerator = + typeof LOG_RATE_ANALYSYS_DATA_GENERATOR[keyof typeof LOG_RATE_ANALYSYS_DATA_GENERATOR]; + export interface GeneratedDoc { user: string; response_code: string; @@ -18,6 +29,7 @@ export interface GeneratedDoc { version: string; '@timestamp': number; should_ignore_this_field: string; + message?: string; } const REFERENCE_TS = 1669018354793; @@ -26,7 +38,16 @@ const DAY_MS = 86400000; const DEVIATION_TS = REFERENCE_TS - DAY_MS * 2; const BASELINE_TS = 
DEVIATION_TS - DAY_MS * 1; -function getArtificialLogsWithDeviation(index: string, deviationType: string) { +function getMessage(timestamp: number, user: string, url: string, responseCode: string) { + const date = new Date(timestamp); + return `${user} [${date.toLocaleString('en-US')}] "GET /${url} HTTP/1.1" ${responseCode}`; +} + +function getArtificialLogsWithDeviation( + index: string, + deviationType: string, + includeTextField = false +) { const bulkBody: estypes.BulkRequest['body'] = []; const action = { index: { _index: index } }; let tsOffset = 0; @@ -47,15 +68,20 @@ function getArtificialLogsWithDeviation(index: string, deviationType: string) { tsOffset = 0; [...Array(100)].forEach(() => { tsOffset += Math.round(DAY_MS / 100); + const timestamp = ts + tsOffset; const doc: GeneratedDoc = { user, response_code: responseCode, url, version: 'v1.0.0', - '@timestamp': ts + tsOffset, + '@timestamp': timestamp, should_ignore_this_field: 'should_ignore_this_field', }; + if (includeTextField) { + doc.message = getMessage(timestamp, user, url, responseCode); + } + bulkBody.push(action); bulkBody.push(doc); }); @@ -77,17 +103,24 @@ function getArtificialLogsWithDeviation(index: string, deviationType: string) { tsOffset = 0; [...Array(docsPerUrl1[url])].forEach(() => { tsOffset += Math.round(DAY_MS / docsPerUrl1[url]); - bulkBody.push(action); - bulkBody.push({ + const timestamp = + (deviationType === LOG_RATE_ANALYSIS_TYPE.SPIKE ? DEVIATION_TS : BASELINE_TS) + tsOffset; + + const doc: GeneratedDoc = { user: 'Peter', response_code: responseCode, url, version: 'v1.0.0', - '@timestamp': - (deviationType === LOG_RATE_ANALYSIS_TYPE.SPIKE ? 
DEVIATION_TS : BASELINE_TS) + - tsOffset, + '@timestamp': timestamp, should_ignore_this_field: 'should_ignore_this_field', - }); + }; + + if (includeTextField) { + doc.message = getMessage(timestamp, 'Peter', url, responseCode); + } + + bulkBody.push(action); + bulkBody.push(doc); }); }); }); @@ -102,17 +135,24 @@ function getArtificialLogsWithDeviation(index: string, deviationType: string) { tsOffset = 0; [...Array(docsPerUrl2[url] + userIndex)].forEach(() => { tsOffset += Math.round(DAY_MS / docsPerUrl2[url]); - bulkBody.push(action); - bulkBody.push({ + const timestamp = + (deviationType === LOG_RATE_ANALYSIS_TYPE.SPIKE ? DEVIATION_TS : BASELINE_TS) + tsOffset; + + const doc: GeneratedDoc = { user, response_code: '500', url, version: 'v1.0.0', - '@timestamp': - (deviationType === LOG_RATE_ANALYSIS_TYPE.SPIKE ? DEVIATION_TS : BASELINE_TS) + - tsOffset, + '@timestamp': timestamp, should_ignore_this_field: 'should_ignore_this_field', - }); + }; + + if (includeTextField) { + doc.message = 'an unexpected error occured'; + } + + bulkBody.push(action); + bulkBody.push(doc); }); }); }); @@ -126,7 +166,7 @@ export function LogRateAnalysisDataGeneratorProvider({ getService }: FtrProvider const log = getService('log'); return new (class DataGenerator { - public async generateData(dataGenerator: string) { + public async generateData(dataGenerator: LogRateAnalysisDataGenerator) { switch (dataGenerator) { case 'kibana_sample_data_logs': // will be added via UI @@ -164,12 +204,19 @@ export function LogRateAnalysisDataGeneratorProvider({ getService }: FtrProvider }); break; - case 'artificial_logs_with_spike': - case 'artificial_logs_with_dip': + case 'artificial_logs_with_spike_notextfield': + case 'artificial_logs_with_spike_textfield': + case 'artificial_logs_with_dip_notextfield': + case 'artificial_logs_with_dip_textfield': try { - await es.indices.delete({ + const indexExists = await es.indices.exists({ index: dataGenerator, }); + if (indexExists) { + await 
es.indices.delete({ + index: dataGenerator, + }); + } } catch (e) { log.info(`Could not delete index '${dataGenerator}' in before() callback`); } @@ -185,16 +232,18 @@ export function LogRateAnalysisDataGeneratorProvider({ getService }: FtrProvider version: { type: 'keyword' }, '@timestamp': { type: 'date' }, should_ignore_this_field: { type: 'keyword', doc_values: false, index: false }, + message: { type: 'text' }, }, }, }); + const dataGeneratorOptions = dataGenerator.split('_'); + const deviationType = dataGeneratorOptions[3] ?? LOG_RATE_ANALYSIS_TYPE.SPIKE; + const textField = dataGeneratorOptions[4] === 'textfield' ?? false; + await es.bulk({ refresh: 'wait_for', - body: getArtificialLogsWithDeviation( - dataGenerator, - dataGenerator.split('_').pop() ?? LOG_RATE_ANALYSIS_TYPE.SPIKE - ), + body: getArtificialLogsWithDeviation(dataGenerator, deviationType, textField), }); break; @@ -203,7 +252,7 @@ export function LogRateAnalysisDataGeneratorProvider({ getService }: FtrProvider } } - public async removeGeneratedData(dataGenerator: string) { + public async removeGeneratedData(dataGenerator: LogRateAnalysisDataGenerator) { switch (dataGenerator) { case 'kibana_sample_data_logs': // do not remove @@ -213,8 +262,10 @@ export function LogRateAnalysisDataGeneratorProvider({ getService }: FtrProvider await esArchiver.unload('x-pack/test/functional/es_archives/ml/farequote'); break; - case 'artificial_logs_with_spike': - case 'artificial_logs_with_dip': + case 'artificial_logs_with_spike_notextfield': + case 'artificial_logs_with_spike_textfield': + case 'artificial_logs_with_dip_notextfield': + case 'artificial_logs_with_dip_textfield': try { await es.indices.delete({ index: dataGenerator,