From 5c074ea9d61e45f990b9b63694771236955c5b5a Mon Sep 17 00:00:00 2001 From: Patrick Mueller Date: Tue, 23 Jul 2024 16:17:11 -0400 Subject: [PATCH] [ResponseOps] log error when ES Query rules find docs out of time range (#186332) resolves https://github.com/elastic/kibana/issues/175980 ## Summary Adds a check with logging if an ES Query rule returns hits which fall outside the time range it's searching. This shouldn't ever happen, but seems to be happening on rare occaisons, so we wanted to add some diagnostics to try to help narrow down the problem. Note that the ES|QL flavor rule does not use this diagnostic, just search source (KQL) and query dsl. We check 3 things: - ensure the `dateStart` sent to fetch was valid - ensure the `dateEnd` sent to fetch was valid - ensure the relevant time fields in hits are within the dateStart/dateEnd range These produce three different error messages: `For rule '', hits were returned with invalid time range start date '' from field '' using query ` `For rule '', hits were returned with invalid time range end date '' from field '' using query ` `For rule '', the hit with date '' from field '' is outside the query time range. Query: . Document: ` Each message has one tag on it: `query-result-out-of-time-range` ## To Verify To test invalid dateStart/dateEnd, hack the Kibana code to set the values to NaN's: https://github.com/elastic/kibana/blob/d30da09707f85d84d7fd555733ba8e0cb595228b/x-pack/plugins/stack_alerts/server/rule_types/es_query/executor.ts#L263-L264 For instance, change that to: const epochStart = new Date('x').getTime(); const epochEnd = new Date('y').getTime(); To test the invdivual document hits, first back out the change you made above - when those error, the checks we're testing below do not run. Hack the Kibana code to make the time out of range: https://github.com/elastic/kibana/blob/d30da09707f85d84d7fd555733ba8e0cb595228b/x-pack/plugins/stack_alerts/server/rule_types/es_query/executor.ts#L294 For instance, change that to: const epochDate = epochStart - 100 For both tests, create an es query rule - kql or dsl - make the relevant changes, and arrange for the rule to get hits each time. The relevant messages should be logged in the Kibana console when the rule runs. ### Checklist Delete any items that are not applicable to this PR. - [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios --------- Co-authored-by: Elastic Machine (cherry picked from commit e12e4496e0594d34509ef9235d1d7b7e3461e5d8) # Conflicts: # x-pack/plugins/stack_alerts/server/rule_types/es_query/lib/fetch_search_source_query.ts --- .../rule_types/es_query/executor.test.ts | 276 ++++++++++++++++++ .../server/rule_types/es_query/executor.ts | 98 ++++++- .../rule_types/es_query/lib/fetch_es_query.ts | 1 + .../es_query/lib/fetch_esql_query.ts | 1 + .../es_query/lib/fetch_search_source_query.ts | 8 +- 5 files changed, 377 insertions(+), 7 deletions(-) diff --git a/x-pack/plugins/stack_alerts/server/rule_types/es_query/executor.test.ts b/x-pack/plugins/stack_alerts/server/rule_types/es_query/executor.test.ts index 7ab974dc8d82..99819eca3c26 100644 --- a/x-pack/plugins/stack_alerts/server/rule_types/es_query/executor.test.ts +++ b/x-pack/plugins/stack_alerts/server/rule_types/es_query/executor.test.ts @@ -882,6 +882,282 @@ describe('es_query executor', () => { expect(mockSetLimitReached).toHaveBeenCalledTimes(1); expect(mockSetLimitReached).toHaveBeenCalledWith(false); }); + + it('should log messages for hits with out-of-range dates for search source', async () => { + const epoch = Date.now(); + const oneYear = 1000 * 60 * 60 * 24 * 365; + const dateEarly = new Date(epoch - oneYear).toISOString(); + const dateStart = new Date(epoch - 1000).toISOString(); + const dateMiddle = new Date(epoch - 500).toISOString(); + const dateEnd = new Date(epoch).toISOString(); + const dateLate = new Date(epoch + oneYear).toISOString(); + + function getTimeRange() { + return { dateStart, dateEnd }; + } + + mockFetchSearchSourceQuery.mockResolvedValueOnce({ + parsedResults: { + results: [ + { + group: 'all documents', + count: 3, + hits: [ + { _source: { '@timestamp': dateEarly, value: 1 } }, + { _source: { '@timestamp': dateMiddle, value: 2 } }, + { _source: { '@timestamp': dateLate, value: 3 } }, + ], + }, + ], + }, + truncated: false, + query: 'the query would go here', + }); + + const executorOptions: ExecutorOptions = { + ...defaultExecutorOptions, + getTimeRange, + params: { + ...defaultProps, + searchType: 'searchSource', + timeField: '@timestamp', + }, + }; + await executor(coreMock, executorOptions); + + const allLogCalls = loggerMock.collect(logger); + const messages: string[] = []; + for (const parms of allLogCalls.error) { + const message = parms.shift(); + messages.push(`${message}`); + } + + expect(messages).toEqual([ + `For rule 'test-rule-id', the hit with date '${dateEarly}' from field '@timestamp' is outside the query time range. Query: <\"the query would go here\">. Document: <{\"_source\":{\"@timestamp\":\"${dateEarly}\",\"value\":1}}>`, + `For rule 'test-rule-id', the hit with date '${dateLate}' from field '@timestamp' is outside the query time range. Query: <\"the query would go here\">. Document: <{\"_source\":{\"@timestamp\":\"${dateLate}\",\"value\":3}}>`, + ]); + expect(allLogCalls).toMatchInlineSnapshot(` + Object { + "debug": Array [], + "error": Array [ + Array [ + Object { + "tags": Array [ + "query-result-out-of-time-range", + ], + }, + ], + Array [ + Object { + "tags": Array [ + "query-result-out-of-time-range", + ], + }, + ], + ], + "fatal": Array [], + "info": Array [], + "log": Array [], + "trace": Array [], + "warn": Array [], + } + `); + }); + + it('should log messages for bad start / end dates for search source', async () => { + function getTimeRange() { + return { dateStart: 'x', dateEnd: 'y' }; + } + + mockFetchSearchSourceQuery.mockResolvedValueOnce({ + parsedResults: { + results: [ + { + group: 'all documents', + count: 1, + hits: [{ _source: { '@timestamp': new Date().toISOString() } }], + }, + ], + }, + truncated: false, + query: 'the query would go here', + }); + + const executorOptions: ExecutorOptions = { + ...defaultExecutorOptions, + getTimeRange, + params: { + ...defaultProps, + searchType: 'searchSource', + timeField: '@timestamp', + }, + }; + await executor(coreMock, executorOptions); + + const allLogCalls = loggerMock.collect(logger); + const messages: string[] = []; + for (const parms of allLogCalls.error) { + const message = parms.shift(); + messages.push(`${message}`); + } + + expect(messages).toEqual([ + `For rule 'test-rule-id', hits were returned with invalid time range start date 'x' from field '@timestamp' using query <"the query would go here">`, + `For rule 'test-rule-id', hits were returned with invalid time range end date 'y' from field '@timestamp' using query <"the query would go here">`, + ]); + expect(allLogCalls.error).toMatchInlineSnapshot(` + Array [ + Array [ + Object { + "tags": Array [ + "query-result-out-of-time-range", + ], + }, + ], + Array [ + Object { + "tags": Array [ + "query-result-out-of-time-range", + ], + }, + ], + ] + `); + }); + + it('should log messages for hits with out-of-range dates for query dsl', async () => { + const epoch = Date.now(); + const oneYear = 1000 * 60 * 60 * 24 * 365; + const dateEarly = new Date(epoch - oneYear).toISOString(); + const dateStart = new Date(epoch - 1000).toISOString(); + const dateMiddle = new Date(epoch - 500).toISOString(); + const dateEnd = new Date(epoch).toISOString(); + const dateLate = new Date(epoch + oneYear).toISOString(); + + function getTimeRange() { + return { dateStart, dateEnd }; + } + + mockFetchEsQuery.mockResolvedValueOnce({ + parsedResults: { + results: [ + { + group: 'all documents', + count: 3, + hits: [ + { _source: { '@timestamp': dateEarly, value: 1 } }, + { _source: { '@timestamp': dateMiddle, value: 2 } }, + { _source: { '@timestamp': dateLate, value: 3 } }, + ], + }, + ], + }, + truncated: false, + query: 'the query would go here', + }); + + const executorOptions: ExecutorOptions = { + ...defaultExecutorOptions, + getTimeRange, + params: { + ...defaultProps, + searchType: 'esQuery', + timeField: '@timestamp', + }, + }; + await executor(coreMock, executorOptions); + + const allLogCalls = loggerMock.collect(logger); + const messages: string[] = []; + for (const parms of allLogCalls.error) { + const message = parms.shift(); + messages.push(`${message}`); + } + + expect(messages).toEqual([ + `For rule 'test-rule-id', the hit with date '${dateEarly}' from field '@timestamp' is outside the query time range. Query: <\"the query would go here\">. Document: <{\"_source\":{\"@timestamp\":\"${dateEarly}\",\"value\":1}}>`, + `For rule 'test-rule-id', the hit with date '${dateLate}' from field '@timestamp' is outside the query time range. Query: <\"the query would go here\">. Document: <{\"_source\":{\"@timestamp\":\"${dateLate}\",\"value\":3}}>`, + ]); + expect(allLogCalls.error).toMatchInlineSnapshot(` + Array [ + Array [ + Object { + "tags": Array [ + "query-result-out-of-time-range", + ], + }, + ], + Array [ + Object { + "tags": Array [ + "query-result-out-of-time-range", + ], + }, + ], + ] + `); + }); + + it('should log messages for bad start / end dates for query dsl', async () => { + function getTimeRange() { + return { dateStart: 'x', dateEnd: 'y' }; + } + + mockFetchEsQuery.mockResolvedValueOnce({ + parsedResults: { + results: [ + { + group: 'all documents', + count: 1, + hits: [{ _source: { '@timestamp': new Date().toISOString() } }], + }, + ], + }, + truncated: false, + query: 'the query would go here', + }); + + const executorOptions: ExecutorOptions = { + ...defaultExecutorOptions, + getTimeRange, + params: { + ...defaultProps, + searchType: 'esQuery', + timeField: '@timestamp', + }, + }; + await executor(coreMock, executorOptions); + + const allLogCalls = loggerMock.collect(logger); + const messages: string[] = []; + for (const parms of allLogCalls.error) { + const message = parms.shift(); + messages.push(`${message}`); + } + + expect(messages).toEqual([ + `For rule 'test-rule-id', hits were returned with invalid time range start date 'x' from field '@timestamp' using query <"the query would go here">`, + `For rule 'test-rule-id', hits were returned with invalid time range end date 'y' from field '@timestamp' using query <"the query would go here">`, + ]); + expect(allLogCalls.error).toMatchInlineSnapshot(` + Array [ + Array [ + Object { + "tags": Array [ + "query-result-out-of-time-range", + ], + }, + ], + Array [ + Object { + "tags": Array [ + "query-result-out-of-time-range", + ], + }, + ], + ] + `); + }); }); describe('tryToParseAsDate', () => { diff --git a/x-pack/plugins/stack_alerts/server/rule_types/es_query/executor.ts b/x-pack/plugins/stack_alerts/server/rule_types/es_query/executor.ts index 5e23bf9498ec..6f84e73118ec 100644 --- a/x-pack/plugins/stack_alerts/server/rule_types/es_query/executor.ts +++ b/x-pack/plugins/stack_alerts/server/rule_types/es_query/executor.ts @@ -6,7 +6,7 @@ */ import { sha256 } from 'js-sha256'; import { i18n } from '@kbn/i18n'; -import { CoreSetup } from '@kbn/core/server'; +import { CoreSetup, Logger } from '@kbn/core/server'; import { isGroupAggregation, UngroupedGroupId } from '@kbn/triggers-actions-ui-plugin/common'; import { ALERT_EVALUATION_THRESHOLD, @@ -16,6 +16,9 @@ import { } from '@kbn/rule-data-utils'; import { AlertsClientError } from '@kbn/alerting-plugin/server'; +import { get } from 'lodash'; +import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; + import { ComparatorFns } from '../../../common'; import { addMessages, @@ -71,7 +74,7 @@ export async function executor(core: CoreSetup, options: ExecutorOptions>, + timeField: string | undefined, + dateStart: string, + dateEnd: string, + query: unknown +) { + if (!timeField) return; + + const epochStart = new Date(dateStart).getTime(); + const epochEnd = new Date(dateEnd).getTime(); + const messageMeta = { tags: ['query-result-out-of-time-range'] }; + + const messagePrefix = `For rule '${ruleId}'`; + const usingQuery = `using query <${JSON.stringify(query)}>`; + const hitsWereReturned = 'hits were returned with invalid time range'; + + let errors = 0; + if (isNaN(epochStart)) { + errors++; + logger.error( + `${messagePrefix}, ${hitsWereReturned} start date '${dateStart}' from field '${timeField}' ${usingQuery}`, + messageMeta + ); + } + + if (isNaN(epochEnd)) { + errors++; + logger.error( + `${messagePrefix}, ${hitsWereReturned} end date '${dateEnd}' from field '${timeField}' ${usingQuery}`, + messageMeta + ); + } + + if (errors > 0) return; + + const outsideTimeRange = 'outside the query time range'; + + for (const hit of hits) { + const dateVal = get(hit, `_source.${timeField}`); + const epochDate = getEpochDateFromString(dateVal); + + if (epochDate) { + if (epochDate < epochStart || epochDate > epochEnd) { + const message = `the hit with date '${dateVal}' from field '${timeField}' is ${outsideTimeRange}`; + const queryString = `Query: <${JSON.stringify(query)}>`; + const document = `Document: <${JSON.stringify(hit)}>`; + logger.error(`${messagePrefix}, ${message}. ${queryString}. ${document}`, messageMeta); + } + } + } +} + +function getEpochDateFromString(dateString: string): number | null { + let date: Date; + try { + date = new Date(dateString); + } catch (e) { + return null; + } + + const time = date.getTime(); + if (!isNaN(time)) return time; + + // if not a valid date string, try it as a stringified number + const dateNum = parseInt(dateString, 10); + if (isNaN(dateNum)) return null; + + const timeFromNumber = new Date(dateNum).getTime(); + if (isNaN(timeFromNumber)) return null; + + return timeFromNumber; +} + export function getValidTimefieldSort( sortValues: Array = [] ): undefined | string { diff --git a/x-pack/plugins/stack_alerts/server/rule_types/es_query/lib/fetch_es_query.ts b/x-pack/plugins/stack_alerts/server/rule_types/es_query/lib/fetch_es_query.ts index fdff3356faab..b51de616587d 100644 --- a/x-pack/plugins/stack_alerts/server/rule_types/es_query/lib/fetch_es_query.ts +++ b/x-pack/plugins/stack_alerts/server/rule_types/es_query/lib/fetch_es_query.ts @@ -144,6 +144,7 @@ export async function fetchEsQuery({ sourceFieldsParams: params.sourceFields, }), link, + query: sortedQuery, index: params.index, }; } diff --git a/x-pack/plugins/stack_alerts/server/rule_types/es_query/lib/fetch_esql_query.ts b/x-pack/plugins/stack_alerts/server/rule_types/es_query/lib/fetch_esql_query.ts index 5ecb258b0579..59686220bb3b 100644 --- a/x-pack/plugins/stack_alerts/server/rule_types/es_query/lib/fetch_esql_query.ts +++ b/x-pack/plugins/stack_alerts/server/rule_types/es_query/lib/fetch_esql_query.ts @@ -56,6 +56,7 @@ export async function fetchEsqlQuery({ return { link, + query, numMatches: Number(response.values.length), parsedResults: parseAggregationResults({ isCountAgg: true, diff --git a/x-pack/plugins/stack_alerts/server/rule_types/es_query/lib/fetch_search_source_query.ts b/x-pack/plugins/stack_alerts/server/rule_types/es_query/lib/fetch_search_source_query.ts index e0f6613fb5a1..13689b135cd4 100644 --- a/x-pack/plugins/stack_alerts/server/rule_types/es_query/lib/fetch_search_source_query.ts +++ b/x-pack/plugins/stack_alerts/server/rule_types/es_query/lib/fetch_search_source_query.ts @@ -69,11 +69,8 @@ export async function fetchSearchSourceQuery({ alertLimit ); - logger.debug( - `search source query rule (${ruleId}) query: ${JSON.stringify( - searchSource.getSearchRequestBody() - )}` - ); + const searchRequestBody: unknown = searchSource.getSearchRequestBody(); + logger.debug(`search source query rule (${ruleId}) query: ${JSON.stringify(searchRequestBody)}`); const searchResult = await searchSource.fetch(); @@ -98,6 +95,7 @@ export async function fetchSearchSourceQuery({ sourceFieldsParams: params.sourceFields, }), index: [index.name], + query: searchRequestBody, }; }