[ML] AIOps: Use field caps option include_empty_fields=false instead of custom query. (#178699)

## Summary

Part of #178606.

As of elastic/elasticsearch#103651 there is a new field caps option `include_empty_fields`. This PR updates AIOps Log Rate Analysis to make use of this option instead of a custom query and code that identified populated fields.

### Checklist

- [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios
- [x] [Flaky Test Runner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was used on any tests changed https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/5482
- [x] This was checked for breaking API changes and was [labeled appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)
elastic · Mar 14, 2024 · 7b1af2a · 7b1af2a
1 parent cfbc456
commit 7b1af2a
Show file tree

Hide file tree

Showing 4 changed files with 19 additions and 123 deletions.
diff --git a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_index_info.test.ts b/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_index_info.test.ts
@@ -24,9 +24,6 @@ describe('fetch_index_info', () => {
           myKeywordFieldName: { keyword: { aggregatable: true } },
           // Should not end up as a field candidate, it's a keyword but non-aggregatable
           myKeywordFieldNameToBeIgnored: { keyword: { aggregatable: false } },
-          // Should not end up as a field candidate, based on this field caps result it would be
-          // but it will not be part of the mocked search result so will count as unpopulated.
-          myUnpopulatedKeywordFieldName: { keyword: { aggregatable: true } },
           // Should not end up as a field candidate since fields of type number will not be considered
           myNumericFieldName: { number: {} },
         },

diff --git a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_index_info.ts b/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_index_info.ts
@@ -12,7 +12,6 @@ import type { ElasticsearchClient } from '@kbn/core/server';
 
 import type { AiopsLogRateAnalysisSchema } from '../../../../common/api/log_rate_analysis/schema';
 
-import { getRandomDocsRequest } from './get_random_docs_request';
 import { getTotalDocCountRequest } from './get_total_doc_count_request';
 
 // TODO Consolidate with duplicate `fetchPValues` in
@@ -26,6 +25,8 @@ const SUPPORTED_ES_FIELD_TYPES = [
 
 const SUPPORTED_ES_FIELD_TYPES_TEXT = [ES_FIELD_TYPES.TEXT, ES_FIELD_TYPES.MATCH_ONLY_TEXT];
 
+const IGNORE_FIELD_NAMES = ['_tier'];
+
 interface IndexInfo {
   fieldCandidates: string[];
   textFieldCandidates: string[];
@@ -46,14 +47,14 @@ export const fetchIndexInfo = async (
     {
       index,
       fields: '*',
+      // @ts-expect-error include_empty_fields missing from FieldCapsRequest
+      include_empty_fields: false,
     },
     { signal: abortSignal, maxRetries: 0 }
   );
 
   const allFieldNames: string[] = [];
 
-  const finalFieldCandidates: Set<string> = new Set([]);
-  const finalTextFieldCandidates: Set<string> = new Set([]);
   const acceptableFields: Set<string> = new Set();
   const acceptableTextFields: Set<string> = new Set();
 
@@ -64,11 +65,11 @@ export const fetchIndexInfo = async (
     const isTextField = fieldTypes.some((type) => SUPPORTED_ES_FIELD_TYPES_TEXT.includes(type));
 
     // Check if fieldName is something we can aggregate on
-    if (isSupportedType && isAggregatable) {
+    if (isSupportedType && isAggregatable && !IGNORE_FIELD_NAMES.includes(key)) {
       acceptableFields.add(key);
     }
 
-    if (isTextField) {
+    if (isTextField && !IGNORE_FIELD_NAMES.includes(key)) {
       acceptableTextFields.add(key);
     }
 
@@ -84,46 +85,35 @@ export const fetchIndexInfo = async (
     }
   );
 
-  // Only the deviation window will be used to identify field candidates and sample probability based on total doc count.
-  const respDeviationRandomDocs = await esClient.search(
-    getRandomDocsRequest({ ...params, start: params.deviationMin, end: params.deviationMax }),
+  // Get the total doc count for the deviation time range
+  const respDeviationTotalDocCount = await esClient.search(
+    getTotalDocCountRequest({ ...params, start: params.deviationMin, end: params.deviationMax }),
     {
       signal: abortSignal,
       maxRetries: 0,
     }
   );
-  const sampledDocs = respDeviationRandomDocs.hits.hits.map((d) => d.fields ?? {});
 
   const textFieldCandidatesOverridesWithKeywordPostfix = textFieldCandidatesOverrides.map(
     (d) => `${d}.keyword`
   );
 
-  // Get all field names for each returned doc and flatten it
-  // to a list of unique field names used across all docs
-  // and filter by list of acceptable fields.
-  [...new Set(sampledDocs.map(Object.keys).flat(1))].forEach((field) => {
-    if (
-      acceptableFields.has(field) &&
-      !textFieldCandidatesOverridesWithKeywordPostfix.includes(field)
-    ) {
-      finalFieldCandidates.add(field);
-    }
-    if (
-      acceptableTextFields.has(field) &&
-      (!allFieldNames.includes(`${field}.keyword`) || textFieldCandidatesOverrides.includes(field))
-    ) {
-      finalTextFieldCandidates.add(field);
-    }
-  });
+  const fieldCandidates: string[] = [...acceptableFields].filter(
+    (field) => !textFieldCandidatesOverridesWithKeywordPostfix.includes(field)
+  );
+  const textFieldCandidates: string[] = [...acceptableTextFields].filter(
+    (field) =>
+      !allFieldNames.includes(`${field}.keyword`) || textFieldCandidatesOverrides.includes(field)
+  );
 
   const baselineTotalDocCount = (respBaselineTotalDocCount.hits.total as estypes.SearchTotalHits)
     .value;
-  const deviationTotalDocCount = (respDeviationRandomDocs.hits.total as estypes.SearchTotalHits)
+  const deviationTotalDocCount = (respDeviationTotalDocCount.hits.total as estypes.SearchTotalHits)
     .value;
 
   return {
-    fieldCandidates: [...finalFieldCandidates],
-    textFieldCandidates: [...finalTextFieldCandidates],
+    fieldCandidates,
+    textFieldCandidates,
     baselineTotalDocCount,
     deviationTotalDocCount,
     zeroDocsFallback: baselineTotalDocCount === 0 || deviationTotalDocCount === 0,

diff --git a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/get_random_docs_request.test.ts b/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/get_random_docs_request.test.ts
diff --git a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/get_random_docs_request.ts b/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/get_random_docs_request.ts