Skip to content

Commit

Permalink
[ML] AIOps: Use field caps option include_empty_fields=false instead of custom query. (#178699)
Browse files Browse the repository at this point in the history

## Summary

Part of #178606.

As of elastic/elasticsearch#103651, there is a
new field caps option `include_empty_fields`. This PR updates AIOps Log
Rate Analysis to make use of this option instead of the custom query and
code that previously identified populated fields.

### Checklist

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
- [x] [Flaky Test
Runner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was used on any tests changed: https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/5482
- [x] This was checked for breaking API changes and was [labeled
appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)
  • Loading branch information
walterra authored Mar 14, 2024
1 parent cfbc456 commit 7b1af2a
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 123 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@ describe('fetch_index_info', () => {
myKeywordFieldName: { keyword: { aggregatable: true } },
// Should not end up as a field candidate, it's a keyword but non-aggregatable
myKeywordFieldNameToBeIgnored: { keyword: { aggregatable: false } },
// Should not end up as a field candidate, based on this field caps result it would be
// but it will not be part of the mocked search result so will count as unpopulated.
myUnpopulatedKeywordFieldName: { keyword: { aggregatable: true } },
// Should not end up as a field candidate since fields of type number will not be considered
myNumericFieldName: { number: {} },
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import type { ElasticsearchClient } from '@kbn/core/server';

import type { AiopsLogRateAnalysisSchema } from '../../../../common/api/log_rate_analysis/schema';

import { getRandomDocsRequest } from './get_random_docs_request';
import { getTotalDocCountRequest } from './get_total_doc_count_request';

// TODO Consolidate with duplicate `fetchPValues` in
Expand All @@ -26,6 +25,8 @@ const SUPPORTED_ES_FIELD_TYPES = [

const SUPPORTED_ES_FIELD_TYPES_TEXT = [ES_FIELD_TYPES.TEXT, ES_FIELD_TYPES.MATCH_ONLY_TEXT];

const IGNORE_FIELD_NAMES = ['_tier'];

interface IndexInfo {
fieldCandidates: string[];
textFieldCandidates: string[];
Expand All @@ -46,14 +47,14 @@ export const fetchIndexInfo = async (
{
index,
fields: '*',
// @ts-expect-error include_empty_fields missing from FieldCapsRequest
include_empty_fields: false,
},
{ signal: abortSignal, maxRetries: 0 }
);

const allFieldNames: string[] = [];

const finalFieldCandidates: Set<string> = new Set([]);
const finalTextFieldCandidates: Set<string> = new Set([]);
const acceptableFields: Set<string> = new Set();
const acceptableTextFields: Set<string> = new Set();

Expand All @@ -64,11 +65,11 @@ export const fetchIndexInfo = async (
const isTextField = fieldTypes.some((type) => SUPPORTED_ES_FIELD_TYPES_TEXT.includes(type));

// Check if fieldName is something we can aggregate on
if (isSupportedType && isAggregatable) {
if (isSupportedType && isAggregatable && !IGNORE_FIELD_NAMES.includes(key)) {
acceptableFields.add(key);
}

if (isTextField) {
if (isTextField && !IGNORE_FIELD_NAMES.includes(key)) {
acceptableTextFields.add(key);
}

Expand All @@ -84,46 +85,35 @@ export const fetchIndexInfo = async (
}
);

// Only the deviation window will be used to identify field candidates and sample probability based on total doc count.
const respDeviationRandomDocs = await esClient.search(
getRandomDocsRequest({ ...params, start: params.deviationMin, end: params.deviationMax }),
// Get the total doc count for the deviation time range
const respDeviationTotalDocCount = await esClient.search(
getTotalDocCountRequest({ ...params, start: params.deviationMin, end: params.deviationMax }),
{
signal: abortSignal,
maxRetries: 0,
}
);
const sampledDocs = respDeviationRandomDocs.hits.hits.map((d) => d.fields ?? {});

const textFieldCandidatesOverridesWithKeywordPostfix = textFieldCandidatesOverrides.map(
(d) => `${d}.keyword`
);

// Get all field names for each returned doc and flatten it
// to a list of unique field names used across all docs
// and filter by list of acceptable fields.
[...new Set(sampledDocs.map(Object.keys).flat(1))].forEach((field) => {
if (
acceptableFields.has(field) &&
!textFieldCandidatesOverridesWithKeywordPostfix.includes(field)
) {
finalFieldCandidates.add(field);
}
if (
acceptableTextFields.has(field) &&
(!allFieldNames.includes(`${field}.keyword`) || textFieldCandidatesOverrides.includes(field))
) {
finalTextFieldCandidates.add(field);
}
});
const fieldCandidates: string[] = [...acceptableFields].filter(
(field) => !textFieldCandidatesOverridesWithKeywordPostfix.includes(field)
);
const textFieldCandidates: string[] = [...acceptableTextFields].filter(
(field) =>
!allFieldNames.includes(`${field}.keyword`) || textFieldCandidatesOverrides.includes(field)
);

const baselineTotalDocCount = (respBaselineTotalDocCount.hits.total as estypes.SearchTotalHits)
.value;
const deviationTotalDocCount = (respDeviationRandomDocs.hits.total as estypes.SearchTotalHits)
const deviationTotalDocCount = (respDeviationTotalDocCount.hits.total as estypes.SearchTotalHits)
.value;

return {
fieldCandidates: [...finalFieldCandidates],
textFieldCandidates: [...finalTextFieldCandidates],
fieldCandidates,
textFieldCandidates,
baselineTotalDocCount,
deviationTotalDocCount,
zeroDocsFallback: baselineTotalDocCount === 0 || deviationTotalDocCount === 0,
Expand Down

This file was deleted.

This file was deleted.

0 comments on commit 7b1af2a

Please sign in to comment.