Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Security Solution] Classify EQL verification and ML job missing errors as user errors #180094

Merged
merged 6 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ export const createRuleExecutionLogClientForExecutors = (
};

const writeStatusChangeToRuleObject = async (args: NormalizedStatusChangeArgs): Promise<void> => {
const { newStatus, message, metrics } = args;
const { newStatus, message, metrics, userError } = args;

if (newStatus === RuleExecutionStatusEnum.running) {
return;
Expand All @@ -189,7 +189,7 @@ export const createRuleExecutionLogClientForExecutors = (
}

if (newStatus === RuleExecutionStatusEnum.failed) {
ruleResultService.addLastRunError(message);
ruleResultService.addLastRunError(message, userError ?? false);
} else if (newStatus === RuleExecutionStatusEnum['partial failure']) {
ruleResultService.addLastRunWarning(message);
}
Expand Down Expand Up @@ -233,6 +233,7 @@ interface NormalizedStatusChangeArgs {
newStatus: RuleExecutionStatus;
message: string;
metrics?: RuleExecutionMetrics;
userError?: boolean;
}

const normalizeStatusChangeArgs = (args: StatusChangeArgs): NormalizedStatusChangeArgs => {
Expand All @@ -242,7 +243,7 @@ const normalizeStatusChangeArgs = (args: StatusChangeArgs): NormalizedStatusChan
message: '',
};
}
const { newStatus, message, metrics } = args;
const { newStatus, message, metrics, userError } = args;

return {
newStatus,
Expand All @@ -255,6 +256,7 @@ const normalizeStatusChangeArgs = (args: StatusChangeArgs): NormalizedStatusChan
execution_gap_duration_s: normalizeGap(metrics.executionGap),
}
: undefined,
userError,
};
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ export interface StatusChangeArgs {
newStatus: RuleExecutionStatus;
message?: string;
metrics?: MetricsArgs;
userError?: boolean;
}

export interface MetricsArgs {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,7 @@ export const createSecurityRuleTypeWrapper: CreateSecurityRuleTypeWrapper =
success: result.success && runResult.success,
warning: warningMessages.length > 0,
warningMessages,
userError: runResult.userError,
};
runState = runResult.state;
}
Expand Down Expand Up @@ -516,6 +517,7 @@ export const createSecurityRuleTypeWrapper: CreateSecurityRuleTypeWrapper =
indexingDurations: result.bulkCreateTimes,
enrichmentDurations: result.enrichmentTimes,
},
userError: result.userError,
});
}
} catch (error) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,5 +66,29 @@ describe('eql_executor', () => {
}`,
]);
});

it('should classify EQL verification exceptions as "user errors" when reporting to the framework', async () => {
alertServices.scopedClusterClient.asCurrentUser.eql.search.mockRejectedValue({
name: 'ResponseError',
message:
'verification_exception\n\tRoot causes:\n\t\tverification_exception: Found 1 problem\nline 1:1: Unknown column [event.category]',
});
const result = await eqlExecutor({
inputIndex: DEFAULT_INDEX_PATTERN,
runtimeMappings: {},
completeRule: eqlCompleteRule,
tuple,
ruleExecutionLogger,
services: alertServices,
version,
bulkCreate: jest.fn(),
wrapHits: jest.fn(),
wrapSequences: jest.fn(),
primaryTimestamp: '@timestamp',
exceptionFilter: undefined,
unprocessedExceptions: [],
});
expect(result.userError).toEqual(true);
});
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -100,40 +100,51 @@ export const eqlExecutor = async ({
}
const eqlSignalSearchStart = performance.now();

const response = await services.scopedClusterClient.asCurrentUser.eql.search<SignalSource>(
request
);
try {
const response = await services.scopedClusterClient.asCurrentUser.eql.search<SignalSource>(
request
);

const eqlSignalSearchEnd = performance.now();
const eqlSearchDuration = makeFloatString(eqlSignalSearchEnd - eqlSignalSearchStart);
result.searchAfterTimes = [eqlSearchDuration];
const eqlSignalSearchEnd = performance.now();
const eqlSearchDuration = makeFloatString(eqlSignalSearchEnd - eqlSignalSearchStart);
result.searchAfterTimes = [eqlSearchDuration];

let newSignals: Array<WrappedFieldsLatest<BaseFieldsLatest>> | undefined;
if (response.hits.sequences !== undefined) {
newSignals = wrapSequences(response.hits.sequences, buildReasonMessageForEqlAlert);
} else if (response.hits.events !== undefined) {
newSignals = wrapHits(response.hits.events, buildReasonMessageForEqlAlert);
} else {
throw new Error(
'eql query response should have either `sequences` or `events` but had neither'
);
}
let newSignals: Array<WrappedFieldsLatest<BaseFieldsLatest>> | undefined;
if (response.hits.sequences !== undefined) {
newSignals = wrapSequences(response.hits.sequences, buildReasonMessageForEqlAlert);
} else if (response.hits.events !== undefined) {
newSignals = wrapHits(response.hits.events, buildReasonMessageForEqlAlert);
} else {
throw new Error(
'eql query response should have either `sequences` or `events` but had neither'
);
}

if (newSignals?.length) {
const createResult = await bulkCreate(
newSignals,
undefined,
createEnrichEventsFunction({
services,
logger: ruleExecutionLogger,
})
);
if (newSignals?.length) {
const createResult = await bulkCreate(
newSignals,
undefined,
createEnrichEventsFunction({
services,
logger: ruleExecutionLogger,
})
);

addToSearchAfterReturn({ current: result, next: createResult });
}
if (response.hits.total && response.hits.total.value >= ruleParams.maxSignals) {
result.warningMessages.push(getMaxSignalsWarning());
addToSearchAfterReturn({ current: result, next: createResult });
}
if (response.hits.total && response.hits.total.value >= ruleParams.maxSignals) {
result.warningMessages.push(getMaxSignalsWarning());
}
return result;
} catch (error) {
if ((error.message as string).includes('verification_exception')) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

JS allow to throw anything. Generally speaking message can be undefined in rare cases. Since it's an error handler I'd recommend to add more robust check here like

if (error instanceof Error && error.message.includes('verification_exception')) {

// We report errors that are more related to user configuration of rules rather than system outages as "user errors"
// so SLO dashboards can show less noise around system outages
result.userError = true;
}
result.errors.push(error.message);
result.success = false;
return result;
}
return result;
});
};
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ describe('ml_executor', () => {
errors: [],
createdItems: [],
});
jobsSummaryMock.mockResolvedValue([]);
});

it('should throw an error if ML plugin was not available', async () => {
Expand Down Expand Up @@ -131,4 +132,26 @@ describe('ml_executor', () => {
);
expect(response.warningMessages.length).toEqual(1);
});

it('should report job missing errors as user errors', async () => {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ML test suite is skipped, but this test works locally if you unskip the suite

(findMlSignals as jest.Mock).mockRejectedValue({
message: 'my_test_job_name missing',
});

const result = await mlExecutor({
completeRule: mlCompleteRule,
tuple,
ml: mlMock,
services: alertServices,
ruleExecutionLogger,
listClient,
bulkCreate: jest.fn(),
wrapHits: jest.fn(),
exceptionFilter: undefined,
unprocessedExceptions: [],
});
expect(result.userError).toEqual(true);
expect(result.success).toEqual(false);
expect(result.errors).toEqual(['my_test_job_name missing']);
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
* 2.0.
*/

/* eslint require-atomic-updates: ["error", { "allowProperties": true }] */

import type { KibanaRequest } from '@kbn/core/server';
import type { ExceptionListItemSchema } from '@kbn/securitysolution-io-ts-list-types';
import type {
Expand All @@ -30,6 +32,7 @@ import {
import type { SetupPlugins } from '../../../../plugin';
import { withSecuritySpan } from '../../../../utils/with_security_span';
import type { IRuleExecutionLogForExecutors } from '../../rule_monitoring';
import type { AnomalyResults } from '../../../machine_learning';

export const mlExecutor = async ({
completeRule,
Expand Down Expand Up @@ -93,19 +96,29 @@ export const mlExecutor = async ({
result.warning = true;
}

const anomalyResults = await findMlSignals({
ml,
// Using fake KibanaRequest as it is needed to satisfy the ML Services API, but can be empty as it is
// currently unused by the mlAnomalySearch function.
request: {} as unknown as KibanaRequest,
savedObjectsClient: services.savedObjectsClient,
jobIds: ruleParams.machineLearningJobId,
anomalyThreshold: ruleParams.anomalyThreshold,
from: tuple.from.toISOString(),
to: tuple.to.toISOString(),
maxSignals: tuple.maxSignals,
exceptionFilter,
});
let anomalyResults: AnomalyResults;
try {
anomalyResults = await findMlSignals({
ml,
// Using fake KibanaRequest as it is needed to satisfy the ML Services API, but can be empty as it is
// currently unused by the mlAnomalySearch function.
request: {} as unknown as KibanaRequest,
savedObjectsClient: services.savedObjectsClient,
jobIds: ruleParams.machineLearningJobId,
anomalyThreshold: ruleParams.anomalyThreshold,
from: tuple.from.toISOString(),
to: tuple.to.toISOString(),
maxSignals: tuple.maxSignals,
exceptionFilter,
});
} catch (error) {
if ((error.message as string).endsWith('missing')) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same as above regarding the error type.

result.userError = true;
}
result.errors.push(error.message);
result.success = false;
return result;
}

if (
anomalyResults.hits.total &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ export interface SecurityAlertTypeReturnValue<TState extends RuleTypeState> {
createdSignalsCount: number;
createdSignals: unknown[];
errors: string[];
userError?: boolean;
lastLookbackDate?: Date | null;
searchAfterTimes: string[];
state: TState;
Expand Down Expand Up @@ -388,6 +389,7 @@ export interface SearchAfterAndBulkCreateReturnType {
createdSignalsCount: number;
createdSignals: unknown[];
errors: string[];
userError?: boolean;
warningMessages: string[];
suppressedAlertsCount?: number;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
*/

import { v4 as uuidv4 } from 'uuid';
import supertestLib from 'supertest';
import url from 'url';
import expect from '@kbn/expect';
import {
ALERT_REASON,
Expand All @@ -30,7 +32,10 @@ import {
ALERT_GROUP_ID,
} from '@kbn/security-solution-plugin/common/field_maps/field_names';
import { getMaxSignalsWarning as getMaxAlertsWarning } from '@kbn/security-solution-plugin/server/lib/detection_engine/rule_types/utils/utils';
import { ENABLE_ASSET_CRITICALITY_SETTING } from '@kbn/security-solution-plugin/common/constants';
import {
DETECTION_ENGINE_RULES_URL,
ENABLE_ASSET_CRITICALITY_SETTING,
} from '@kbn/security-solution-plugin/common/constants';
import {
getEqlRuleForAlertTesting,
getOpenAlerts,
Expand All @@ -42,6 +47,8 @@ import {
createRule,
deleteAllRules,
deleteAllAlerts,
waitForRuleFailure,
routeWithNamespace,
} from '../../../../../../../common/utils/security_solution';
import { FtrProviderContext } from '../../../../../../ftr_provider_context';
import { EsArchivePathBuilder } from '../../../../../../es_archive_path_builder';
Expand All @@ -62,6 +69,7 @@ export default ({ getService }: FtrProviderContext) => {

// TODO: add a new service for loading archiver files similar to "getService('es')"
const config = getService('config');
const request = supertestLib(url.format(config.get('servers.kibana')));
const isServerless = config.get('serverless');
const dataPathBuilder = new EsArchivePathBuilder(isServerless);
const auditPath = dataPathBuilder.getPath('auditbeat/hosts');
Expand Down Expand Up @@ -196,6 +204,42 @@ export default ({ getService }: FtrProviderContext) => {
});
});

it('classifies verification_exception errors as user errors', async () => {
function getMetricsRequest(reset: boolean = false) {
return request
.get(`/api/task_manager/metrics${reset ? '' : '?reset=false'}`)
.set('kbn-xsrf', 'foo')
.expect(200)
.then((response) => response.body);
}

await getMetricsRequest(true);
const rule: EqlRuleCreateProps = {
...getEqlRuleForAlertTesting(['auditbeat-*']),
query: 'file where field.doesnt.exist == true',
};
const createdRule = await createRule(supertest, log, rule);
await waitForRuleFailure({ supertest, log, id: createdRule.id });

const route = routeWithNamespace(DETECTION_ENGINE_RULES_URL);
const response = await supertest
.get(route)
.set('kbn-xsrf', 'true')
.set('elastic-api-version', '2023-10-31')
.query({ id: createdRule.id })
.expect(200);

const ruleResponse = response.body;
expect(
ruleResponse.execution_summary.last_execution.message.includes('verification_exception')
).eql(true);

const metricsResponse = await getMetricsRequest();
expect(
metricsResponse.metrics.task_run.value.by_type['alerting:siem__eqlRule'].user_errors
).eql(1);
});

it('generates up to max_alerts for non-sequence EQL queries', async () => {
const maxAlerts = 200;
const rule: EqlRuleCreateProps = {
Expand Down
Loading