elastic · marshallmain · Apr 10, 2024 · Apr 4, 2024 · Apr 4, 2024 · Apr 5, 2024
diff --git a/.../detection_engine/rule_monitoring/logic/rule_execution_log/client_for_executors/client.ts b/.../detection_engine/rule_monitoring/logic/rule_execution_log/client_for_executors/client.ts
@@ -164,7 +164,7 @@ export const createRuleExecutionLogClientForExecutors = (
   };
 
   const writeStatusChangeToRuleObject = async (args: NormalizedStatusChangeArgs): Promise<void> => {
-    const { newStatus, message, metrics } = args;
+    const { newStatus, message, metrics, userError } = args;
 
     if (newStatus === RuleExecutionStatusEnum.running) {
       return;
@@ -189,7 +189,7 @@ export const createRuleExecutionLogClientForExecutors = (
     }
 
     if (newStatus === RuleExecutionStatusEnum.failed) {
-      ruleResultService.addLastRunError(message);
+      ruleResultService.addLastRunError(message, userError ?? false);
     } else if (newStatus === RuleExecutionStatusEnum['partial failure']) {
       ruleResultService.addLastRunWarning(message);
     }
@@ -233,6 +233,7 @@ interface NormalizedStatusChangeArgs {
   newStatus: RuleExecutionStatus;
   message: string;
   metrics?: RuleExecutionMetrics;
+  userError?: boolean;
 }
 
 const normalizeStatusChangeArgs = (args: StatusChangeArgs): NormalizedStatusChangeArgs => {
@@ -242,7 +243,7 @@ const normalizeStatusChangeArgs = (args: StatusChangeArgs): NormalizedStatusChan
       message: '',
     };
   }
-  const { newStatus, message, metrics } = args;
+  const { newStatus, message, metrics, userError } = args;
 
   return {
     newStatus,
@@ -255,6 +256,7 @@ const normalizeStatusChangeArgs = (args: StatusChangeArgs): NormalizedStatusChan
           execution_gap_duration_s: normalizeGap(metrics.executionGap),
         }
       : undefined,
+    userError,
   };
 };
 

diff --git a/..._engine/rule_monitoring/logic/rule_execution_log/client_for_executors/client_interface.ts b/..._engine/rule_monitoring/logic/rule_execution_log/client_for_executors/client_interface.ts
@@ -122,6 +122,7 @@ export interface StatusChangeArgs {
   newStatus: RuleExecutionStatus;
   message?: string;
   metrics?: MetricsArgs;
+  userError?: boolean;
 }
 
 export interface MetricsArgs {

diff --git a/...rity_solution/server/lib/detection_engine/rule_types/create_security_rule_type_wrapper.ts b/...rity_solution/server/lib/detection_engine/rule_types/create_security_rule_type_wrapper.ts
@@ -440,6 +440,7 @@ export const createSecurityRuleTypeWrapper: CreateSecurityRuleTypeWrapper =
                   success: result.success && runResult.success,
                   warning: warningMessages.length > 0,
                   warningMessages,
+                  userError: runResult.userError,
                 };
                 runState = runResult.state;
               }
@@ -516,6 +517,7 @@ export const createSecurityRuleTypeWrapper: CreateSecurityRuleTypeWrapper =
                   indexingDurations: result.bulkCreateTimes,
                   enrichmentDurations: result.enrichmentTimes,
                 },
+                userError: result.userError,
               });
             }
           } catch (error) {

diff --git a/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/eql/eql.test.ts b/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/eql/eql.test.ts
@@ -66,5 +66,29 @@ describe('eql_executor', () => {
         }`,
       ]);
     });
+
+    it('should classify EQL verification exceptions as "user errors" when reporting to the framework', async () => {
+      alertServices.scopedClusterClient.asCurrentUser.eql.search.mockRejectedValue({
+        name: 'ResponseError',
+        message:
+          'verification_exception\n\tRoot causes:\n\t\tverification_exception: Found 1 problem\nline 1:1: Unknown column [event.category]',
+      });
+      const result = await eqlExecutor({
+        inputIndex: DEFAULT_INDEX_PATTERN,
+        runtimeMappings: {},
+        completeRule: eqlCompleteRule,
+        tuple,
+        ruleExecutionLogger,
+        services: alertServices,
+        version,
+        bulkCreate: jest.fn(),
+        wrapHits: jest.fn(),
+        wrapSequences: jest.fn(),
+        primaryTimestamp: '@timestamp',
+        exceptionFilter: undefined,
+        unprocessedExceptions: [],
+      });
+      expect(result.userError).toEqual(true);
+    });
   });
 });
diff --git a/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/eql/eql.ts b/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/eql/eql.ts
@@ -100,40 +100,51 @@ export const eqlExecutor = async ({
     }
     const eqlSignalSearchStart = performance.now();
 
-    const response = await services.scopedClusterClient.asCurrentUser.eql.search<SignalSource>(
-      request
-    );
+    try {
+      const response = await services.scopedClusterClient.asCurrentUser.eql.search<SignalSource>(
+        request
+      );
 
-    const eqlSignalSearchEnd = performance.now();
-    const eqlSearchDuration = makeFloatString(eqlSignalSearchEnd - eqlSignalSearchStart);
-    result.searchAfterTimes = [eqlSearchDuration];
+      const eqlSignalSearchEnd = performance.now();
+      const eqlSearchDuration = makeFloatString(eqlSignalSearchEnd - eqlSignalSearchStart);
+      result.searchAfterTimes = [eqlSearchDuration];
 
-    let newSignals: Array<WrappedFieldsLatest<BaseFieldsLatest>> | undefined;
-    if (response.hits.sequences !== undefined) {
-      newSignals = wrapSequences(response.hits.sequences, buildReasonMessageForEqlAlert);
-    } else if (response.hits.events !== undefined) {
-      newSignals = wrapHits(response.hits.events, buildReasonMessageForEqlAlert);
-    } else {
-      throw new Error(
-        'eql query response should have either `sequences` or `events` but had neither'
-      );
-    }
+      let newSignals: Array<WrappedFieldsLatest<BaseFieldsLatest>> | undefined;
+      if (response.hits.sequences !== undefined) {
+        newSignals = wrapSequences(response.hits.sequences, buildReasonMessageForEqlAlert);
+      } else if (response.hits.events !== undefined) {
+        newSignals = wrapHits(response.hits.events, buildReasonMessageForEqlAlert);
+      } else {
+        throw new Error(
+          'eql query response should have either `sequences` or `events` but had neither'
+        );
+      }
 
-    if (newSignals?.length) {
-      const createResult = await bulkCreate(
-        newSignals,
-        undefined,
-        createEnrichEventsFunction({
-          services,
-          logger: ruleExecutionLogger,
-        })
-      );
+      if (newSignals?.length) {
+        const createResult = await bulkCreate(
+          newSignals,
+          undefined,
+          createEnrichEventsFunction({
+            services,
+            logger: ruleExecutionLogger,
+          })
+        );
 
-      addToSearchAfterReturn({ current: result, next: createResult });
-    }
-    if (response.hits.total && response.hits.total.value >= ruleParams.maxSignals) {
-      result.warningMessages.push(getMaxSignalsWarning());
+        addToSearchAfterReturn({ current: result, next: createResult });
+      }
+      if (response.hits.total && response.hits.total.value >= ruleParams.maxSignals) {
+        result.warningMessages.push(getMaxSignalsWarning());
+      }
+      return result;
+    } catch (error) {
+      if ((error.message as string).includes('verification_exception')) {
+        // We report errors that are more related to user configuration of rules rather than system outages as "user errors"
+        // so SLO dashboards can show less noise around system outages
+        result.userError = true;
+      }
+      result.errors.push(error.message);
+      result.success = false;
+      return result;
     }
-    return result;
   });
 };
diff --git a/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/ml/ml.test.ts b/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/ml/ml.test.ts
@@ -64,6 +64,7 @@ describe('ml_executor', () => {
       errors: [],
       createdItems: [],
     });
+    jobsSummaryMock.mockResolvedValue([]);
   });
 
   it('should throw an error if ML plugin was not available', async () => {
@@ -131,4 +132,26 @@ describe('ml_executor', () => {
     );
     expect(response.warningMessages.length).toEqual(1);
   });
+
+  it('should report job missing errors as user errors', async () => {
+    (findMlSignals as jest.Mock).mockRejectedValue({
+      message: 'my_test_job_name missing',
+    });
+
+    const result = await mlExecutor({
+      completeRule: mlCompleteRule,
+      tuple,
+      ml: mlMock,
+      services: alertServices,
+      ruleExecutionLogger,
+      listClient,
+      bulkCreate: jest.fn(),
+      wrapHits: jest.fn(),
+      exceptionFilter: undefined,
+      unprocessedExceptions: [],
+    });
+    expect(result.userError).toEqual(true);
+    expect(result.success).toEqual(false);
+    expect(result.errors).toEqual(['my_test_job_name missing']);
+  });
 });
diff --git a/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/ml/ml.ts b/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/ml/ml.ts
@@ -5,6 +5,8 @@
  * 2.0.
  */
 
+/* eslint require-atomic-updates: ["error", { "allowProperties": true }] */
+
 import type { KibanaRequest } from '@kbn/core/server';
 import type { ExceptionListItemSchema } from '@kbn/securitysolution-io-ts-list-types';
 import type {
@@ -30,6 +32,7 @@ import {
 import type { SetupPlugins } from '../../../../plugin';
 import { withSecuritySpan } from '../../../../utils/with_security_span';
 import type { IRuleExecutionLogForExecutors } from '../../rule_monitoring';
+import type { AnomalyResults } from '../../../machine_learning';
 
 export const mlExecutor = async ({
   completeRule,
@@ -93,19 +96,29 @@ export const mlExecutor = async ({
       result.warning = true;
     }
 
-    const anomalyResults = await findMlSignals({
-      ml,
-      // Using fake KibanaRequest as it is needed to satisfy the ML Services API, but can be empty as it is
-      // currently unused by the mlAnomalySearch function.
-      request: {} as unknown as KibanaRequest,
-      savedObjectsClient: services.savedObjectsClient,
-      jobIds: ruleParams.machineLearningJobId,
-      anomalyThreshold: ruleParams.anomalyThreshold,
-      from: tuple.from.toISOString(),
-      to: tuple.to.toISOString(),
-      maxSignals: tuple.maxSignals,
-      exceptionFilter,
-    });
+    let anomalyResults: AnomalyResults;
+    try {
+      anomalyResults = await findMlSignals({
+        ml,
+        // Using fake KibanaRequest as it is needed to satisfy the ML Services API, but can be empty as it is
+        // currently unused by the mlAnomalySearch function.
+        request: {} as unknown as KibanaRequest,
+        savedObjectsClient: services.savedObjectsClient,
+        jobIds: ruleParams.machineLearningJobId,
+        anomalyThreshold: ruleParams.anomalyThreshold,
+        from: tuple.from.toISOString(),
+        to: tuple.to.toISOString(),
+        maxSignals: tuple.maxSignals,
+        exceptionFilter,
+      });
+    } catch (error) {
+      if ((error.message as string).endsWith('missing')) {
+        result.userError = true;
+      }
+      result.errors.push(error.message);
+      result.success = false;
+      return result;
+    }
 
     if (
       anomalyResults.hits.total &&

diff --git a/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/types.ts b/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/types.ts
@@ -65,6 +65,7 @@ export interface SecurityAlertTypeReturnValue<TState extends RuleTypeState> {
   createdSignalsCount: number;
   createdSignals: unknown[];
   errors: string[];
+  userError?: boolean;
   lastLookbackDate?: Date | null;
   searchAfterTimes: string[];
   state: TState;
@@ -388,6 +389,7 @@ export interface SearchAfterAndBulkCreateReturnType {
   createdSignalsCount: number;
   createdSignals: unknown[];
   errors: string[];
+  userError?: boolean;
   warningMessages: string[];
   suppressedAlertsCount?: number;
 }

@@ -6,6 +6,8 @@
  */
 
 import { v4 as uuidv4 } from 'uuid';
+import supertestLib from 'supertest';
+import url from 'url';
 import expect from '@kbn/expect';
 import {
   ALERT_REASON,
@@ -30,7 +32,10 @@ import {
   ALERT_GROUP_ID,
 } from '@kbn/security-solution-plugin/common/field_maps/field_names';
 import { getMaxSignalsWarning as getMaxAlertsWarning } from '@kbn/security-solution-plugin/server/lib/detection_engine/rule_types/utils/utils';
-import { ENABLE_ASSET_CRITICALITY_SETTING } from '@kbn/security-solution-plugin/common/constants';
+import {
+  DETECTION_ENGINE_RULES_URL,
+  ENABLE_ASSET_CRITICALITY_SETTING,
+} from '@kbn/security-solution-plugin/common/constants';
 import {
   getEqlRuleForAlertTesting,
   getOpenAlerts,
@@ -42,6 +47,8 @@ import {
   createRule,
   deleteAllRules,
   deleteAllAlerts,
+  waitForRuleFailure,
+  routeWithNamespace,
 } from '../../../../../../../common/utils/security_solution';
 import { FtrProviderContext } from '../../../../../../ftr_provider_context';
 import { EsArchivePathBuilder } from '../../../../../../es_archive_path_builder';
@@ -62,6 +69,7 @@ export default ({ getService }: FtrProviderContext) => {
 
   // TODO: add a new service for loading archiver files similar to "getService('es')"
   const config = getService('config');
+  const request = supertestLib(url.format(config.get('servers.kibana')));
   const isServerless = config.get('serverless');
   const dataPathBuilder = new EsArchivePathBuilder(isServerless);
   const auditPath = dataPathBuilder.getPath('auditbeat/hosts');
@@ -196,6 +204,42 @@ export default ({ getService }: FtrProviderContext) => {
       });
     });
 
+    it('classifies verification_exception errors as user errors', async () => {
+      function getMetricsRequest(reset: boolean = false) {
+        return request
+          .get(`/api/task_manager/metrics${reset ? '' : '?reset=false'}`)
+          .set('kbn-xsrf', 'foo')
+          .expect(200)
+          .then((response) => response.body);
+      }
+
+      await getMetricsRequest(true);
+      const rule: EqlRuleCreateProps = {
+        ...getEqlRuleForAlertTesting(['auditbeat-*']),
+        query: 'file where field.doesnt.exist == true',
+      };
+      const createdRule = await createRule(supertest, log, rule);
+      await waitForRuleFailure({ supertest, log, id: createdRule.id });
+
+      const route = routeWithNamespace(DETECTION_ENGINE_RULES_URL);
+      const response = await supertest
+        .get(route)
+        .set('kbn-xsrf', 'true')
+        .set('elastic-api-version', '2023-10-31')
+        .query({ id: createdRule.id })
+        .expect(200);
+
+      const ruleResponse = response.body;
+      expect(
+        ruleResponse.execution_summary.last_execution.message.includes('verification_exception')
+      ).eql(true);
+
+      const metricsResponse = await getMetricsRequest();
+      expect(
+        metricsResponse.metrics.task_run.value.by_type['alerting:siem__eqlRule'].user_errors
+      ).eql(1);
+    });
+
     it('generates up to max_alerts for non-sequence EQL queries', async () => {
       const maxAlerts = 200;
       const rule: EqlRuleCreateProps = {