Skip to content

Commit

Permalink
[Response Ops][Alerting] Use ES client to update rule SO at end of ru…
Browse files Browse the repository at this point in the history
…le run instead of SO client. (elastic#193341)

Resolves elastic#192397

## Summary

Updates alerting task runner end of run updates to use the ES client
update function for a true partial update instead of the saved objects
client update function that performs a GET then an update.

## To verify
Create a rule in multiple spaces and ensure they run correctly and their
execution status and monitoring history are updated at the end of each
run. Because we're performing a partial update on attributes that are
not in the AAD, the rule should continue running without any encryption
errors.

## Risk Matrix

| Risk | Probability | Severity | Mitigation/Notes |

|---------------------------|-------------|----------|-------------------------|
| Updating saved object directly using ES client will break BWC | Medium
| High | Response Ops follows an intermediate release strategy for any
changes to the rule saved object where schema changes are introduced in
an intermediate release before any changes to the saved object are
actually made in a followup release. This ensures that any rollbacks
that may be required in a release will roll back to a version that is
already aware of the new schema. The team is socialized to this strategy
as we are requiring users of the alerting framework to also follow this
strategy. This should address any backward compatibility issues that
might arise by circumventing the saved objects client update function. |
| Updating saved object directly using ES client will break AAD | Medium
| High | An explicit allowlist of non-AAD fields that are allowed to be
partially updated has been introduced and any fields not in this
allowlist will not be included in the partial update. Any updates to the
rule saved object that might break AAD would show up with > 1 execution
of a rule and we have a plethora of functional tests that rely on
multiple executions of a rule that would flag if there were issues
running due to AAD issues. |

---------

Co-authored-by: kibanamachine <[email protected]>
Co-authored-by: Elastic Machine <[email protected]>
  • Loading branch information
3 people authored Sep 30, 2024
1 parent 9f9d911 commit 05926c2
Show file tree
Hide file tree
Showing 9 changed files with 340 additions and 141 deletions.
2 changes: 1 addition & 1 deletion x-pack/plugins/alerting/server/saved_objects/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import { RawRule } from '../types';
import { getImportWarnings } from './get_import_warnings';
import { isRuleExportable } from './is_rule_exportable';
import { RuleTypeRegistry } from '../rule_type_registry';
export { partiallyUpdateRule } from './partially_update_rule';
export { partiallyUpdateRule, partiallyUpdateRuleWithEs } from './partially_update_rule';
import {
RULES_SETTINGS_SAVED_OBJECT_TYPE,
MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,23 @@ import {
ISavedObjectsRepository,
SavedObjectsErrorHelpers,
} from '@kbn/core/server';

import { PartiallyUpdateableRuleAttributes, partiallyUpdateRule } from './partially_update_rule';
import { savedObjectsClientMock } from '@kbn/core/server/mocks';
import {
PartiallyUpdateableRuleAttributes,
partiallyUpdateRule,
partiallyUpdateRuleWithEs,
} from './partially_update_rule';
import { elasticsearchServiceMock, savedObjectsClientMock } from '@kbn/core/server/mocks';
import { RULE_SAVED_OBJECT_TYPE } from '.';
import { ALERTING_CASES_SAVED_OBJECT_INDEX } from '@kbn/core-saved-objects-server';
import { estypes } from '@elastic/elasticsearch';
import { RuleExecutionStatuses } from '@kbn/alerting-types';

const MockSavedObjectsClientContract = savedObjectsClientMock.create();
const MockISavedObjectsRepository =
MockSavedObjectsClientContract as unknown as jest.Mocked<ISavedObjectsRepository>;
const esClient = elasticsearchServiceMock.createClusterClient().asInternalUser;

describe('partially_update_rule', () => {
describe('partiallyUpdateRule', () => {
beforeEach(() => {
jest.resetAllMocks();
});
Expand Down Expand Up @@ -104,6 +111,101 @@ describe('partially_update_rule', () => {
});
});

describe('partiallyUpdateRuleWithEs', () => {
beforeEach(() => {
jest.resetAllMocks();
jest.clearAllMocks();
});

test('should work with no options', async () => {
esClient.update.mockResolvedValueOnce(MockEsUpdateResponse(MockRuleId));

await partiallyUpdateRuleWithEs(esClient, MockRuleId, DefaultAttributesForEsUpdate);
expect(esClient.update).toHaveBeenCalledTimes(1);
expect(esClient.update).toHaveBeenCalledWith({
id: `alert:${MockRuleId}`,
index: ALERTING_CASES_SAVED_OBJECT_INDEX,
doc: {
alert: DefaultAttributesForEsUpdate,
},
});
});

test('should strip unallowed attributes ', async () => {
const attributes =
AttributesForEsUpdateWithUnallowedFields as unknown as PartiallyUpdateableRuleAttributes;
esClient.update.mockResolvedValueOnce(MockEsUpdateResponse(MockRuleId));

await partiallyUpdateRuleWithEs(esClient, MockRuleId, attributes);
expect(esClient.update).toHaveBeenCalledWith({
id: `alert:${MockRuleId}`,
index: ALERTING_CASES_SAVED_OBJECT_INDEX,
doc: {
alert: DefaultAttributesForEsUpdate,
},
});
});

test('should handle ES errors', async () => {
esClient.update.mockRejectedValueOnce(new Error('wops'));

await expect(
partiallyUpdateRuleWithEs(esClient, MockRuleId, DefaultAttributes)
).rejects.toThrowError('wops');
});

test('should handle the version option', async () => {
esClient.update.mockResolvedValueOnce(MockEsUpdateResponse(MockRuleId));

await partiallyUpdateRuleWithEs(esClient, MockRuleId, DefaultAttributesForEsUpdate, {
version: 'WzQsMV0=',
});
expect(esClient.update).toHaveBeenCalledWith({
id: `alert:${MockRuleId}`,
index: ALERTING_CASES_SAVED_OBJECT_INDEX,
if_primary_term: 1,
if_seq_no: 4,
doc: {
alert: DefaultAttributesForEsUpdate,
},
});
});

test('should handle the ignore404 option', async () => {
esClient.update.mockResolvedValueOnce(MockEsUpdateResponse(MockRuleId));

await partiallyUpdateRuleWithEs(esClient, MockRuleId, DefaultAttributesForEsUpdate, {
ignore404: true,
});
expect(esClient.update).toHaveBeenCalledWith(
{
id: `alert:${MockRuleId}`,
index: ALERTING_CASES_SAVED_OBJECT_INDEX,
doc: {
alert: DefaultAttributesForEsUpdate,
},
},
{ ignore: [404] }
);
});

test('should handle the refresh option', async () => {
esClient.update.mockResolvedValueOnce(MockEsUpdateResponse(MockRuleId));

await partiallyUpdateRuleWithEs(esClient, MockRuleId, DefaultAttributesForEsUpdate, {
refresh: 'wait_for',
});
expect(esClient.update).toHaveBeenCalledWith({
id: `alert:${MockRuleId}`,
index: ALERTING_CASES_SAVED_OBJECT_INDEX,
doc: {
alert: DefaultAttributesForEsUpdate,
},
refresh: 'wait_for',
});
});
});

function getMockSavedObjectClients(): Record<
string,
jest.Mocked<SavedObjectsClientContract | ISavedObjectsRepository>
Expand All @@ -126,6 +228,50 @@ const DefaultAttributes = {

const ExtraneousAttributes = { ...DefaultAttributes, foo: 'bar' };

const DefaultAttributesForEsUpdate = {
running: false,
executionStatus: {
status: 'active' as RuleExecutionStatuses,
lastExecutionDate: '2023-01-01T08:44:40.000Z',
lastDuration: 12,
error: null,
warning: null,
},
monitoring: {
run: {
calculated_metrics: {
success_ratio: 20,
},
history: [
{
success: true,
timestamp: 1640991880000,
duration: 12,
outcome: 'success',
},
],
last_run: {
timestamp: '2023-01-01T08:44:40.000Z',
metrics: {
duration: 12,
gap_duration_s: null,
total_alerts_created: null,
total_alerts_detected: null,
total_indexing_duration_ms: null,
total_search_duration_ms: null,
},
},
},
},
};

const AttributesForEsUpdateWithUnallowedFields = {
...DefaultAttributesForEsUpdate,
alertTypeId: 'foo',
consumer: 'consumer',
randomField: 'bar',
};

const MockRuleId = 'rule-id';

const MockUpdateValue = {
Expand All @@ -137,3 +283,13 @@ const MockUpdateValue = {
},
references: [],
};

const MockEsUpdateResponse = (id: string) => ({
_index: '.kibana_alerting_cases_9.0.0_001',
_id: `alert:${id}`,
_version: 3,
result: 'updated' as estypes.Result,
_shards: { total: 1, successful: 1, failed: 0 },
_seq_no: 5,
_primary_term: 1,
});
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@

import { omit, pick } from 'lodash';
import {
ElasticsearchClient,
SavedObjectsClient,
SavedObjectsErrorHelpers,
SavedObjectsUpdateOptions,
} from '@kbn/core/server';
import { decodeRequestVersion } from '@kbn/core-saved-objects-base-server-internal';
import { ALERTING_CASES_SAVED_OBJECT_INDEX } from '@kbn/core-saved-objects-server';
import { RawRule } from '../types';

import {
Expand Down Expand Up @@ -67,3 +70,50 @@ export async function partiallyUpdateRule(
throw err;
}
}

// Explicit list of attributes that we allow to be partially updated
// There should be no overlap between this list and RuleAttributesIncludedInAAD or RuleAttributesToEncrypt
const RuleAttributesAllowedForPartialUpdate = [
'executionStatus',
'lastRun',
'monitoring',
'nextRun',
'running',
];

// direct, partial update to a rule saved object via ElasticsearchClient

// we do this direct partial update to avoid the overhead of the SavedObjectsClient for
// only these allow-listed fields which don't impact encryption. in addition, because these
// fields are only updated by the system user at the end of a rule run, they should not
// need to be included in any (user-centric) audit logs.
export async function partiallyUpdateRuleWithEs(
esClient: ElasticsearchClient,
id: string,
attributes: PartiallyUpdateableRuleAttributes,
options: PartiallyUpdateRuleSavedObjectOptions = {}
): Promise<void> {
// ensure we only have the valid attributes that are not encrypted and are excluded from AAD
const attributeUpdates = omit(attributes, [
...RuleAttributesToEncrypt,
...RuleAttributesIncludedInAAD,
]);
// ensure we only have attributes that we explicitly allow to be updated
const attributesAllowedForUpdate = pick(attributeUpdates, RuleAttributesAllowedForPartialUpdate);

const updateParams = {
id: `alert:${id}`,
index: ALERTING_CASES_SAVED_OBJECT_INDEX,
...(options.version ? decodeRequestVersion(options.version) : {}),
doc: {
alert: attributesAllowedForUpdate,
},
...(options.refresh ? { refresh: options.refresh } : {}),
};

if (options.ignore404) {
await esClient.update(updateParams, { ignore: [404] });
} else {
await esClient.update(updateParams);
}
}
89 changes: 48 additions & 41 deletions x-pack/plugins/alerting/server/task_runner/fixtures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import { TaskStatus } from '@kbn/task-manager-plugin/server';
import { SavedObject } from '@kbn/core/server';
import { ALERTING_CASES_SAVED_OBJECT_INDEX } from '@kbn/core-saved-objects-server';
import {
Rule,
RuleTypeParams,
Expand Down Expand Up @@ -64,7 +65,7 @@ const defaultHistory = [
},
];

export const generateSavedObjectParams = ({
export const generateRuleUpdateParams = ({
error = null,
warning = null,
status = 'ok',
Expand All @@ -83,53 +84,59 @@ export const generateSavedObjectParams = ({
history?: RuleMonitoring['run']['history'];
alertsCount?: Record<string, number>;
}) => [
RULE_SAVED_OBJECT_TYPE,
'1',
{
monitoring: {
run: {
calculated_metrics: {
success_ratio: successRatio,
id: `alert:1`,
index: ALERTING_CASES_SAVED_OBJECT_INDEX,
doc: {
alert: {
monitoring: {
run: {
calculated_metrics: {
success_ratio: successRatio,
},
history,
last_run: {
timestamp: '1970-01-01T00:00:00.000Z',
metrics: {
duration: 0,
gap_duration_s: null,
total_alerts_created: null,
total_alerts_detected: null,
total_indexing_duration_ms: null,
total_search_duration_ms: null,
},
},
},
},
history,
last_run: {
timestamp: '1970-01-01T00:00:00.000Z',
metrics: {
duration: 0,
gap_duration_s: null,
total_alerts_created: null,
total_alerts_detected: null,
total_indexing_duration_ms: null,
total_search_duration_ms: null,
executionStatus: {
error,
lastDuration: 0,
lastExecutionDate: '1970-01-01T00:00:00.000Z',
status,
warning,
},
lastRun: {
outcome,
outcomeOrder: RuleLastRunOutcomeOrderMap[outcome],
outcomeMsg:
(error?.message && [error?.message]) ||
(warning?.message && [warning?.message]) ||
null,
warning: error?.reason || warning?.reason || null,
alertsCount: {
active: 0,
ignored: 0,
new: 0,
recovered: 0,
...(alertsCount || {}),
},
},
nextRun,
running: false,
},
},
executionStatus: {
error,
lastDuration: 0,
lastExecutionDate: '1970-01-01T00:00:00.000Z',
status,
warning,
},
lastRun: {
outcome,
outcomeOrder: RuleLastRunOutcomeOrderMap[outcome],
outcomeMsg:
(error?.message && [error?.message]) || (warning?.message && [warning?.message]) || null,
warning: error?.reason || warning?.reason || null,
alertsCount: {
active: 0,
ignored: 0,
new: 0,
recovered: 0,
...(alertsCount || {}),
},
},
nextRun,
running: false,
},
{ refresh: false, namespace: undefined },
{ ignore: [404] },
];

export const GENERIC_ERROR_MESSAGE = 'GENERIC ERROR MESSAGE';
Expand Down
Loading

0 comments on commit 05926c2

Please sign in to comment.