Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ResponseOps][MW] Add telemetry for the maintenance window #192483

Merged
merged 26 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
763160c
total count MW telemetry
guskovaue Sep 4, 2024
3b01b9e
first method count total MW
guskovaue Sep 9, 2024
f4a882e
add positive and negative unit tests for total MW count
guskovaue Sep 10, 2024
146ecea
add telemetry for toggles and fix tests for them
guskovaue Sep 10, 2024
52438fd
fix type
guskovaue Sep 10, 2024
b292b33
checking right SO property for recurring check
guskovaue Sep 11, 2024
8819661
Merge branch 'main' into MX-184088-add-telemetry-for-mw
guskovaue Sep 11, 2024
c289c84
refresh telemetry mappings
guskovaue Sep 11, 2024
9408886
Merge branch 'MX-184088-add-telemetry-for-mw' of github.com:guskovaue…
guskovaue Sep 11, 2024
ceeb6ca
nit
guskovaue Sep 11, 2024
b6df6bd
add integrational test for MW telemetry
guskovaue Sep 12, 2024
3c365a2
fix integration test
guskovaue Sep 13, 2024
2806a04
fix await issue in find in tile generator
guskovaue Sep 13, 2024
72e7c54
fix types
guskovaue Sep 13, 2024
ce8eb77
Merge branch 'main' into MX-184088-add-telemetry-for-mw
elasticmachine Sep 13, 2024
6a62381
Merge branch 'main' into MX-184088-add-telemetry-for-mw
elasticmachine Sep 16, 2024
732374d
changes after code review
guskovaue Sep 16, 2024
c69db28
Merge branch 'MX-184088-add-telemetry-for-mw' of github.com:guskovaue…
guskovaue Sep 16, 2024
2e4440d
fix max amount
guskovaue Sep 17, 2024
2051618
fix max amount
guskovaue Sep 17, 2024
44165b9
Merge branch 'MX-184088-add-telemetry-for-mw' of github.com:guskovaue…
guskovaue Sep 17, 2024
a13b6e1
max limit
guskovaue Sep 17, 2024
4c852ba
changes after code review
guskovaue Sep 18, 2024
cfc3d0f
Merge branch 'main' into MX-184088-add-telemetry-for-mw
elasticmachine Sep 18, 2024
95ac96b
fix unit tests after last changes
guskovaue Sep 18, 2024
493290a
Merge branch 'main' into MX-184088-add-telemetry-for-mw
guskovaue Sep 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,9 @@ export function createAlertingUsageCollector(
count_rules_with_tags: 0,
count_rules_snoozed: 0,
count_rules_muted: 0,
count_mw_total: 0,
count_mw_with_repeat_toggle_on: 0,
count_mw_with_filter_alert_toggle_on: 0,
count_rules_with_muted_alerts: 0,
count_connector_types_by_consumers: {},
count_rules_by_execution_status_per_day: {},
Expand Down Expand Up @@ -289,6 +292,9 @@ export function createAlertingUsageCollector(
count_rules_by_notify_when: byNotifyWhenSchema,
count_rules_snoozed: { type: 'long' },
count_rules_muted: { type: 'long' },
count_mw_total: { type: 'long' },
count_mw_with_repeat_toggle_on: { type: 'long' },
count_mw_with_filter_alert_toggle_on: { type: 'long' },
count_rules_with_muted_alerts: { type: 'long' },
count_connector_types_by_consumers: { DYNAMIC_KEY: { DYNAMIC_KEY: { type: 'long' } } },
count_rules_by_execution_status_per_day: byStatusPerDaySchema,
Expand Down
8 changes: 8 additions & 0 deletions x-pack/plugins/alerting/server/usage/constants.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

/**
 * Cap used by the maintenance window (MW) telemetry collection: once the
 * running MW count exceeds this value the collection stops iterating.
 */
export const TELEMETRY_MW_COUNT_LIMIT = 10_000;
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,100 @@
*/

import { elasticsearchServiceMock, loggingSystemMock } from '@kbn/core/server/mocks';
import { getTotalCountAggregations, getTotalCountInUse } from './get_telemetry_from_kibana';
import {
getTotalCountAggregations,
getTotalCountInUse,
getMWTelemetry,
} from './get_telemetry_from_kibana';
import { savedObjectsClientMock } from '@kbn/core/server/mocks';
import { MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE } from '../../../common';
import { ISavedObjectsRepository } from '@kbn/core/server';

// Handle to the mocked '../constants' module; a test below overwrites
// TELEMETRY_MW_COUNT_LIMIT on it to exercise the max-limit path.
const constants = jest.requireMock('../constants');
jest.mock('../constants');

// Shared Elasticsearch mocks (presumably used by the other telemetry tests in this file).
const elasticsearch = elasticsearchServiceMock.createStart();
const esClient = elasticsearch.client.asInternalUser;
// Typed as the mock logger so tests can inspect `logger.warn.mock.calls`.
const logger: ReturnType<typeof loggingSystemMock.createLogger> = loggingSystemMock.createLogger();
// Saved objects client mock, cast so it can be passed where getMWTelemetry expects an ISavedObjectsRepository.
const savedObjectsClient = savedObjectsClientMock.create() as unknown as ISavedObjectsRepository;
// Error thrown by the finder stub in the failure-path test; its message is asserted as `errorMessage`.
const thrownError = new Error('Fail');

/**
 * Builds one maintenance window saved object for the fixture below.
 * Only the title, the rRule fields after `dtstart`/`tzid`, and `scopedQuery`
 * differ between the fixture entries; everything else is shared.
 */
const buildMockedMW = (
  id: string,
  title: string,
  rRuleRest: Record<string, unknown>,
  scopedQuery: Record<string, unknown> | null
) => ({
  id,
  type: MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE,
  attributes: {
    title,
    enabled: true,
    duration: 1800000,
    expirationDate: '2025-09-09T13:13:07.824Z',
    events: [],
    rRule: {
      dtstart: '2024-09-09T13:13:02.054Z',
      tzid: 'Europe/Stockholm',
      ...rRuleRest,
    },
    createdBy: null,
    updatedBy: null,
    createdAt: '2024-09-09T13:13:07.825Z',
    updatedAt: '2024-09-09T13:13:07.825Z',
    scopedQuery,
  },
});

/**
 * Single page of maintenance windows served by the finder stubs:
 *  - MW 1: no `interval` in rRule and `scopedQuery` is null
 *  - MW 2: `interval` present and `scopedQuery` set
 *  - MW 3: `interval` present and `scopedQuery` is null
 */
const mockedResponse = {
  saved_objects: [
    buildMockedMW('1', 'test_rule_1', { freq: 0, count: 1 }, null),
    buildMockedMW(
      '2',
      'test_rule_2',
      { freq: 3, interval: 1, byweekday: ['SU'] },
      {
        filters: [],
        kql: 'kibana.alert.job_errors_results.job_id : * ',
        dsl: '{"bool":{"must":[],"filter":[{"bool":{"should":[{"exists":{"field":"kibana.alert.job_errors_results.job_id"}}],"minimum_should_match":1}}],"should":[],"must_not":[]}}',
      }
    ),
    buildMockedMW('3', 'test_rule_3', { freq: 3, interval: 1, byweekday: ['TU'] }, null),
  ],
};

describe('kibana index telemetry', () => {
beforeEach(() => {
Expand Down Expand Up @@ -420,4 +509,91 @@ describe('kibana index telemetry', () => {
});
});
});

describe('getMWTelemetry', () => {
  test('should return MW telemetry', async () => {
    // Finder stub that serves the whole fixture as one page.
    const pagedFind = jest.fn().mockImplementation(async function* () {
      yield mockedResponse;
    });
    savedObjectsClient.createPointInTimeFinder = jest
      .fn()
      .mockReturnValue({ close: jest.fn(), find: pagedFind });

    const result = await getMWTelemetry({ savedObjectsClient, logger });

    // The finder must be created for maintenance windows across every namespace.
    const expectedFinderOptions = {
      type: MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE,
      namespaces: ['*'],
      perPage: 100,
    };
    expect(savedObjectsClient.createPointInTimeFinder).toHaveBeenCalledWith(
      expectedFinderOptions
    );

    // Three MWs in the fixture: two carry an rRule interval, one carries a scopedQuery.
    expect(result).toStrictEqual({
      count_mw_total: 3,
      count_mw_with_repeat_toggle_on: 2,
      count_mw_with_filter_alert_toggle_on: 1,
      hasErrors: false,
    });
  });
});

test('should throw the error', async () => {
  // Finder stub whose iteration fails immediately.
  const failingFind = jest.fn().mockImplementation(async function* () {
    throw thrownError;
  });
  savedObjectsClient.createPointInTimeFinder = jest
    .fn()
    .mockReturnValue({ close: jest.fn(), find: failingFind });

  const result = await getMWTelemetry({ savedObjectsClient, logger });

  expect(savedObjectsClient.createPointInTimeFinder).toHaveBeenCalledWith({
    type: MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE,
    namespaces: ['*'],
    perPage: 100,
  });

  // The failure is reported through the result instead of being rethrown.
  expect(result).toStrictEqual({
    count_mw_total: 0,
    count_mw_with_repeat_toggle_on: 0,
    count_mw_with_filter_alert_toggle_on: 0,
    hasErrors: true,
    errorMessage: 'Fail',
  });

  // ...and a warning is logged with the telemetry-failed tags and a stack trace.
  expect(logger.warn).toHaveBeenCalled();
  const [warnMessage, warnMeta] = logger.warn.mock.calls[0];
  expect(warnMessage).toBe('Error executing alerting telemetry task: getTotalMWCount - {}');
  expect(warnMeta?.tags).toEqual(['alerting', 'telemetry-failed']);
  expect(warnMeta?.error?.stack_trace).toBeDefined();
});

test('should stop on MW max limit count', async () => {
  // NOTE(review, @cnasikas): instead of mocking the constants, consider having getMWTelemetry
  // accept the count limit as a parameter defaulting to TELEMETRY_MW_COUNT_LIMIT, so the
  // test can simply pass 1 as the limit.

  // Lower the limit only for this test and restore it afterwards so the override
  // cannot leak into tests that run later.
  const originalLimit = constants.TELEMETRY_MW_COUNT_LIMIT;
  constants.TELEMETRY_MW_COUNT_LIMIT = 1;

  try {
    savedObjectsClient.createPointInTimeFinder = jest.fn().mockReturnValue({
      close: jest.fn(),
      find: jest.fn().mockImplementation(async function* () {
        yield mockedResponse;
      }),
    });
    const telemetry = await getMWTelemetry({
      savedObjectsClient,
      logger,
    });

    expect(savedObjectsClient.createPointInTimeFinder).toHaveBeenCalledWith({
      type: MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE,
      namespaces: ['*'],
      perPage: 100,
    });
    // The limit guard uses `>` and runs before the increment, so with a limit of 1
    // two MWs are counted before iteration stops.
    expect(telemetry).toStrictEqual({
      count_mw_total: 2,
      count_mw_with_repeat_toggle_on: 1,
      count_mw_with_filter_alert_toggle_on: 1,
      hasErrors: false,
    });
  } finally {
    constants.TELEMETRY_MW_COUNT_LIMIT = originalLimit;
  }
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import type {
AggregationsTermsAggregateBase,
AggregationsStringTermsBucketKeys,
} from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { ElasticsearchClient, Logger } from '@kbn/core/server';
import { ElasticsearchClient, Logger, ISavedObjectsRepository } from '@kbn/core/server';

import {
ConnectorsByConsumersBucket,
Expand All @@ -23,13 +23,21 @@ import { AlertingUsage } from '../types';
import { NUM_ALERTING_RULE_TYPES } from '../alerting_usage_collector';
import { parseSimpleRuleTypeBucket } from './parse_simple_rule_type_bucket';
import { groupRulesBySearchType } from './group_rules_by_search_type';
import { MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE } from '../../../common';
import { MaintenanceWindowAttributes } from '../../data/maintenance_window/types';
import { TELEMETRY_MW_COUNT_LIMIT } from '../constants';

interface Opts {
esClient: ElasticsearchClient;
alertIndex: string;
logger: Logger;
}

/** Options accepted by the maintenance window (MW) telemetry collection. */
interface MWOpts {
  /** Logger that receives a warning when the collection fails. */
  logger: Logger;
  /** Saved objects repository used to create the maintenance window finder. */
  savedObjectsClient: ISavedObjectsRepository;
}

type GetTotalCountsResults = Pick<
AlertingUsage,
| 'count_total'
Expand All @@ -48,6 +56,14 @@ type GetTotalCountsResults = Pick<
| 'connectors_per_alert'
> & { errorMessage?: string; hasErrors: boolean };

/**
 * Result of the maintenance window telemetry collection: the three MW counters
 * from AlertingUsage plus the error-reporting fields.
 */
type GetMWTelemetryResults = {
  hasErrors: boolean;
  errorMessage?: string;
} & Pick<
  AlertingUsage,
  'count_mw_total' | 'count_mw_with_repeat_toggle_on' | 'count_mw_with_filter_alert_toggle_on'
>;

interface GetTotalCountInUseResults {
countTotal: number;
countByType: Record<string, number>;
Expand Down Expand Up @@ -490,3 +506,58 @@ export async function getTotalCountInUse({
};
}
}

/**
 * Collects maintenance window (MW) counters for the alerting usage telemetry.
 *
 * Pages through maintenance window saved objects in all namespaces and counts:
 *  - the total number of MWs seen,
 *  - MWs whose `rRule` has an `interval` property ("Repeat" toggle on),
 *  - MWs with a truthy `scopedQuery` ("Filter alerts" toggle on).
 *
 * Errors are never rethrown: on failure a warning is logged and zeroed counters
 * are returned together with `hasErrors: true` and the error message.
 *
 * @param opts.savedObjectsClient repository used to create the point-in-time finder
 * @param opts.logger logger that receives the warning when collection fails
 * @param opts.maxDocuments counting stops once the running total exceeds this value;
 *   defaults to TELEMETRY_MW_COUNT_LIMIT
 * @returns the MW counters plus `hasErrors` / `errorMessage`
 */
export async function getMWTelemetry({
  savedObjectsClient,
  logger,
  maxDocuments = TELEMETRY_MW_COUNT_LIMIT,
}: MWOpts & { maxDocuments?: number }): Promise<GetMWTelemetryResults> {
  try {
    // NOTE(review): a reviewer suggested passing `fields` here to reduce the response to
    // only the attributes that are read below, since the `events` attribute can be quite big.
    const mwFinder = savedObjectsClient.createPointInTimeFinder<MaintenanceWindowAttributes>({
      type: MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE,
      namespaces: ['*'],
      perPage: 100,
    });

    let countMWTotal = 0;
    let countMWWithRepeatToggleON = 0;
    let countMWWithFilterAlertToggleON = 0;

    try {
      mwLoop: for await (const response of mwFinder.find()) {
        for (const mwSavedObject of response.saved_objects) {
          // NOTE(review): the check uses `>` and runs before the increment, so up to
          // maxDocuments + 1 MWs are counted; the existing unit test pins this behavior.
          if (countMWTotal > maxDocuments) break mwLoop;
          countMWTotal += 1;
          // scopedQuery is null when the "Filter alerts" toggle is off
          if (mwSavedObject.attributes.scopedQuery) {
            countMWWithFilterAlertToggleON += 1;
          }
          // rRule has no interval property when the "Repeat" toggle is off
          if (Object.hasOwn(mwSavedObject.attributes.rRule, 'interval')) {
            countMWWithRepeatToggleON += 1;
          }
        }
      }
    } finally {
      // Always release the finder, including when iteration throws part-way through.
      await mwFinder.close();
    }

    return {
      hasErrors: false,
      count_mw_total: countMWTotal,
      count_mw_with_repeat_toggle_on: countMWWithRepeatToggleON,
      count_mw_with_filter_alert_toggle_on: countMWWithFilterAlertToggleON,
    };
  } catch (err) {
    const errorMessage = err?.message ? err.message : err.toString();
    logger.warn(
      `Error executing alerting telemetry task: getTotalMWCount - ${JSON.stringify(err)}`,
      {
        tags: ['alerting', 'telemetry-failed'],
        error: { stack_trace: err?.stack },
      }
    );
    return {
      hasErrors: true,
      errorMessage,
      count_mw_total: 0,
      count_mw_with_repeat_toggle_on: 0,
      count_mw_with_filter_alert_toggle_on: 0,
    };
  }
}
Loading