From 892edf80a7a7ef1adbc3ccdfac1c77d08ee3f924 Mon Sep 17 00:00:00 2001 From: Ahmad Bamieh Date: Mon, 28 Jun 2021 15:04:53 +0300 Subject: [PATCH 1/6] log event loop delay --- .../get_event_loop_threshold_log.test.ts | 43 +++++++++++++++++++ .../logging/get_event_loop_threshold_log.ts | 35 +++++++++++++++ src/core/server/metrics/logging/index.ts | 1 + .../server/metrics/metrics_service.test.ts | 43 ++++++++++++++++++- src/core/server/metrics/metrics_service.ts | 27 ++++++++---- src/core/server/metrics/ops_config.ts | 1 + 6 files changed, 141 insertions(+), 9 deletions(-) create mode 100644 src/core/server/metrics/logging/get_event_loop_threshold_log.test.ts create mode 100644 src/core/server/metrics/logging/get_event_loop_threshold_log.ts diff --git a/src/core/server/metrics/logging/get_event_loop_threshold_log.test.ts b/src/core/server/metrics/logging/get_event_loop_threshold_log.test.ts new file mode 100644 index 0000000000000..6c43d2db8f94f --- /dev/null +++ b/src/core/server/metrics/logging/get_event_loop_threshold_log.test.ts @@ -0,0 +1,43 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +import type { OpsMetrics } from '..'; +import { getEventLoopThresholdLog } from './get_event_loop_threshold_log'; + +function createMockOpsMetrics(eventLoopDelay: number | undefined) { + return { + process: { + event_loop_delay: eventLoopDelay, + }, + } as Pick; +} + +describe('getEventLoopThresholdLog', () => { + it('returns empty message on undefined `process.event_loop_delay`', () => { + const thresholdMs = 200; + const mockDelay = undefined; + const result = getEventLoopThresholdLog(createMockOpsMetrics(mockDelay), thresholdMs); + expect(result.message).toBe(''); + }); + + it('returns empty message when `process.event_loop_delay` is less than threshold', () => { + const thresholdMs = 200; + const mockDelay = 190; + const result = getEventLoopThresholdLog(createMockOpsMetrics(mockDelay), thresholdMs); + expect(result.message).toBe(''); + }); + + it('returns message when `process.event_loop_delay` exceeds the threshold', () => { + const thresholdMs = 200; + const mockDelay = 500; + const result = getEventLoopThresholdLog(createMockOpsMetrics(mockDelay), thresholdMs); + expect(result.message).toMatchInlineSnapshot( + `"Event loop delay threshold exceeded 200ms. Recieved 500.000"` + ); + }); +}); diff --git a/src/core/server/metrics/logging/get_event_loop_threshold_log.ts b/src/core/server/metrics/logging/get_event_loop_threshold_log.ts new file mode 100644 index 0000000000000..d033252b5100f --- /dev/null +++ b/src/core/server/metrics/logging/get_event_loop_threshold_log.ts @@ -0,0 +1,35 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +import numeral from '@elastic/numeral'; +import type { OpsMetrics } from '..'; + +/** + * Generates a message to be logged when event_loop_delay exceeds defined threshold. + * + * @internal + */ +export function getEventLoopThresholdLog( + metrics: Pick, + thresholdMs: number +) { + const { process } = metrics; + // Event loop delay is in ms + const eventLoopDelayVal = process?.event_loop_delay; + let message = ''; + + if (eventLoopDelayVal && eventLoopDelayVal > thresholdMs) { + message = `Event loop delay threshold exceeded ${thresholdMs}ms. Recieved ${numeral( + eventLoopDelayVal + ).format('0.000')}`; + } + + return { + message, + }; +} diff --git a/src/core/server/metrics/logging/index.ts b/src/core/server/metrics/logging/index.ts index 3c1144f486984..7699a5603bb2d 100644 --- a/src/core/server/metrics/logging/index.ts +++ b/src/core/server/metrics/logging/index.ts @@ -7,3 +7,4 @@ */ export { getEcsOpsMetricsLog } from './get_ops_metrics_log'; +export { getEventLoopThresholdLog } from './get_event_loop_threshold_log'; diff --git a/src/core/server/metrics/metrics_service.test.ts b/src/core/server/metrics/metrics_service.test.ts index d7de41fd7ccf7..c1bd1201f67f5 100644 --- a/src/core/server/metrics/metrics_service.test.ts +++ b/src/core/server/metrics/metrics_service.test.ts @@ -17,6 +17,7 @@ import { loggingSystemMock } from '../logging/logging_system.mock'; import { take } from 'rxjs/operators'; const testInterval = 100; +const testEventLoopDelayThreshold = 50; const dummyMetrics = { metricA: 'value', metricB: 'otherValue' }; @@ -30,7 +31,10 @@ describe('MetricsService', () => { jest.useFakeTimers(); const configService = configServiceMock.create({ - atPath: { interval: moment.duration(testInterval) }, + atPath: { + interval: moment.duration(testInterval), + eventLoopDelayThreshold: moment.duration(testEventLoopDelayThreshold), + }, }); const coreContext = mockCoreContext.create({ logger, configService }); metricsService = new MetricsService(coreContext); @@ -214,6 +218,43 @@ describe('MetricsService', () => { ] `); }); + + it('warns when process.event_loop_delay exceeds threshold', async () => { + const firstMetrics = { + process: { + event_loop_delay: 30, + }, + }; + const secondMetrics = { + process: { + event_loop_delay: 100, + }, + }; + + const opsLogger = logger.get('metrics', 'ops'); + + mockOpsCollector.collect + .mockResolvedValueOnce(firstMetrics) + .mockResolvedValueOnce(secondMetrics); + await metricsService.setup({ http: httpMock }); + const { getOpsMetrics$ } = await metricsService.start(); + + const nextEmission = async () => { + jest.advanceTimersByTime(testInterval); + const emission = await getOpsMetrics$().pipe(take(1)).toPromise(); + await new Promise((resolve) => process.nextTick(resolve)); + return emission; + }; + + await nextEmission(); + const opsLogs = loggingSystemMock.collect(opsLogger).warn; + expect(opsLogs.length).toEqual(1); + expect(opsLogs[0]).toMatchInlineSnapshot(` + Array [ + "Event loop delay threshold exceeded 50ms. Recieved 100.000", + ] + `); + }); }); describe('#stop', () => { diff --git a/src/core/server/metrics/metrics_service.ts b/src/core/server/metrics/metrics_service.ts index 78e4dd98f93d6..613cc189e608a 100644 --- a/src/core/server/metrics/metrics_service.ts +++ b/src/core/server/metrics/metrics_service.ts @@ -15,7 +15,7 @@ import { InternalHttpServiceSetup } from '../http'; import { InternalMetricsServiceSetup, InternalMetricsServiceStart, OpsMetrics } from './types'; import { OpsMetricsCollector } from './ops_metrics_collector'; import { opsConfig, OpsConfigType } from './ops_config'; -import { getEcsOpsMetricsLog } from './logging'; +import { getEcsOpsMetricsLog, getEventLoopThresholdLog } from './logging'; interface MetricsServiceSetupDeps { http: InternalHttpServiceSetup; @@ -37,26 +37,32 @@ export class MetricsService } public async setup({ http }: MetricsServiceSetupDeps): Promise { - const config = await this.coreContext.configService + const { + cGroupOverrides, + interval, + eventLoopDelayThreshold, + } = await this.coreContext.configService .atPath(opsConfig.path) .pipe(first()) .toPromise(); + const thresholdMs = eventLoopDelayThreshold.asMilliseconds(); + this.metricsCollector = new OpsMetricsCollector(http.server, { logger: this.logger, - ...config.cGroupOverrides, + ...cGroupOverrides, }); - await this.refreshMetrics(); + await this.refreshMetrics(thresholdMs); this.collectInterval = setInterval(() => { - this.refreshMetrics(); - }, config.interval.asMilliseconds()); + this.refreshMetrics(thresholdMs); + }, interval.asMilliseconds()); const metricsObservable = this.metrics$.asObservable(); this.service = { - collectionInterval: config.interval.asMilliseconds(), + collectionInterval: interval.asMilliseconds(), getOpsMetrics$: () => metricsObservable, }; @@ -71,10 +77,15 @@ export class MetricsService return this.service; } - private async refreshMetrics() { + private async refreshMetrics(thresholdMs: number) { const metrics = await this.metricsCollector!.collect(); const { message, meta } = getEcsOpsMetricsLog(metrics); + const { message: eventLoopThresholdMessage } = getEventLoopThresholdLog(metrics, thresholdMs); + this.opsMetricsLogger.debug(message!, meta); + if (eventLoopThresholdMessage) { + this.opsMetricsLogger.warn(eventLoopThresholdMessage); + } this.metricsCollector!.reset(); this.metrics$.next(metrics); } diff --git a/src/core/server/metrics/ops_config.ts b/src/core/server/metrics/ops_config.ts index 886900b854f9f..5c18e655ff816 100644 --- a/src/core/server/metrics/ops_config.ts +++ b/src/core/server/metrics/ops_config.ts @@ -12,6 +12,7 @@ export const opsConfig = { path: 'ops', schema: schema.object({ interval: schema.duration({ defaultValue: '5s' }), + eventLoopDelayThreshold: schema.duration({ defaultValue: '350ms' }), cGroupOverrides: schema.object({ cpuPath: schema.maybe(schema.string()), cpuAcctPath: schema.maybe(schema.string()), From e22bf1211e239396046896b09468eb8db8becb15 Mon Sep 17 00:00:00 2001 From: Ahmad Bamieh Date: Mon, 28 Jun 2021 18:28:16 +0300 Subject: [PATCH 2/6] track delay thresholds via kibana collection --- .../collectors/event_loop_delays/constants.ts | 20 +++ .../event_loop_delays.mocks.ts | 1 + .../event_loop_delays.test.ts | 74 +---------- .../event_loop_delays/event_loop_delays.ts | 41 +----- .../collectors/event_loop_delays/index.ts | 3 +- .../event_loop_delays/track_delays.test.ts | 87 +++++++++++++ .../event_loop_delays/track_delays.ts | 48 +++++++ .../event_loop_delays/track_threshold.test.ts | 121 ++++++++++++++++++ .../event_loop_delays/track_threshold.ts | 60 +++++++++ .../kibana_usage_collection/server/plugin.ts | 13 +- 10 files changed, 353 insertions(+), 115 deletions(-) create mode 100644 src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_delays.test.ts create mode 100644 src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_delays.ts create mode 100644 src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.test.ts create mode 100644 src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.ts diff --git a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/constants.ts b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/constants.ts index 1753c87c9d005..d6201deff5fec 100644 --- a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/constants.ts +++ b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/constants.ts @@ -35,3 +35,23 @@ export const MONITOR_EVENT_LOOP_DELAYS_START = 1 * 60 * 1000; * Event loop monitoring sampling rate in milliseconds. */ export const MONITOR_EVENT_LOOP_DELAYS_RESOLUTION = 10; + +/** + * Mean event loop delay threshold for logging a warning. + */ +export const MONITOR_EVENT_LOOP_WARN_THRESHOLD = 350; + +/** + * Start monitoring the event loop threshold after 1 minute + */ +export const MONITOR_EVENT_LOOP_THRESHOLD_START = 1 * 60 * 1000; + +/** + * Check the event loop utilization every 30 seconds + */ +export const MONITOR_EVENT_LOOP_THRESHOLD_INTERVAL = 30 * 1000; + +/** + * Nanosecond to milisecond conversion unit + */ +export const ONE_MILLISECOND_AS_NANOSECONDS = 1000000; diff --git a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/event_loop_delays.mocks.ts b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/event_loop_delays.mocks.ts index 6b03d3cc5cbd1..f266a27a7034f 100644 --- a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/event_loop_delays.mocks.ts +++ b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/event_loop_delays.mocks.ts @@ -17,6 +17,7 @@ export const monitorEventLoopDelay = jest.fn().mockReturnValue({ percentile: mockMonitorPercentile, disable: mockMonitorDisable, reset: mockMonitorReset, + ...createMockHistogram(), }); jest.doMock('perf_hooks', () => ({ diff --git a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/event_loop_delays.test.ts b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/event_loop_delays.test.ts index d03236a9756b3..b40030e210176 100644 --- a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/event_loop_delays.test.ts +++ b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/event_loop_delays.test.ts @@ -6,8 +6,6 @@ * Side Public License, v 1. */ -import { Subject } from 'rxjs'; - import { mockMonitorEnable, mockMonitorPercentile, @@ -15,8 +13,7 @@ import { mockMonitorReset, mockMonitorDisable, } from './event_loop_delays.mocks'; -import { savedObjectsRepositoryMock } from '../../../../../core/server/mocks'; -import { startTrackingEventLoopDelaysUsage, EventLoopDelaysCollector } from './event_loop_delays'; +import { EventLoopDelaysCollector } from './event_loop_delays'; describe('EventLoopDelaysCollector', () => { jest.useFakeTimers('modern'); @@ -64,72 +61,3 @@ describe('EventLoopDelaysCollector', () => { expect(mockMonitorDisable).toBeCalledTimes(1); }); }); - -describe('startTrackingEventLoopDelaysUsage', () => { - const mockInternalRepository = savedObjectsRepositoryMock.create(); - const stopMonitoringEventLoop$ = new Subject(); - - beforeAll(() => jest.useFakeTimers('modern')); - beforeEach(() => jest.clearAllMocks()); - afterEach(() => stopMonitoringEventLoop$.next()); - - it('initializes EventLoopDelaysCollector and starts timer', () => { - const collectionStartDelay = 1000; - startTrackingEventLoopDelaysUsage( - mockInternalRepository, - stopMonitoringEventLoop$, - collectionStartDelay - ); - - expect(monitorEventLoopDelay).toBeCalledTimes(1); - expect(mockMonitorPercentile).toBeCalledTimes(0); - jest.advanceTimersByTime(collectionStartDelay); - expect(mockMonitorPercentile).toBeCalled(); - }); - - it('stores event loop delays every collectionInterval duration', () => { - const collectionStartDelay = 100; - const collectionInterval = 1000; - startTrackingEventLoopDelaysUsage( - mockInternalRepository, - stopMonitoringEventLoop$, - collectionStartDelay, - collectionInterval - ); - - expect(mockInternalRepository.create).toBeCalledTimes(0); - jest.advanceTimersByTime(collectionStartDelay); - expect(mockInternalRepository.create).toBeCalledTimes(1); - jest.advanceTimersByTime(collectionInterval); - expect(mockInternalRepository.create).toBeCalledTimes(2); - jest.advanceTimersByTime(collectionInterval); - expect(mockInternalRepository.create).toBeCalledTimes(3); - }); - - it('resets histogram every histogramReset duration', () => { - const collectionStartDelay = 0; - const collectionInterval = 1000; - const histogramReset = 5000; - startTrackingEventLoopDelaysUsage( - mockInternalRepository, - stopMonitoringEventLoop$, - collectionStartDelay, - collectionInterval, - histogramReset - ); - - expect(mockMonitorReset).toBeCalledTimes(0); - jest.advanceTimersByTime(collectionInterval * 5); - expect(mockMonitorReset).toBeCalledTimes(1); - jest.advanceTimersByTime(collectionInterval * 5); - expect(mockMonitorReset).toBeCalledTimes(2); - }); - - it('stops monitoring event loop delays once stopMonitoringEventLoop$.next is called', () => { - startTrackingEventLoopDelaysUsage(mockInternalRepository, stopMonitoringEventLoop$); - - expect(mockMonitorDisable).toBeCalledTimes(0); - stopMonitoringEventLoop$.next(); - expect(mockMonitorDisable).toBeCalledTimes(1); - }); -}); diff --git a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/event_loop_delays.ts b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/event_loop_delays.ts index 655cba580fc5d..f5de44a061d5a 100644 --- a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/event_loop_delays.ts +++ b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/event_loop_delays.ts @@ -8,16 +8,7 @@ import type { EventLoopDelayMonitor } from 'perf_hooks'; import { monitorEventLoopDelay } from 'perf_hooks'; -import { takeUntil, finalize, map } from 'rxjs/operators'; -import { Observable, timer } from 'rxjs'; -import type { ISavedObjectsRepository } from 'kibana/server'; -import { - MONITOR_EVENT_LOOP_DELAYS_START, - MONITOR_EVENT_LOOP_DELAYS_INTERVAL, - MONITOR_EVENT_LOOP_DELAYS_RESET, - MONITOR_EVENT_LOOP_DELAYS_RESOLUTION, -} from './constants'; -import { storeHistogram } from './saved_objects'; +import { MONITOR_EVENT_LOOP_DELAYS_RESOLUTION } from './constants'; export interface IntervalHistogram { fromTimestamp: string; @@ -77,33 +68,3 @@ export class EventLoopDelaysCollector { this.loopMonitor.disable(); } } - -/** - * The monitoring of the event loop starts immediately. - * The first collection of the histogram happens after 1 minute. - * The daily histogram data is updated every 1 hour. - */ -export function startTrackingEventLoopDelaysUsage( - internalRepository: ISavedObjectsRepository, - stopMonitoringEventLoop$: Observable, - collectionStartDelay = MONITOR_EVENT_LOOP_DELAYS_START, - collectionInterval = MONITOR_EVENT_LOOP_DELAYS_INTERVAL, - histogramReset = MONITOR_EVENT_LOOP_DELAYS_RESET -) { - const eventLoopDelaysCollector = new EventLoopDelaysCollector(); - - const resetOnCount = Math.ceil(histogramReset / collectionInterval); - timer(collectionStartDelay, collectionInterval) - .pipe( - map((i) => (i + 1) % resetOnCount === 0), - takeUntil(stopMonitoringEventLoop$), - finalize(() => eventLoopDelaysCollector.stop()) - ) - .subscribe(async (shouldReset) => { - const histogram = eventLoopDelaysCollector.collect(); - if (shouldReset) { - eventLoopDelaysCollector.reset(); - } - await storeHistogram(histogram, internalRepository); - }); -} diff --git a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/index.ts b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/index.ts index 693b173c2759e..17b23e723c804 100644 --- a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/index.ts +++ b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/index.ts @@ -6,6 +6,7 @@ * Side Public License, v 1. */ -export { startTrackingEventLoopDelaysUsage } from './event_loop_delays'; export { registerEventLoopDelaysCollector } from './event_loop_delays_usage_collector'; +export { startTrackingEventLoopDelaysThreshold } from './track_threshold'; +export { startTrackingEventLoopDelaysUsage } from './track_delays'; export { SAVED_OBJECTS_DAILY_TYPE } from './saved_objects'; diff --git a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_delays.test.ts b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_delays.test.ts new file mode 100644 index 0000000000000..351ee9e9ffa56 --- /dev/null +++ b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_delays.test.ts @@ -0,0 +1,87 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +import { Subject } from 'rxjs'; + +import { + mockMonitorPercentile, + monitorEventLoopDelay, + mockMonitorReset, + mockMonitorDisable, +} from './event_loop_delays.mocks'; +import { savedObjectsRepositoryMock } from '../../../../../core/server/mocks'; +import { startTrackingEventLoopDelaysUsage } from './track_delays'; + +describe('startTrackingEventLoopDelaysUsage', () => { + const mockInternalRepository = savedObjectsRepositoryMock.create(); + const stopMonitoringEventLoop$ = new Subject(); + + beforeAll(() => jest.useFakeTimers('modern')); + beforeEach(() => jest.clearAllMocks()); + afterEach(() => stopMonitoringEventLoop$.next()); + + it('initializes EventLoopDelaysCollector and starts timer', () => { + const collectionStartDelay = 1000; + startTrackingEventLoopDelaysUsage( + mockInternalRepository, + stopMonitoringEventLoop$, + collectionStartDelay + ); + + expect(monitorEventLoopDelay).toBeCalledTimes(1); + expect(mockMonitorPercentile).toBeCalledTimes(0); + jest.advanceTimersByTime(collectionStartDelay); + expect(mockMonitorPercentile).toBeCalled(); + }); + + it('stores event loop delays every collectionInterval duration', () => { + const collectionStartDelay = 100; + const collectionInterval = 1000; + startTrackingEventLoopDelaysUsage( + mockInternalRepository, + stopMonitoringEventLoop$, + collectionStartDelay, + collectionInterval + ); + + expect(mockInternalRepository.create).toBeCalledTimes(0); + jest.advanceTimersByTime(collectionStartDelay); + expect(mockInternalRepository.create).toBeCalledTimes(1); + jest.advanceTimersByTime(collectionInterval); + expect(mockInternalRepository.create).toBeCalledTimes(2); + jest.advanceTimersByTime(collectionInterval); + expect(mockInternalRepository.create).toBeCalledTimes(3); + }); + + it('resets histogram every histogramReset duration', () => { + const collectionStartDelay = 0; + const collectionInterval = 1000; + const histogramReset = 5000; + startTrackingEventLoopDelaysUsage( + mockInternalRepository, + stopMonitoringEventLoop$, + collectionStartDelay, + collectionInterval, + histogramReset + ); + + expect(mockMonitorReset).toBeCalledTimes(0); + jest.advanceTimersByTime(collectionInterval * 5); + expect(mockMonitorReset).toBeCalledTimes(1); + jest.advanceTimersByTime(collectionInterval * 5); + expect(mockMonitorReset).toBeCalledTimes(2); + }); + + it('stops monitoring event loop delays once stopMonitoringEventLoop$.next is called', () => { + startTrackingEventLoopDelaysUsage(mockInternalRepository, stopMonitoringEventLoop$); + + expect(mockMonitorDisable).toBeCalledTimes(0); + stopMonitoringEventLoop$.next(); + expect(mockMonitorDisable).toBeCalledTimes(1); + }); +}); diff --git a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_delays.ts b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_delays.ts new file mode 100644 index 0000000000000..28850d44f12eb --- /dev/null +++ b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_delays.ts @@ -0,0 +1,48 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +import { takeUntil, finalize, map } from 'rxjs/operators'; +import { Observable, timer } from 'rxjs'; +import type { ISavedObjectsRepository } from 'kibana/server'; +import { + MONITOR_EVENT_LOOP_DELAYS_START, + MONITOR_EVENT_LOOP_DELAYS_INTERVAL, + MONITOR_EVENT_LOOP_DELAYS_RESET, +} from './constants'; +import { storeHistogram } from './saved_objects'; +import { EventLoopDelaysCollector } from './event_loop_delays'; + +/** + * The monitoring of the event loop starts immediately. + * The first collection of the histogram happens after 1 minute. + * The daily histogram data is updated every 1 hour. + */ +export function startTrackingEventLoopDelaysUsage( + internalRepository: ISavedObjectsRepository, + stopMonitoringEventLoop$: Observable, + collectionStartDelay = MONITOR_EVENT_LOOP_DELAYS_START, + collectionInterval = MONITOR_EVENT_LOOP_DELAYS_INTERVAL, + histogramReset = MONITOR_EVENT_LOOP_DELAYS_RESET +) { + const eventLoopDelaysCollector = new EventLoopDelaysCollector(); + + const resetOnCount = Math.ceil(histogramReset / collectionInterval); + timer(collectionStartDelay, collectionInterval) + .pipe( + map((i) => (i + 1) % resetOnCount === 0), + takeUntil(stopMonitoringEventLoop$), + finalize(() => eventLoopDelaysCollector.stop()) + ) + .subscribe(async (shouldReset) => { + const histogram = eventLoopDelaysCollector.collect(); + if (shouldReset) { + eventLoopDelaysCollector.reset(); + } + await storeHistogram(histogram, internalRepository); + }); +} diff --git a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.test.ts b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.test.ts new file mode 100644 index 0000000000000..61be57f4bd0fd --- /dev/null +++ b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.test.ts @@ -0,0 +1,121 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +import { Subject } from 'rxjs'; + +import { mockMonitorPercentile, monitorEventLoopDelay } from './event_loop_delays.mocks'; + +import { startTrackingEventLoopDelaysThreshold } from './track_threshold'; +import { loggingSystemMock } from '../../../../../core/server/mocks'; +import { usageCountersServiceMock } from '../../../../usage_collection/server/usage_counters/usage_counters_service.mock'; + +describe('startTrackingEventLoopDelaysThreshold', () => { + const logger = loggingSystemMock.createLogger(); + const stopMonitoringEventLoop$ = new Subject(); + const mockUsageCountersSetup = usageCountersServiceMock.createSetupContract(); + const mockEventLoopCounter = mockUsageCountersSetup.createUsageCounter('testCounter'); + + beforeAll(() => jest.useFakeTimers('modern')); + beforeEach(() => jest.clearAllMocks()); + afterEach(() => stopMonitoringEventLoop$.next()); + + it('initializes EventLoopDelaysCollector and starts timer', () => { + const collectionStartDelay = 1000; + const collectionWarnThreshold = 1000; + startTrackingEventLoopDelaysThreshold( + mockEventLoopCounter, + logger, + stopMonitoringEventLoop$, + collectionWarnThreshold, + collectionStartDelay + ); + + expect(monitorEventLoopDelay).toBeCalledTimes(1); + expect(mockMonitorPercentile).toBeCalledTimes(0); + jest.advanceTimersByTime(collectionStartDelay); + expect(mockMonitorPercentile).toBeCalled(); + }); + + it('logs a warning and increments usage counter when the mean delay exceeds the threshold', () => { + const collectionStartDelay = 100; + const collectionInterval = 1000; + const collectionWarnThreshold = 10; + + startTrackingEventLoopDelaysThreshold( + mockEventLoopCounter, + logger, + stopMonitoringEventLoop$, + collectionWarnThreshold, + collectionStartDelay, + collectionInterval + ); + + expect(logger.warn).toBeCalledTimes(0); + expect(mockEventLoopCounter.incrementCounter).toBeCalledTimes(0); + + jest.advanceTimersByTime(collectionStartDelay); + expect(logger.warn).toBeCalledTimes(1); + expect(mockEventLoopCounter.incrementCounter).toBeCalledTimes(1); + + jest.advanceTimersByTime(collectionInterval); + expect(logger.warn).toBeCalledTimes(2); + expect(mockEventLoopCounter.incrementCounter).toBeCalledTimes(2); + + jest.advanceTimersByTime(collectionInterval); + expect(mockEventLoopCounter.incrementCounter).toBeCalledTimes(3); + expect(logger.warn).toBeCalledTimes(3); + }); + + // it('does nothing if threshold does not exceed mean delay', () => { + // const collectionStartDelay = 100; + // const collectionInterval = 1000; + // const collectionWarnThreshold = 1000; + // startTrackingEventLoopDelaysThreshold( + // mockEventLoopCounter, + // logger, + // stopMonitoringEventLoop$, + // collectionWarnThreshold, + // collectionStartDelay + // ); + + // expect(mockInternalRepository.create).toBeCalledTimes(0); + // jest.advanceTimersByTime(collectionStartDelay); + // expect(mockInternalRepository.create).toBeCalledTimes(1); + // jest.advanceTimersByTime(collectionInterval); + // expect(mockInternalRepository.create).toBeCalledTimes(2); + // jest.advanceTimersByTime(collectionInterval); + // expect(mockInternalRepository.create).toBeCalledTimes(3); + // }); + + // it('resets histogram every histogramReset duration', () => { + // const collectionStartDelay = 0; + // const collectionInterval = 1000; + // const histogramReset = 5000; + // startTrackingEventLoopDelaysUsage( + // mockInternalRepository, + // stopMonitoringEventLoop$, + // collectionStartDelay, + // collectionInterval, + // histogramReset + // ); + + // expect(mockMonitorReset).toBeCalledTimes(0); + // jest.advanceTimersByTime(collectionInterval * 5); + // expect(mockMonitorReset).toBeCalledTimes(1); + // jest.advanceTimersByTime(collectionInterval * 5); + // expect(mockMonitorReset).toBeCalledTimes(2); + // }); + + // it('stops monitoring event loop delays once stopMonitoringEventLoop$.next is called', () => { + // startTrackingEventLoopDelaysUsage(mockInternalRepository, stopMonitoringEventLoop$); + + // expect(mockMonitorDisable).toBeCalledTimes(0); + // stopMonitoringEventLoop$.next(); + // expect(mockMonitorDisable).toBeCalledTimes(1); + // }); +}); diff --git a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.ts b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.ts new file mode 100644 index 0000000000000..bce99e995b8ec --- /dev/null +++ b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.ts @@ -0,0 +1,60 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +import { takeUntil, finalize } from 'rxjs/operators'; +import { Observable, timer } from 'rxjs'; +import type { Logger } from 'kibana/server'; +import moment from 'moment'; +import type { UsageCounter } from '../../../../usage_collection/server'; +import { + MONITOR_EVENT_LOOP_THRESHOLD_START, + MONITOR_EVENT_LOOP_THRESHOLD_INTERVAL, + MONITOR_EVENT_LOOP_WARN_THRESHOLD, + ONE_MILLISECOND_AS_NANOSECONDS, +} from './constants'; +import { EventLoopDelaysCollector } from './event_loop_delays'; + +/** + * The monitoring of the event loop starts immediately. + * The first collection happens after 1 minute. + * The histogram is collected and reset every 20 seconds. + * logs a warning when threshold is exceeded (350ms) and increments a usage counter. + */ +export function startTrackingEventLoopDelaysThreshold( + eventLoopCounter: UsageCounter, + logger: Logger, + stopMonitoringEventLoop$: Observable, + warnThreshold = MONITOR_EVENT_LOOP_WARN_THRESHOLD, + collectionStartDelay = MONITOR_EVENT_LOOP_THRESHOLD_START, + collectionInterval = MONITOR_EVENT_LOOP_THRESHOLD_INTERVAL +) { + const eventLoopDelaysCollector = new EventLoopDelaysCollector(); + timer(collectionStartDelay, collectionInterval) + .pipe( + takeUntil(stopMonitoringEventLoop$), + finalize(() => eventLoopDelaysCollector.stop()) + ) + .subscribe(async () => { + const { mean } = eventLoopDelaysCollector.collect(); + const meanDurationMs = moment + .duration(mean / ONE_MILLISECOND_AS_NANOSECONDS) + .asMilliseconds(); + + if (meanDurationMs > warnThreshold) { + logger.warn( + `Average event loop delay threshold exceeded ${warnThreshold}ms. Received ${meanDurationMs}ms.` + ); + + eventLoopCounter.incrementCounter({ + counterName: 'delay_threshold_exceeded', + }); + } + + eventLoopDelaysCollector.reset(); + }); +} diff --git a/src/plugins/kibana_usage_collection/server/plugin.ts b/src/plugins/kibana_usage_collection/server/plugin.ts index 4ec717c48610e..dadb4283e84a7 100644 --- a/src/plugins/kibana_usage_collection/server/plugin.ts +++ b/src/plugins/kibana_usage_collection/server/plugin.ts @@ -6,7 +6,7 @@ * Side Public License, v 1. */ -import type { UsageCollectionSetup } from 'src/plugins/usage_collection/server'; +import type { UsageCollectionSetup, UsageCounter } from 'src/plugins/usage_collection/server'; import { Subject, Observable } from 'rxjs'; import type { PluginInitializerContext, @@ -24,6 +24,7 @@ import type { import { SavedObjectsClient } from '../../../core/server'; import { startTrackingEventLoopDelaysUsage, + startTrackingEventLoopDelaysThreshold, SAVED_OBJECTS_DAILY_TYPE, } from './collectors/event_loop_delays'; import { @@ -59,6 +60,7 @@ export class KibanaUsageCollectionPlugin implements Plugin { private uiSettingsClient?: IUiSettingsClient; private metric$: Subject; private coreUsageData?: CoreUsageDataStart; + private eventLoopUsageCounter?: UsageCounter; private pluginStop$: Subject; constructor(initializerContext: PluginInitializerContext) { @@ -70,6 +72,7 @@ export class KibanaUsageCollectionPlugin implements Plugin { public setup(coreSetup: CoreSetup, { usageCollection }: KibanaUsageCollectionPluginsDepsSetup) { usageCollection.createUsageCounter('uiCounters'); + this.eventLoopUsageCounter = usageCollection.createUsageCounter('eventLoop'); this.registerUsageCollectors( usageCollection, coreSetup, @@ -80,6 +83,9 @@ export class KibanaUsageCollectionPlugin implements Plugin { } public start(core: CoreStart) { + if (!this.eventLoopUsageCounter) { + throw new Error('#setup must be called first'); + } const { savedObjects, uiSettings } = core; this.savedObjectsClient = savedObjects.createInternalRepository([SAVED_OBJECTS_DAILY_TYPE]); const savedObjectsClient = new SavedObjectsClient(this.savedObjectsClient); @@ -87,6 +93,11 @@ export class KibanaUsageCollectionPlugin implements Plugin { core.metrics.getOpsMetrics$().subscribe(this.metric$); this.coreUsageData = core.coreUsageData; startTrackingEventLoopDelaysUsage(this.savedObjectsClient, this.pluginStop$.asObservable()); + startTrackingEventLoopDelaysThreshold( + this.eventLoopUsageCounter, + this.logger, + this.pluginStop$.asObservable() + ); } public stop() { From 60c994aa9c464e7610f610240f6f53424fce866e Mon Sep 17 00:00:00 2001 From: Ahmad Bamieh Date: Tue, 29 Jun 2021 10:38:40 +0300 Subject: [PATCH 3/6] update tests --- .../event_loop_delays/track_threshold.test.ts | 74 +++++++------------ 1 file changed, 27 insertions(+), 47 deletions(-) diff --git a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.test.ts b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.test.ts index 61be57f4bd0fd..e1e3975467cd9 100644 --- a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.test.ts +++ b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.test.ts @@ -7,9 +7,11 @@ */ import { Subject } from 'rxjs'; - -import { mockMonitorPercentile, monitorEventLoopDelay } from './event_loop_delays.mocks'; - +import { + mockMonitorPercentile, + monitorEventLoopDelay, + mockMonitorReset, +} from './event_loop_delays.mocks'; import { startTrackingEventLoopDelaysThreshold } from './track_threshold'; import { loggingSystemMock } from '../../../../../core/server/mocks'; import { usageCountersServiceMock } from '../../../../usage_collection/server/usage_counters/usage_counters_service.mock'; @@ -57,65 +59,43 @@ describe('startTrackingEventLoopDelaysThreshold', () => { expect(logger.warn).toBeCalledTimes(0); expect(mockEventLoopCounter.incrementCounter).toBeCalledTimes(0); + expect(mockMonitorReset).toBeCalledTimes(0); jest.advanceTimersByTime(collectionStartDelay); expect(logger.warn).toBeCalledTimes(1); expect(mockEventLoopCounter.incrementCounter).toBeCalledTimes(1); + expect(mockMonitorReset).toBeCalledTimes(1); jest.advanceTimersByTime(collectionInterval); expect(logger.warn).toBeCalledTimes(2); expect(mockEventLoopCounter.incrementCounter).toBeCalledTimes(2); + expect(mockMonitorReset).toBeCalledTimes(2); jest.advanceTimersByTime(collectionInterval); expect(mockEventLoopCounter.incrementCounter).toBeCalledTimes(3); expect(logger.warn).toBeCalledTimes(3); + expect(mockMonitorReset).toBeCalledTimes(3); }); - // it('does nothing if threshold does not exceed mean delay', () => { - // const collectionStartDelay = 100; - // const collectionInterval = 1000; - // const collectionWarnThreshold = 1000; - // startTrackingEventLoopDelaysThreshold( - // mockEventLoopCounter, - // logger, - // stopMonitoringEventLoop$, - // collectionWarnThreshold, - // collectionStartDelay - // ); - - // expect(mockInternalRepository.create).toBeCalledTimes(0); - // jest.advanceTimersByTime(collectionStartDelay); - // expect(mockInternalRepository.create).toBeCalledTimes(1); - // jest.advanceTimersByTime(collectionInterval); - // expect(mockInternalRepository.create).toBeCalledTimes(2); - // jest.advanceTimersByTime(collectionInterval); - // expect(mockInternalRepository.create).toBeCalledTimes(3); - // }); - - // it('resets histogram every histogramReset duration', () => { - // const collectionStartDelay = 0; - // const collectionInterval = 1000; - // const histogramReset = 5000; - // startTrackingEventLoopDelaysUsage( - // mockInternalRepository, - // stopMonitoringEventLoop$, - // collectionStartDelay, - // collectionInterval, - // histogramReset - // ); + it('does not log warning or increment usage if threshold did not exceed mean delay', () => { + const collectionStartDelay = 100; + const collectionWarnThreshold = 15; - // expect(mockMonitorReset).toBeCalledTimes(0); - // jest.advanceTimersByTime(collectionInterval * 5); - // expect(mockMonitorReset).toBeCalledTimes(1); - // jest.advanceTimersByTime(collectionInterval * 5); - // expect(mockMonitorReset).toBeCalledTimes(2); - // }); + startTrackingEventLoopDelaysThreshold( + mockEventLoopCounter, + logger, + stopMonitoringEventLoop$, + collectionWarnThreshold, + collectionStartDelay + ); - // it('stops monitoring event loop delays once stopMonitoringEventLoop$.next is called', () => { - // startTrackingEventLoopDelaysUsage(mockInternalRepository, stopMonitoringEventLoop$); + expect(logger.warn).toBeCalledTimes(0); + expect(mockEventLoopCounter.incrementCounter).toBeCalledTimes(0); + expect(mockMonitorReset).toBeCalledTimes(0); - // expect(mockMonitorDisable).toBeCalledTimes(0); - // stopMonitoringEventLoop$.next(); - // expect(mockMonitorDisable).toBeCalledTimes(1); - // }); + jest.advanceTimersByTime(collectionStartDelay); + expect(logger.warn).toBeCalledTimes(0); + expect(mockEventLoopCounter.incrementCounter).toBeCalledTimes(0); + expect(mockMonitorReset).toBeCalledTimes(1); + }); }); From 821372ec2b25b07b5e7a3fa5fcfbf58d08ccef3e Mon Sep 17 00:00:00 2001 From: Ahmad Bamieh Date: Tue, 29 Jun 2021 10:42:05 +0300 Subject: [PATCH 4/6] revert metrics changes --- .../get_event_loop_threshold_log.test.ts | 43 ------------------- .../logging/get_event_loop_threshold_log.ts | 35 --------------- src/core/server/metrics/logging/index.ts | 1 - .../server/metrics/metrics_service.test.ts | 43 +------------------ src/core/server/metrics/metrics_service.ts | 27 ++++-------- src/core/server/metrics/ops_config.ts | 1 - 6 files changed, 9 insertions(+), 141 deletions(-) delete mode 100644 src/core/server/metrics/logging/get_event_loop_threshold_log.test.ts delete mode 100644 src/core/server/metrics/logging/get_event_loop_threshold_log.ts diff --git a/src/core/server/metrics/logging/get_event_loop_threshold_log.test.ts b/src/core/server/metrics/logging/get_event_loop_threshold_log.test.ts deleted file mode 100644 index 6c43d2db8f94f..0000000000000 --- a/src/core/server/metrics/logging/get_event_loop_threshold_log.test.ts +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the Server Side Public License, v 1; you may not use this file except - * in compliance with, at your election, the Elastic License 2.0 or the Server - * Side Public License, v 1. - */ - -import type { OpsMetrics } from '..'; -import { getEventLoopThresholdLog } from './get_event_loop_threshold_log'; - -function createMockOpsMetrics(eventLoopDelay: number | undefined) { - return { - process: { - event_loop_delay: eventLoopDelay, - }, - } as Pick; -} - -describe('getEventLoopThresholdLog', () => { - it('returns empty message on undefined `process.event_loop_delay`', () => { - const thresholdMs = 200; - const mockDelay = undefined; - const result = getEventLoopThresholdLog(createMockOpsMetrics(mockDelay), thresholdMs); - expect(result.message).toBe(''); - }); - - it('returns empty message when `process.event_loop_delay` is less than threshold', () => { - const thresholdMs = 200; - const mockDelay = 190; - const result = getEventLoopThresholdLog(createMockOpsMetrics(mockDelay), thresholdMs); - expect(result.message).toBe(''); - }); - - it('returns message when `process.event_loop_delay` exceeds the threshold', () => { - const thresholdMs = 200; - const mockDelay = 500; - const result = getEventLoopThresholdLog(createMockOpsMetrics(mockDelay), thresholdMs); - expect(result.message).toMatchInlineSnapshot( - `"Event loop delay threshold exceeded 200ms. Recieved 500.000"` - ); - }); -}); diff --git a/src/core/server/metrics/logging/get_event_loop_threshold_log.ts b/src/core/server/metrics/logging/get_event_loop_threshold_log.ts deleted file mode 100644 index d033252b5100f..0000000000000 --- a/src/core/server/metrics/logging/get_event_loop_threshold_log.ts +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the Server Side Public License, v 1; you may not use this file except - * in compliance with, at your election, the Elastic License 2.0 or the Server - * Side Public License, v 1. - */ - -import numeral from '@elastic/numeral'; -import type { OpsMetrics } from '..'; - -/** - * Generates a message to be logged when event_loop_delay exceeds defined threshold. - * - * @internal - */ -export function getEventLoopThresholdLog( - metrics: Pick, - thresholdMs: number -) { - const { process } = metrics; - // Event loop delay is in ms - const eventLoopDelayVal = process?.event_loop_delay; - let message = ''; - - if (eventLoopDelayVal && eventLoopDelayVal > thresholdMs) { - message = `Event loop delay threshold exceeded ${thresholdMs}ms. Recieved ${numeral( - eventLoopDelayVal - ).format('0.000')}`; - } - - return { - message, - }; -} diff --git a/src/core/server/metrics/logging/index.ts b/src/core/server/metrics/logging/index.ts index 7699a5603bb2d..3c1144f486984 100644 --- a/src/core/server/metrics/logging/index.ts +++ b/src/core/server/metrics/logging/index.ts @@ -7,4 +7,3 @@ */ export { getEcsOpsMetricsLog } from './get_ops_metrics_log'; -export { getEventLoopThresholdLog } from './get_event_loop_threshold_log'; diff --git a/src/core/server/metrics/metrics_service.test.ts b/src/core/server/metrics/metrics_service.test.ts index c1bd1201f67f5..d7de41fd7ccf7 100644 --- a/src/core/server/metrics/metrics_service.test.ts +++ b/src/core/server/metrics/metrics_service.test.ts @@ -17,7 +17,6 @@ import { loggingSystemMock } from '../logging/logging_system.mock'; import { take } from 'rxjs/operators'; const testInterval = 100; -const testEventLoopDelayThreshold = 50; const dummyMetrics = { metricA: 'value', metricB: 'otherValue' }; @@ -31,10 +30,7 @@ describe('MetricsService', () => { jest.useFakeTimers(); const configService = configServiceMock.create({ - atPath: { - interval: moment.duration(testInterval), - eventLoopDelayThreshold: moment.duration(testEventLoopDelayThreshold), - }, + atPath: { interval: moment.duration(testInterval) }, }); const coreContext = mockCoreContext.create({ logger, configService }); metricsService = new MetricsService(coreContext); @@ -218,43 +214,6 @@ describe('MetricsService', () => { ] `); }); - - it('warns when process.event_loop_delay exceeds threshold', async () => { - const firstMetrics = { - process: { - event_loop_delay: 30, - }, - }; - const secondMetrics = { - process: { - event_loop_delay: 100, - }, - }; - - const opsLogger = logger.get('metrics', 'ops'); - - mockOpsCollector.collect - .mockResolvedValueOnce(firstMetrics) - .mockResolvedValueOnce(secondMetrics); - await metricsService.setup({ http: httpMock }); - const { getOpsMetrics$ } = await metricsService.start(); - - const nextEmission = async () => { - jest.advanceTimersByTime(testInterval); - const emission = await getOpsMetrics$().pipe(take(1)).toPromise(); - await new Promise((resolve) => process.nextTick(resolve)); - return emission; - }; - - await nextEmission(); - const opsLogs = loggingSystemMock.collect(opsLogger).warn; - expect(opsLogs.length).toEqual(1); - expect(opsLogs[0]).toMatchInlineSnapshot(` - Array [ - "Event loop delay threshold exceeded 50ms. Recieved 100.000", - ] - `); - }); }); describe('#stop', () => { diff --git a/src/core/server/metrics/metrics_service.ts b/src/core/server/metrics/metrics_service.ts index 613cc189e608a..78e4dd98f93d6 100644 --- a/src/core/server/metrics/metrics_service.ts +++ b/src/core/server/metrics/metrics_service.ts @@ -15,7 +15,7 @@ import { InternalHttpServiceSetup } from '../http'; import { InternalMetricsServiceSetup, InternalMetricsServiceStart, OpsMetrics } from './types'; import { OpsMetricsCollector } from './ops_metrics_collector'; import { opsConfig, OpsConfigType } from './ops_config'; -import { getEcsOpsMetricsLog, getEventLoopThresholdLog } from './logging'; +import { getEcsOpsMetricsLog } from './logging'; interface MetricsServiceSetupDeps { http: InternalHttpServiceSetup; @@ -37,32 +37,26 @@ export class MetricsService } public async setup({ http }: MetricsServiceSetupDeps): Promise { - const { - cGroupOverrides, - interval, - eventLoopDelayThreshold, - } = await this.coreContext.configService + const config = await this.coreContext.configService .atPath(opsConfig.path) .pipe(first()) .toPromise(); - const thresholdMs = eventLoopDelayThreshold.asMilliseconds(); - this.metricsCollector = new OpsMetricsCollector(http.server, { logger: this.logger, - ...cGroupOverrides, + ...config.cGroupOverrides, }); - await this.refreshMetrics(thresholdMs); + await this.refreshMetrics(); this.collectInterval = setInterval(() => { - this.refreshMetrics(thresholdMs); - }, interval.asMilliseconds()); + this.refreshMetrics(); + }, config.interval.asMilliseconds()); const metricsObservable = this.metrics$.asObservable(); this.service = { - collectionInterval: interval.asMilliseconds(), + collectionInterval: config.interval.asMilliseconds(), getOpsMetrics$: () => metricsObservable, }; @@ -77,15 +71,10 @@ export class MetricsService return this.service; } - private async refreshMetrics(thresholdMs: number) { + private async refreshMetrics() { const metrics = await this.metricsCollector!.collect(); const { message, meta } = getEcsOpsMetricsLog(metrics); - const { message: eventLoopThresholdMessage } = getEventLoopThresholdLog(metrics, thresholdMs); - this.opsMetricsLogger.debug(message!, meta); - if (eventLoopThresholdMessage) { - this.opsMetricsLogger.warn(eventLoopThresholdMessage); - } this.metricsCollector!.reset(); this.metrics$.next(metrics); } diff --git a/src/core/server/metrics/ops_config.ts b/src/core/server/metrics/ops_config.ts index 5c18e655ff816..886900b854f9f 100644 --- a/src/core/server/metrics/ops_config.ts +++ b/src/core/server/metrics/ops_config.ts @@ -12,7 +12,6 @@ export const opsConfig = { path: 'ops', schema: schema.object({ interval: schema.duration({ defaultValue: '5s' }), - eventLoopDelayThreshold: schema.duration({ defaultValue: '350ms' }), cGroupOverrides: schema.object({ cpuPath: schema.maybe(schema.string()), cpuAcctPath: schema.maybe(schema.string()), From 3e77e4fc35c9d352860b01d0acdb969b6237fe4c Mon Sep 17 00:00:00 2001 From: Ahmad Bamieh Date: Tue, 29 Jun 2021 13:48:10 +0300 Subject: [PATCH 5/6] update collection mock --- src/plugins/usage_collection/server/mocks.ts | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/plugins/usage_collection/server/mocks.ts b/src/plugins/usage_collection/server/mocks.ts index ab7e53a7ad69b..ca3bdfe1e7522 100644 --- a/src/plugins/usage_collection/server/mocks.ts +++ b/src/plugins/usage_collection/server/mocks.ts @@ -16,7 +16,7 @@ import { import { CollectorOptions, CollectorSet } from './collector'; import { Collector } from './collector/collector'; import { UsageCollectionSetup, CollectorFetchContext } from './index'; - +import { usageCountersServiceMock } from './usage_counters/usage_counters_service.mock'; export type { CollectorOptions }; export { Collector }; @@ -25,10 +25,14 @@ export const createUsageCollectionSetupMock = () => { logger: loggingSystemMock.createLogger(), maximumWaitTimeForAllCollectorsInS: 1, }); + const { + createUsageCounter, + getUsageCounterByType, + } = usageCountersServiceMock.createSetupContract(); const usageCollectionSetupMock: jest.Mocked = { - createUsageCounter: jest.fn(), - getUsageCounterByType: jest.fn(), + createUsageCounter, + getUsageCounterByType, areAllCollectorsReady: jest.fn().mockImplementation(collectorSet.areAllCollectorsReady), bulkFetch: jest.fn().mockImplementation(collectorSet.bulkFetch), getCollectorByType: jest.fn().mockImplementation(collectorSet.getCollectorByType), From f83f9c1fefaba6fbddce1bef6e0adc58f7038e7f Mon Sep 17 00:00:00 2001 From: Ahmad Bamieh Date: Tue, 29 Jun 2021 18:47:53 +0300 Subject: [PATCH 6/6] named parameters for configs + update warning message --- .../event_loop_delays/track_delays.test.ts | 24 +++++------- .../event_loop_delays/track_delays.ts | 16 ++++++-- .../event_loop_delays/track_threshold.test.ts | 39 +++++++------------ .../event_loop_delays/track_threshold.ts | 17 ++++++-- 4 files changed, 49 insertions(+), 47 deletions(-) diff --git a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_delays.test.ts b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_delays.test.ts index 351ee9e9ffa56..e0d8c20ead75a 100644 --- a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_delays.test.ts +++ b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_delays.test.ts @@ -27,11 +27,9 @@ describe('startTrackingEventLoopDelaysUsage', () => { it('initializes EventLoopDelaysCollector and starts timer', () => { const collectionStartDelay = 1000; - startTrackingEventLoopDelaysUsage( - mockInternalRepository, - stopMonitoringEventLoop$, - collectionStartDelay - ); + startTrackingEventLoopDelaysUsage(mockInternalRepository, stopMonitoringEventLoop$, { + collectionStartDelay, + }); expect(monitorEventLoopDelay).toBeCalledTimes(1); expect(mockMonitorPercentile).toBeCalledTimes(0); @@ -42,12 +40,10 @@ describe('startTrackingEventLoopDelaysUsage', () => { it('stores event loop delays every collectionInterval duration', () => { const collectionStartDelay = 100; const collectionInterval = 1000; - startTrackingEventLoopDelaysUsage( - mockInternalRepository, - stopMonitoringEventLoop$, + startTrackingEventLoopDelaysUsage(mockInternalRepository, stopMonitoringEventLoop$, { collectionStartDelay, - collectionInterval - ); + collectionInterval, + }); expect(mockInternalRepository.create).toBeCalledTimes(0); jest.advanceTimersByTime(collectionStartDelay); @@ -62,13 +58,11 @@ describe('startTrackingEventLoopDelaysUsage', () => { const collectionStartDelay = 0; const collectionInterval = 1000; const histogramReset = 5000; - startTrackingEventLoopDelaysUsage( - mockInternalRepository, - stopMonitoringEventLoop$, + startTrackingEventLoopDelaysUsage(mockInternalRepository, stopMonitoringEventLoop$, { collectionStartDelay, collectionInterval, - histogramReset - ); + histogramReset, + }); expect(mockMonitorReset).toBeCalledTimes(0); jest.advanceTimersByTime(collectionInterval * 5); diff --git a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_delays.ts b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_delays.ts index 28850d44f12eb..70638d3b07cbc 100644 --- a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_delays.ts +++ b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_delays.ts @@ -25,13 +25,21 @@ import { EventLoopDelaysCollector } from './event_loop_delays'; export function startTrackingEventLoopDelaysUsage( internalRepository: ISavedObjectsRepository, stopMonitoringEventLoop$: Observable, - collectionStartDelay = MONITOR_EVENT_LOOP_DELAYS_START, - collectionInterval = MONITOR_EVENT_LOOP_DELAYS_INTERVAL, - histogramReset = MONITOR_EVENT_LOOP_DELAYS_RESET + configs: { + collectionStartDelay?: number; + collectionInterval?: number; + histogramReset?: number; + } = {} ) { - const eventLoopDelaysCollector = new EventLoopDelaysCollector(); + const { + collectionStartDelay = MONITOR_EVENT_LOOP_DELAYS_START, + collectionInterval = MONITOR_EVENT_LOOP_DELAYS_INTERVAL, + histogramReset = MONITOR_EVENT_LOOP_DELAYS_RESET, + } = configs; + const eventLoopDelaysCollector = new EventLoopDelaysCollector(); const resetOnCount = Math.ceil(histogramReset / collectionInterval); + timer(collectionStartDelay, collectionInterval) .pipe( map((i) => (i + 1) % resetOnCount === 0), diff --git a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.test.ts b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.test.ts index e1e3975467cd9..1ff49a735a775 100644 --- a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.test.ts +++ b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.test.ts @@ -28,14 +28,11 @@ describe('startTrackingEventLoopDelaysThreshold', () => { it('initializes EventLoopDelaysCollector and starts timer', () => { const collectionStartDelay = 1000; - const collectionWarnThreshold = 1000; - startTrackingEventLoopDelaysThreshold( - mockEventLoopCounter, - logger, - stopMonitoringEventLoop$, - collectionWarnThreshold, - collectionStartDelay - ); + const warnThreshold = 1000; + startTrackingEventLoopDelaysThreshold(mockEventLoopCounter, logger, stopMonitoringEventLoop$, { + warnThreshold, + collectionStartDelay, + }); expect(monitorEventLoopDelay).toBeCalledTimes(1); expect(mockMonitorPercentile).toBeCalledTimes(0); @@ -46,16 +43,13 @@ describe('startTrackingEventLoopDelaysThreshold', () => { it('logs a warning and increments usage counter when the mean delay exceeds the threshold', () => { const collectionStartDelay = 100; const collectionInterval = 1000; - const collectionWarnThreshold = 10; + const warnThreshold = 10; - startTrackingEventLoopDelaysThreshold( - mockEventLoopCounter, - logger, - stopMonitoringEventLoop$, - collectionWarnThreshold, + startTrackingEventLoopDelaysThreshold(mockEventLoopCounter, logger, stopMonitoringEventLoop$, { + warnThreshold, collectionStartDelay, - collectionInterval - ); + collectionInterval, + }); expect(logger.warn).toBeCalledTimes(0); expect(mockEventLoopCounter.incrementCounter).toBeCalledTimes(0); @@ -79,15 +73,12 @@ describe('startTrackingEventLoopDelaysThreshold', () => { it('does not log warning or increment usage if threshold did not exceed mean delay', () => { const collectionStartDelay = 100; - const collectionWarnThreshold = 15; + const warnThreshold = 15; - startTrackingEventLoopDelaysThreshold( - mockEventLoopCounter, - logger, - stopMonitoringEventLoop$, - collectionWarnThreshold, - collectionStartDelay - ); + startTrackingEventLoopDelaysThreshold(mockEventLoopCounter, logger, stopMonitoringEventLoop$, { + warnThreshold, + collectionStartDelay, + }); expect(logger.warn).toBeCalledTimes(0); expect(mockEventLoopCounter.incrementCounter).toBeCalledTimes(0); diff --git a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.ts b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.ts index bce99e995b8ec..246d88496a158 100644 --- a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.ts +++ b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/track_threshold.ts @@ -29,10 +29,18 @@ export function startTrackingEventLoopDelaysThreshold( eventLoopCounter: UsageCounter, logger: Logger, stopMonitoringEventLoop$: Observable, - warnThreshold = MONITOR_EVENT_LOOP_WARN_THRESHOLD, - collectionStartDelay = MONITOR_EVENT_LOOP_THRESHOLD_START, - collectionInterval = MONITOR_EVENT_LOOP_THRESHOLD_INTERVAL + configs: { + warnThreshold?: number; + collectionStartDelay?: number; + collectionInterval?: number; + } = {} ) { + const { + warnThreshold = MONITOR_EVENT_LOOP_WARN_THRESHOLD, + collectionStartDelay = MONITOR_EVENT_LOOP_THRESHOLD_START, + collectionInterval = MONITOR_EVENT_LOOP_THRESHOLD_INTERVAL, + } = configs; + const eventLoopDelaysCollector = new EventLoopDelaysCollector(); timer(collectionStartDelay, collectionInterval) .pipe( @@ -47,7 +55,8 @@ export function startTrackingEventLoopDelaysThreshold( if (meanDurationMs > warnThreshold) { logger.warn( - `Average event loop delay threshold exceeded ${warnThreshold}ms. Received ${meanDurationMs}ms.` + `Average event loop delay threshold exceeded ${warnThreshold}ms. Received ${meanDurationMs}ms. ` + + `See https://ela.st/kibana-scaling-considerations for more information about scaling Kibana.` ); eventLoopCounter.incrementCounter({