Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[8.0] Handles ns to ms conversion for event loop delay metrics (#118447) #118612

Merged
merged 1 commit into from
Nov 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,17 @@ import moment from 'moment';
import type { EventLoopDelaysMonitor } from './event_loop_delays_monitor';
import type { IntervalHistogram } from '../types';

function createMockHistogram(overwrites: Partial<IntervalHistogram> = {}): IntervalHistogram {
function createMockRawNsDataHistogram(
overwrites: Partial<IntervalHistogram> = {}
): IntervalHistogram {
const now = Date.now();

return {
const mockedRawCollectedDataInNs = {
min: 9093120,
max: 53247999,
mean: 11993238.600747818,
mean: 11993238,
exceeds: 0,
stddev: 1168191.9357543814,
stddev: 1168191,
fromTimestamp: moment(now).toISOString(),
lastUpdatedAt: moment(now).toISOString(),
percentiles: {
Expand All @@ -28,6 +30,31 @@ function createMockHistogram(overwrites: Partial<IntervalHistogram> = {}): Inter
},
...overwrites,
};
return mockedRawCollectedDataInNs;
}

function createMockMonitorDataMsHistogram(
overwrites: Partial<IntervalHistogram> = {}
): IntervalHistogram {
const now = Date.now();

const mockedRawCollectedDataInMs = {
min: 9.09312,
max: 53.247999,
mean: 11.993238,
exceeds: 0,
stddev: 1.168191,
fromTimestamp: moment(now).toISOString(),
lastUpdatedAt: moment(now).toISOString(),
percentiles: {
'50': 12.607487,
'75': 12.615679,
'95': 12.648447,
'99': 12.713983,
},
...overwrites,
};
return mockedRawCollectedDataInMs;
}

function createMockEventLoopDelaysMonitor() {
Expand All @@ -40,12 +67,12 @@ function createMockEventLoopDelaysMonitor() {
stop: jest.fn(),
});

mockCollect.mockReturnValue(createMockHistogram());
mockCollect.mockReturnValue(createMockMonitorDataMsHistogram()); // this must mock the return value of the public collect method from this monitor.

return new MockEventLoopDelaysMonitor();
}

export const mocked = {
createHistogram: createMockHistogram,
createHistogram: createMockRawNsDataHistogram, // raw data as received from Node.js perf_hooks.monitorEventLoopDelay([options])
createEventLoopDelaysMonitor: createMockEventLoopDelaysMonitor,
};
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
/* eslint-disable dot-notation */
jest.mock('perf_hooks');
import { monitorEventLoopDelay } from 'perf_hooks';
import { EventLoopDelaysMonitor } from './event_loop_delays_monitor';
import { EventLoopDelaysMonitor, nsToMs } from './event_loop_delays_monitor';
import { mocked } from './event_loop_delays_monitor.mocks';

describe('EventLoopDelaysMonitor', () => {
Expand Down Expand Up @@ -39,10 +39,20 @@ describe('EventLoopDelaysMonitor', () => {
expect(eventLoopDelaysMonitor['loopMonitor'].percentile).toHaveBeenNthCalledWith(3, 95);
expect(eventLoopDelaysMonitor['loopMonitor'].percentile).toHaveBeenNthCalledWith(4, 99);

// mocked perf_hook returns `mocked.createHistogram()`.
// mocked perf_hook returns `mocked.createNsHistogram()` that returns data in ns.
// The `collect` function returns the data in ms.
// This ensures that the wiring of the `collect` function is correct.
const mockedHistogram = mocked.createHistogram();
expect(histogramData).toEqual(mockedHistogram);

expect(histogramData.min).toEqual(nsToMs(mockedHistogram.min));
expect(histogramData.max).toEqual(nsToMs(mockedHistogram.max));
expect(histogramData.mean).toEqual(nsToMs(mockedHistogram.mean));
expect(histogramData.stddev).toEqual(nsToMs(mockedHistogram.stddev));
expect(histogramData.exceeds).toEqual(nsToMs(mockedHistogram.exceeds));
expect(histogramData.percentiles['50']).toEqual(nsToMs(mockedHistogram.percentiles['50']));
expect(histogramData.percentiles['75']).toEqual(nsToMs(mockedHistogram.percentiles['75']));
expect(histogramData.percentiles['95']).toEqual(nsToMs(mockedHistogram.percentiles['95']));
expect(histogramData.percentiles['99']).toEqual(nsToMs(mockedHistogram.percentiles['99']));
});

test('#reset resets histogram data', () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@ import type { IntervalHistogram as PerfIntervalHistogram } from 'perf_hooks';
import { monitorEventLoopDelay } from 'perf_hooks';
import type { IntervalHistogram } from '../types';

/**
* Nanosecond to milisecond conversion unit
*/
export const ONE_MILLISECOND_AS_NANOSECONDS = 1_000_000;

/**
* Converts time metric from ns to ms
**/
export function nsToMs(metric: number) {
return metric / ONE_MILLISECOND_AS_NANOSECONDS;
}

export class EventLoopDelaysMonitor {
private readonly loopMonitor: PerfIntervalHistogram;
private fromTimestamp: Date;
Expand All @@ -28,26 +40,36 @@ export class EventLoopDelaysMonitor {
* Collect gathers event loop delays metrics from nodejs perf_hooks.monitorEventLoopDelay
* the histogram calculations start from the last time `reset` was called or this
* EventLoopDelaysMonitor instance was created.
*
* Returns metrics in milliseconds.

* @returns {IntervalHistogram}
*/

public collect(): IntervalHistogram {
const lastUpdated = new Date();
this.loopMonitor.disable();
const { min, max, mean, exceeds, stddev } = this.loopMonitor;
const {
min: minNs,
max: maxNs,
mean: meanNs,
exceeds: exceedsNs,
stddev: stddevNs,
} = this.loopMonitor;

const collectedData: IntervalHistogram = {
min,
max,
mean,
exceeds,
stddev,
min: nsToMs(minNs),
max: nsToMs(maxNs),
mean: nsToMs(meanNs),
exceeds: nsToMs(exceedsNs),
stddev: nsToMs(stddevNs),
fromTimestamp: this.fromTimestamp.toISOString(),
lastUpdatedAt: lastUpdated.toISOString(),
percentiles: {
50: this.loopMonitor.percentile(50),
75: this.loopMonitor.percentile(75),
95: this.loopMonitor.percentile(95),
99: this.loopMonitor.percentile(99),
50: nsToMs(this.loopMonitor.percentile(50)),
75: nsToMs(this.loopMonitor.percentile(75)),
95: nsToMs(this.loopMonitor.percentile(95)),
99: nsToMs(this.loopMonitor.percentile(99)),
},
};

Expand Down
2 changes: 1 addition & 1 deletion src/core/server/metrics/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ export interface OpsMetrics {

/**
* an IntervalHistogram object that samples and reports the event loop delay over time.
* The delays will be reported in nanoseconds.
* The delays will be reported in milliseconds.
*
* @public
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ export const MONITOR_EVENT_LOOP_DELAYS_RESET = 24 * 60 * 60 * 1000;
export const MONITOR_EVENT_LOOP_DELAYS_START = 1 * 60 * 1000;

/**
* Mean event loop delay threshold for logging a warning.
* Mean event loop delay threshold in ms for logging a warning.
*/
export const MONITOR_EVENT_LOOP_WARN_THRESHOLD = 350;

Expand All @@ -45,8 +45,3 @@ export const MONITOR_EVENT_LOOP_THRESHOLD_START = 1 * 60 * 1000;
* Check the event loop utilization every 30 seconds
*/
export const MONITOR_EVENT_LOOP_THRESHOLD_INTERVAL = 30 * 1000;

/**
* Nanosecond to milisecond conversion unit
*/
export const ONE_MILLISECOND_AS_NANOSECONDS = 1000000;
Original file line number Diff line number Diff line change
Expand Up @@ -59,19 +59,19 @@ export const eventLoopDelaysUsageSchema: MakeSchemaFrom<EventLoopDelaysUsageRepo
min: {
type: 'long',
_meta: {
description: 'The minimum recorded event loop delay.',
description: 'The minimum recorded event loop delay in ms.',
},
},
max: {
type: 'long',
_meta: {
description: 'The maximum recorded event loop delay.',
description: 'The maximum recorded event loop delay in ms.',
},
},
mean: {
type: 'long',
_meta: {
description: 'The mean of the recorded event loop delays.',
description: 'The mean of the recorded event loop delays in ms.',
},
},
exceeds: {
Expand All @@ -84,32 +84,32 @@ export const eventLoopDelaysUsageSchema: MakeSchemaFrom<EventLoopDelaysUsageRepo
stddev: {
type: 'long',
_meta: {
description: 'The standard deviation of the recorded event loop delays.',
description: 'The standard deviation of the recorded event loop delays in ms.',
},
},
percentiles: {
'50': {
type: 'long',
_meta: {
description: 'The 50th accumulated percentile distribution',
description: 'The 50th accumulated percentile distribution in ms',
},
},
'75': {
type: 'long',
_meta: {
description: 'The 75th accumulated percentile distribution',
description: 'The 75th accumulated percentile distribution in ms',
},
},
'95': {
type: 'long',
_meta: {
description: 'The 95th accumulated percentile distribution',
description: 'The 95th accumulated percentile distribution in ms',
},
},
'99': {
type: 'long',
_meta: {
description: 'The 99th accumulated percentile distribution',
description: 'The 99th accumulated percentile distribution in ms',
},
},
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import { storeHistogram } from './saved_objects';
* The monitoring of the event loop starts immediately.
* The first collection of the histogram happens after 1 minute.
* The daily histogram data is updated every 1 hour.
* The histogram metrics are in milliseconds.
*/
export function startTrackingEventLoopDelaysUsage(
internalRepository: ISavedObjectsRepository,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,11 @@
import { takeUntil, finalize } from 'rxjs/operators';
import { Observable, timer } from 'rxjs';
import type { Logger } from 'kibana/server';
import moment from 'moment';
import type { UsageCounter } from '../../../../usage_collection/server';
import {
MONITOR_EVENT_LOOP_THRESHOLD_START,
MONITOR_EVENT_LOOP_THRESHOLD_INTERVAL,
MONITOR_EVENT_LOOP_WARN_THRESHOLD,
ONE_MILLISECOND_AS_NANOSECONDS,
} from './constants';
import type { EventLoopDelaysMonitor } from '../../../../../core/server';

Expand Down Expand Up @@ -48,14 +46,11 @@ export function startTrackingEventLoopDelaysThreshold(
finalize(() => eventLoopDelaysMonitor.stop())
)
.subscribe(async () => {
const { mean } = eventLoopDelaysMonitor.collect();
const meanDurationMs = moment
.duration(mean / ONE_MILLISECOND_AS_NANOSECONDS)
.asMilliseconds();
const { mean: meanMS } = eventLoopDelaysMonitor.collect();

if (meanDurationMs > warnThreshold) {
if (meanMS > warnThreshold) {
logger.warn(
`Average event loop delay threshold exceeded ${warnThreshold}ms. Received ${meanDurationMs}ms. ` +
`Average event loop delay threshold exceeded ${warnThreshold}ms. Received ${meanMS}ms. ` +
`See https://ela.st/kibana-scaling-considerations for more information about scaling Kibana.`
);

Expand Down
18 changes: 9 additions & 9 deletions src/plugins/telemetry/schema/oss_plugins.json
Original file line number Diff line number Diff line change
Expand Up @@ -7075,19 +7075,19 @@
"min": {
"type": "long",
"_meta": {
"description": "The minimum recorded event loop delay."
"description": "The minimum recorded event loop delay in ms."
}
},
"max": {
"type": "long",
"_meta": {
"description": "The maximum recorded event loop delay."
"description": "The maximum recorded event loop delay in ms."
}
},
"mean": {
"type": "long",
"_meta": {
"description": "The mean of the recorded event loop delays."
"description": "The mean of the recorded event loop delays in ms."
}
},
"exceeds": {
Expand All @@ -7099,33 +7099,33 @@
"stddev": {
"type": "long",
"_meta": {
"description": "The standard deviation of the recorded event loop delays."
"description": "The standard deviation of the recorded event loop delays in ms."
}
},
"percentiles": {
"properties": {
"50": {
"type": "long",
"_meta": {
"description": "The 50th accumulated percentile distribution"
"description": "The 50th accumulated percentile distribution in ms"
}
},
"75": {
"type": "long",
"_meta": {
"description": "The 75th accumulated percentile distribution"
"description": "The 75th accumulated percentile distribution in ms"
}
},
"95": {
"type": "long",
"_meta": {
"description": "The 95th accumulated percentile distribution"
"description": "The 95th accumulated percentile distribution in ms"
}
},
"99": {
"type": "long",
"_meta": {
"description": "The 99th accumulated percentile distribution"
"description": "The 99th accumulated percentile distribution in ms"
}
}
}
Expand Down Expand Up @@ -9301,4 +9301,4 @@
}
}
}
}
}
Loading