Skip to content

Commit

Permalink
Handles ns to ms conversion for event loop delay metrics (elastic#118447
Browse files Browse the repository at this point in the history
)

Co-authored-by: Kibana Machine <[email protected]>
  • Loading branch information
2 people authored and mbondyra committed Nov 16, 2021
1 parent c4e0413 commit 45f7468
Show file tree
Hide file tree
Showing 10 changed files with 109 additions and 59 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,17 @@ import moment from 'moment';
import type { EventLoopDelaysMonitor } from './event_loop_delays_monitor';
import type { IntervalHistogram } from '../types';

function createMockHistogram(overwrites: Partial<IntervalHistogram> = {}): IntervalHistogram {
function createMockRawNsDataHistogram(
overwrites: Partial<IntervalHistogram> = {}
): IntervalHistogram {
const now = Date.now();

return {
const mockedRawCollectedDataInNs = {
min: 9093120,
max: 53247999,
mean: 11993238.600747818,
mean: 11993238,
exceeds: 0,
stddev: 1168191.9357543814,
stddev: 1168191,
fromTimestamp: moment(now).toISOString(),
lastUpdatedAt: moment(now).toISOString(),
percentiles: {
Expand All @@ -28,6 +30,31 @@ function createMockHistogram(overwrites: Partial<IntervalHistogram> = {}): Inter
},
...overwrites,
};
return mockedRawCollectedDataInNs;
}

function createMockMonitorDataMsHistogram(
overwrites: Partial<IntervalHistogram> = {}
): IntervalHistogram {
const now = Date.now();

const mockedRawCollectedDataInMs = {
min: 9.09312,
max: 53.247999,
mean: 11.993238,
exceeds: 0,
stddev: 1.168191,
fromTimestamp: moment(now).toISOString(),
lastUpdatedAt: moment(now).toISOString(),
percentiles: {
'50': 12.607487,
'75': 12.615679,
'95': 12.648447,
'99': 12.713983,
},
...overwrites,
};
return mockedRawCollectedDataInMs;
}

function createMockEventLoopDelaysMonitor() {
Expand All @@ -40,12 +67,12 @@ function createMockEventLoopDelaysMonitor() {
stop: jest.fn(),
});

mockCollect.mockReturnValue(createMockHistogram());
mockCollect.mockReturnValue(createMockMonitorDataMsHistogram()); // this must mock the return value of the public collect method from this monitor.

return new MockEventLoopDelaysMonitor();
}

export const mocked = {
createHistogram: createMockHistogram,
createHistogram: createMockRawNsDataHistogram, // raw data as received from Node.js perf_hooks.monitorEventLoopDelay([options])
createEventLoopDelaysMonitor: createMockEventLoopDelaysMonitor,
};
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
/* eslint-disable dot-notation */
jest.mock('perf_hooks');
import { monitorEventLoopDelay } from 'perf_hooks';
import { EventLoopDelaysMonitor } from './event_loop_delays_monitor';
import { EventLoopDelaysMonitor, nsToMs } from './event_loop_delays_monitor';
import { mocked } from './event_loop_delays_monitor.mocks';

describe('EventLoopDelaysMonitor', () => {
Expand Down Expand Up @@ -39,10 +39,20 @@ describe('EventLoopDelaysMonitor', () => {
expect(eventLoopDelaysMonitor['loopMonitor'].percentile).toHaveBeenNthCalledWith(3, 95);
expect(eventLoopDelaysMonitor['loopMonitor'].percentile).toHaveBeenNthCalledWith(4, 99);

// mocked perf_hook returns `mocked.createHistogram()`.
// mocked perf_hook returns `mocked.createNsHistogram()` that returns data in ns.
// The `collect` function returns the data in ms.
// This ensures that the wiring of the `collect` function is correct.
const mockedHistogram = mocked.createHistogram();
expect(histogramData).toEqual(mockedHistogram);

expect(histogramData.min).toEqual(nsToMs(mockedHistogram.min));
expect(histogramData.max).toEqual(nsToMs(mockedHistogram.max));
expect(histogramData.mean).toEqual(nsToMs(mockedHistogram.mean));
expect(histogramData.stddev).toEqual(nsToMs(mockedHistogram.stddev));
expect(histogramData.exceeds).toEqual(nsToMs(mockedHistogram.exceeds));
expect(histogramData.percentiles['50']).toEqual(nsToMs(mockedHistogram.percentiles['50']));
expect(histogramData.percentiles['75']).toEqual(nsToMs(mockedHistogram.percentiles['75']));
expect(histogramData.percentiles['95']).toEqual(nsToMs(mockedHistogram.percentiles['95']));
expect(histogramData.percentiles['99']).toEqual(nsToMs(mockedHistogram.percentiles['99']));
});

test('#reset resets histogram data', () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@ import type { IntervalHistogram as PerfIntervalHistogram } from 'perf_hooks';
import { monitorEventLoopDelay } from 'perf_hooks';
import type { IntervalHistogram } from '../types';

/**
* Nanosecond to milisecond conversion unit
*/
export const ONE_MILLISECOND_AS_NANOSECONDS = 1_000_000;

/**
* Converts time metric from ns to ms
**/
export function nsToMs(metric: number) {
return metric / ONE_MILLISECOND_AS_NANOSECONDS;
}

export class EventLoopDelaysMonitor {
private readonly loopMonitor: PerfIntervalHistogram;
private fromTimestamp: Date;
Expand All @@ -28,26 +40,36 @@ export class EventLoopDelaysMonitor {
* Collect gathers event loop delays metrics from nodejs perf_hooks.monitorEventLoopDelay
* the histogram calculations start from the last time `reset` was called or this
* EventLoopDelaysMonitor instance was created.
*
* Returns metrics in milliseconds.
* @returns {IntervalHistogram}
*/

public collect(): IntervalHistogram {
const lastUpdated = new Date();
this.loopMonitor.disable();
const { min, max, mean, exceeds, stddev } = this.loopMonitor;
const {
min: minNs,
max: maxNs,
mean: meanNs,
exceeds: exceedsNs,
stddev: stddevNs,
} = this.loopMonitor;

const collectedData: IntervalHistogram = {
min,
max,
mean,
exceeds,
stddev,
min: nsToMs(minNs),
max: nsToMs(maxNs),
mean: nsToMs(meanNs),
exceeds: nsToMs(exceedsNs),
stddev: nsToMs(stddevNs),
fromTimestamp: this.fromTimestamp.toISOString(),
lastUpdatedAt: lastUpdated.toISOString(),
percentiles: {
50: this.loopMonitor.percentile(50),
75: this.loopMonitor.percentile(75),
95: this.loopMonitor.percentile(95),
99: this.loopMonitor.percentile(99),
50: nsToMs(this.loopMonitor.percentile(50)),
75: nsToMs(this.loopMonitor.percentile(75)),
95: nsToMs(this.loopMonitor.percentile(95)),
99: nsToMs(this.loopMonitor.percentile(99)),
},
};

Expand Down
2 changes: 1 addition & 1 deletion src/core/server/metrics/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ export interface OpsMetrics {

/**
* an IntervalHistogram object that samples and reports the event loop delay over time.
* The delays will be reported in nanoseconds.
* The delays will be reported in milliseconds.
*
* @public
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ export const MONITOR_EVENT_LOOP_DELAYS_RESET = 24 * 60 * 60 * 1000;
export const MONITOR_EVENT_LOOP_DELAYS_START = 1 * 60 * 1000;

/**
* Mean event loop delay threshold for logging a warning.
* Mean event loop delay threshold in ms for logging a warning.
*/
export const MONITOR_EVENT_LOOP_WARN_THRESHOLD = 350;

Expand All @@ -45,8 +45,3 @@ export const MONITOR_EVENT_LOOP_THRESHOLD_START = 1 * 60 * 1000;
* Check the event loop utilization every 30 seconds
*/
export const MONITOR_EVENT_LOOP_THRESHOLD_INTERVAL = 30 * 1000;

/**
* Nanosecond to milisecond conversion unit
*/
export const ONE_MILLISECOND_AS_NANOSECONDS = 1000000;
Original file line number Diff line number Diff line change
Expand Up @@ -59,19 +59,19 @@ export const eventLoopDelaysUsageSchema: MakeSchemaFrom<EventLoopDelaysUsageRepo
min: {
type: 'long',
_meta: {
description: 'The minimum recorded event loop delay.',
description: 'The minimum recorded event loop delay in ms.',
},
},
max: {
type: 'long',
_meta: {
description: 'The maximum recorded event loop delay.',
description: 'The maximum recorded event loop delay in ms.',
},
},
mean: {
type: 'long',
_meta: {
description: 'The mean of the recorded event loop delays.',
description: 'The mean of the recorded event loop delays in ms.',
},
},
exceeds: {
Expand All @@ -84,32 +84,32 @@ export const eventLoopDelaysUsageSchema: MakeSchemaFrom<EventLoopDelaysUsageRepo
stddev: {
type: 'long',
_meta: {
description: 'The standard deviation of the recorded event loop delays.',
description: 'The standard deviation of the recorded event loop delays in ms.',
},
},
percentiles: {
'50': {
type: 'long',
_meta: {
description: 'The 50th accumulated percentile distribution',
description: 'The 50th accumulated percentile distribution in ms',
},
},
'75': {
type: 'long',
_meta: {
description: 'The 75th accumulated percentile distribution',
description: 'The 75th accumulated percentile distribution in ms',
},
},
'95': {
type: 'long',
_meta: {
description: 'The 95th accumulated percentile distribution',
description: 'The 95th accumulated percentile distribution in ms',
},
},
'99': {
type: 'long',
_meta: {
description: 'The 99th accumulated percentile distribution',
description: 'The 99th accumulated percentile distribution in ms',
},
},
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import { storeHistogram } from './saved_objects';
* The monitoring of the event loop starts immediately.
* The first collection of the histogram happens after 1 minute.
* The daily histogram data is updated every 1 hour.
* The histogram metrics are in milliseconds.
*/
export function startTrackingEventLoopDelaysUsage(
internalRepository: ISavedObjectsRepository,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,11 @@
import { takeUntil, finalize } from 'rxjs/operators';
import { Observable, timer } from 'rxjs';
import type { Logger } from 'kibana/server';
import moment from 'moment';
import type { UsageCounter } from '../../../../usage_collection/server';
import {
MONITOR_EVENT_LOOP_THRESHOLD_START,
MONITOR_EVENT_LOOP_THRESHOLD_INTERVAL,
MONITOR_EVENT_LOOP_WARN_THRESHOLD,
ONE_MILLISECOND_AS_NANOSECONDS,
} from './constants';
import type { EventLoopDelaysMonitor } from '../../../../../core/server';

Expand Down Expand Up @@ -48,14 +46,11 @@ export function startTrackingEventLoopDelaysThreshold(
finalize(() => eventLoopDelaysMonitor.stop())
)
.subscribe(async () => {
const { mean } = eventLoopDelaysMonitor.collect();
const meanDurationMs = moment
.duration(mean / ONE_MILLISECOND_AS_NANOSECONDS)
.asMilliseconds();
const { mean: meanMS } = eventLoopDelaysMonitor.collect();

if (meanDurationMs > warnThreshold) {
if (meanMS > warnThreshold) {
logger.warn(
`Average event loop delay threshold exceeded ${warnThreshold}ms. Received ${meanDurationMs}ms. ` +
`Average event loop delay threshold exceeded ${warnThreshold}ms. Received ${meanMS}ms. ` +
`See https://ela.st/kibana-scaling-considerations for more information about scaling Kibana.`
);

Expand Down
18 changes: 9 additions & 9 deletions src/plugins/telemetry/schema/oss_plugins.json
Original file line number Diff line number Diff line change
Expand Up @@ -7075,19 +7075,19 @@
"min": {
"type": "long",
"_meta": {
"description": "The minimum recorded event loop delay."
"description": "The minimum recorded event loop delay in ms."
}
},
"max": {
"type": "long",
"_meta": {
"description": "The maximum recorded event loop delay."
"description": "The maximum recorded event loop delay in ms."
}
},
"mean": {
"type": "long",
"_meta": {
"description": "The mean of the recorded event loop delays."
"description": "The mean of the recorded event loop delays in ms."
}
},
"exceeds": {
Expand All @@ -7099,33 +7099,33 @@
"stddev": {
"type": "long",
"_meta": {
"description": "The standard deviation of the recorded event loop delays."
"description": "The standard deviation of the recorded event loop delays in ms."
}
},
"percentiles": {
"properties": {
"50": {
"type": "long",
"_meta": {
"description": "The 50th accumulated percentile distribution"
"description": "The 50th accumulated percentile distribution in ms"
}
},
"75": {
"type": "long",
"_meta": {
"description": "The 75th accumulated percentile distribution"
"description": "The 75th accumulated percentile distribution in ms"
}
},
"95": {
"type": "long",
"_meta": {
"description": "The 95th accumulated percentile distribution"
"description": "The 95th accumulated percentile distribution in ms"
}
},
"99": {
"type": "long",
"_meta": {
"description": "The 99th accumulated percentile distribution"
"description": "The 99th accumulated percentile distribution in ms"
}
}
}
Expand Down Expand Up @@ -9301,4 +9301,4 @@
}
}
}
}
}
Loading

0 comments on commit 45f7468

Please sign in to comment.