Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[core.metrics] Add support for multiple processes in ops metrics & stats API; deprecate process field #107900

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/core/server/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,9 @@ export type {
OpsProcessMetrics,
MetricsServiceSetup,
MetricsServiceStart,
IntervalHistogram,
} from './metrics';
export { EventLoopDelaysMonitor } from './metrics';

export type { I18nServiceSetup } from './i18n';
export type {
Expand Down
38 changes: 23 additions & 15 deletions src/core/server/metrics/collectors/process.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,25 @@
*/

import v8 from 'v8';
import { Bench } from '@hapi/hoek';
import { OpsProcessMetrics, MetricsCollector } from './types';
import { EventLoopDelaysMonitor } from '../event_loop_delays';

export class ProcessMetricsCollector implements MetricsCollector<OpsProcessMetrics> {
public async collect(): Promise<OpsProcessMetrics> {
export const MAIN_THREAD_PROCESS_NAME = 'main_thread';
Bamieh marked this conversation as resolved.
Show resolved Hide resolved

export class ProcessMetricsCollector implements MetricsCollector<OpsProcessMetrics[]> {
/**
 * Finds the metrics entry that belongs to the main thread.
 *
 * @param processes list of per-process metrics to search
 * @returns the first entry whose `name` equals `MAIN_THREAD_PROCESS_NAME`,
 * or `undefined` when no entry matches
 */
static getMainThreadMetrics(processes: OpsProcessMetrics[]): undefined | OpsProcessMetrics {
  for (const candidate of processes) {
    if (candidate.name === MAIN_THREAD_PROCESS_NAME) {
      return candidate;
    }
  }
  return undefined;
}

private readonly eventLoopDelayMonitor = new EventLoopDelaysMonitor();

private getCurrentPidMetrics(): OpsProcessMetrics {
const eventLoopDelayHistogram = this.eventLoopDelayMonitor.collect();
const heapStats = v8.getHeapStatistics();
const memoryUsage = process.memoryUsage();
const [eventLoopDelay] = await Promise.all([getEventLoopDelay()]);

return {
name: MAIN_THREAD_PROCESS_NAME,
memory: {
heap: {
total_in_bytes: memoryUsage.heapTotal,
Expand All @@ -25,19 +35,17 @@ export class ProcessMetricsCollector implements MetricsCollector<OpsProcessMetri
resident_set_size_in_bytes: memoryUsage.rss,
},
pid: process.pid,
event_loop_delay: eventLoopDelay,
event_loop_delay: eventLoopDelayHistogram.mean,
event_loop_delay_histogram: eventLoopDelayHistogram,
uptime_in_millis: process.uptime() * 1000,
};
}

public reset() {}
}
/**
 * Collects the process metrics.
 *
 * @returns a one-element array holding the result of `getCurrentPidMetrics()`
 */
public collect(): OpsProcessMetrics[] {
return [this.getCurrentPidMetrics()];
}

/**
 * Samples the event loop delay once.
 *
 * A `Bench` timer is started, a callback is queued with `setImmediate`, and
 * the promise resolves with `Bench#elapsed()` as read inside that callback.
 *
 * @returns a promise resolving to the elapsed time reported by the timer
 * (presumably milliseconds, per `@hapi/hoek` — confirm)
 */
const getEventLoopDelay = (): Promise<number> => {
  const stopwatch = new Bench();
  return new Promise((done) => {
    setImmediate(() => done(stopwatch.elapsed()));
  });
};
/** Resets the collector by resetting its event loop delay monitor. */
public reset() {
this.eventLoopDelayMonitor.reset();
}
}
13 changes: 9 additions & 4 deletions src/core/server/metrics/collectors/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
import type { IntervalHistogram } from '../event_loop_delays';

/** Base interface for all metrics gatherers */
export interface MetricsCollector<T> {
/** collect the data currently gathered by the collector */
collect(): Promise<T>;
collect(): Promise<T> | T;
/** reset the internal state of the collector */
reset(): void;
}
Expand All @@ -19,6 +20,10 @@ export interface MetricsCollector<T> {
* @public
*/
export interface OpsProcessMetrics {
/** pid of the kibana process */
pid: number;
/** name of process (example: 'coordinator' | 'server_worker' | 'task_worker' | 'reporting_worker') */
name: string;
Bamieh marked this conversation as resolved.
Show resolved Hide resolved
/** process memory usage */
memory: {
/** heap memory usage */
Expand All @@ -33,10 +38,10 @@ export interface OpsProcessMetrics {
/** node rss */
resident_set_size_in_bytes: number;
};
/** node event loop delay */
/** mean event loop delay since last collection */
event_loop_delay: number;
/** pid of the kibana process */
pid: number;
/** node event loop delay histogram since last collection */
event_loop_delay_histogram: IntervalHistogram;
/** uptime of the kibana process */
uptime_in_millis: number;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

import type { EventLoopDelayMonitor } from 'perf_hooks';
import { monitorEventLoopDelay } from 'perf_hooks';
import { MONITOR_EVENT_LOOP_DELAYS_RESOLUTION } from './constants';

export interface IntervalHistogram {
fromTimestamp: string;
Expand All @@ -26,37 +25,40 @@ export interface IntervalHistogram {
};
}

export class EventLoopDelaysCollector {
export class EventLoopDelaysMonitor {
private readonly loopMonitor: EventLoopDelayMonitor;
private fromTimestamp: Date;

constructor() {
const monitor = monitorEventLoopDelay({
resolution: MONITOR_EVENT_LOOP_DELAYS_RESOLUTION,
});
const monitor = monitorEventLoopDelay();
monitor.enable();
this.fromTimestamp = new Date();
this.loopMonitor = monitor;
}

public collect(): IntervalHistogram {
const lastUpdated = new Date();
this.loopMonitor.disable();
const { min, max, mean, exceeds, stddev } = this.loopMonitor;

return {
const collectedData: IntervalHistogram = {
min,
max,
mean,
exceeds,
stddev,
fromTimestamp: this.fromTimestamp.toISOString(),
lastUpdatedAt: new Date().toISOString(),
lastUpdatedAt: lastUpdated.toISOString(),
percentiles: {
50: this.loopMonitor.percentile(50),
75: this.loopMonitor.percentile(75),
95: this.loopMonitor.percentile(95),
99: this.loopMonitor.percentile(99),
},
};

this.loopMonitor.enable();
return collectedData;
}

public reset() {
Expand Down
10 changes: 10 additions & 0 deletions src/core/server/metrics/event_loop_delays/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

export { EventLoopDelaysMonitor } from './event_loop_delays_monitor';
export type { IntervalHistogram } from './event_loop_delays_monitor';
2 changes: 2 additions & 0 deletions src/core/server/metrics/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,5 @@ export type { OpsProcessMetrics, OpsServerMetrics, OpsOsMetrics } from './collec
export { MetricsService } from './metrics_service';
export { opsConfig } from './ops_config';
export type { OpsConfigType } from './ops_config';
export { EventLoopDelaysMonitor } from './event_loop_delays';
export type { IntervalHistogram } from './event_loop_delays';
7 changes: 5 additions & 2 deletions src/core/server/metrics/ops_metrics_collector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,17 @@ export class OpsMetricsCollector implements MetricsCollector<OpsMetrics> {
}

public async collect(): Promise<OpsMetrics> {
const [process, os, server] = await Promise.all([
const [processes, os, server] = await Promise.all([
this.processCollector.collect(),
this.osCollector.collect(),
this.serverCollector.collect(),
]);
const mainProcess = ProcessMetricsCollector.getMainThreadMetrics(processes) || processes[0];

return {
collected_at: new Date(),
process,
process: mainProcess,
processes,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

os,
...server,
};
Expand Down
7 changes: 6 additions & 1 deletion src/core/server/metrics/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,13 @@ export type InternalMetricsServiceStart = MetricsServiceStart;
export interface OpsMetrics {
/** Time metrics were recorded at. */
collected_at: Date;
/** Process related metrics */
/**
* Process related metrics.
* @deprecated Use the `processes` field instead.
*/
process: OpsProcessMetrics;
/** Process related metrics. Reports an array with one object per kibana process (pid). */
processes: OpsProcessMetrics[];
/** OS related metrics */
os: OpsOsMetrics;
/** server response time stats */
Expand Down
19 changes: 5 additions & 14 deletions src/core/server/status/routes/status.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import { ServiceStatus, CoreStatus, ServiceStatusLevels } from '../types';
import { PluginName } from '../../plugins';
import { calculateLegacyStatus, LegacyStatusInfo } from '../legacy_status';
import { PackageInfo } from '../../config';

import type { OpsProcessMetrics } from '../../metrics';
const SNAPSHOT_POSTFIX = /-SNAPSHOT$/;

interface Deps {
Expand Down Expand Up @@ -55,19 +55,9 @@ interface StatusHttpBody {
/** ISO-8601 date string w/o timezone */
last_updated: string;
collection_interval_in_millis: number;
process: {
memory: {
heap: {
total_in_bytes: number;
used_in_bytes: number;
size_limit: number;
};
resident_set_size_in_bytes: number;
};
event_loop_delay: number;
pid: number;
uptime_in_millis: number;
};
/** @deprecated */
process: OpsProcessMetrics;
processes: OpsProcessMetrics[];
os: {
load: Record<string, number>;
memory: {
Expand Down Expand Up @@ -151,6 +141,7 @@ export const registerStatusRoute = ({ router, config, metrics, status }: Deps) =
collection_interval_in_millis: metrics.collectionInterval,
os: lastMetrics.os,
process: lastMetrics.process,
processes: lastMetrics.processes,
response_times: lastMetrics.response_times,
concurrent_connections: lastMetrics.concurrent_connections,
requests: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,6 @@ export const MONITOR_EVENT_LOOP_DELAYS_RESET = 24 * 60 * 60 * 1000;
*/
export const MONITOR_EVENT_LOOP_DELAYS_START = 1 * 60 * 1000;

/**
* Event loop monitoring sampling rate in milliseconds.
*/
export const MONITOR_EVENT_LOOP_DELAYS_RESOLUTION = 10;

/**
* Mean event loop delay threshold for logging a warning.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
import { takeUntil, finalize, map } from 'rxjs/operators';
import { Observable, timer } from 'rxjs';
import type { ISavedObjectsRepository } from 'kibana/server';
import { EventLoopDelaysMonitor } from '../../../../../core/server';
import {
MONITOR_EVENT_LOOP_DELAYS_START,
MONITOR_EVENT_LOOP_DELAYS_INTERVAL,
MONITOR_EVENT_LOOP_DELAYS_RESET,
} from './constants';
import { storeHistogram } from './saved_objects';
import { EventLoopDelaysCollector } from './event_loop_delays';

/**
* The monitoring of the event loop starts immediately.
Expand All @@ -37,19 +37,19 @@ export function startTrackingEventLoopDelaysUsage(
histogramReset = MONITOR_EVENT_LOOP_DELAYS_RESET,
} = configs;

const eventLoopDelaysCollector = new EventLoopDelaysCollector();
const eventLoopDelaysMonitor = new EventLoopDelaysMonitor();
const resetOnCount = Math.ceil(histogramReset / collectionInterval);

timer(collectionStartDelay, collectionInterval)
.pipe(
map((i) => (i + 1) % resetOnCount === 0),
takeUntil(stopMonitoringEventLoop$),
finalize(() => eventLoopDelaysCollector.stop())
finalize(() => eventLoopDelaysMonitor.stop())
)
.subscribe(async (shouldReset) => {
const histogram = eventLoopDelaysCollector.collect();
const histogram = eventLoopDelaysMonitor.collect();
if (shouldReset) {
eventLoopDelaysCollector.reset();
eventLoopDelaysMonitor.reset();
}
await storeHistogram(histogram, internalRepository);
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import {
MONITOR_EVENT_LOOP_WARN_THRESHOLD,
ONE_MILLISECOND_AS_NANOSECONDS,
} from './constants';
import { EventLoopDelaysCollector } from './event_loop_delays';
import { EventLoopDelaysMonitor } from '../../../../../core/server';

/**
* The monitoring of the event loop starts immediately.
Expand All @@ -41,14 +41,14 @@ export function startTrackingEventLoopDelaysThreshold(
collectionInterval = MONITOR_EVENT_LOOP_THRESHOLD_INTERVAL,
} = configs;

const eventLoopDelaysCollector = new EventLoopDelaysCollector();
const eventLoopDelaysMonitor = new EventLoopDelaysMonitor();
timer(collectionStartDelay, collectionInterval)
.pipe(
takeUntil(stopMonitoringEventLoop$),
finalize(() => eventLoopDelaysCollector.stop())
finalize(() => eventLoopDelaysMonitor.stop())
)
.subscribe(async () => {
const { mean } = eventLoopDelaysCollector.collect();
const { mean } = eventLoopDelaysMonitor.collect();
const meanDurationMs = moment
.duration(mean / ONE_MILLISECOND_AS_NANOSECONDS)
.asMilliseconds();
Expand All @@ -64,6 +64,6 @@ export function startTrackingEventLoopDelaysThreshold(
});
}

eventLoopDelaysCollector.reset();
eventLoopDelaysMonitor.reset();
});
}