Skip to content

Commit

Permalink
[8.11] Core metrics collection - collect more memory related metrics (#…
Browse files Browse the repository at this point in the history
…172146) (#172271)

# Backport

This will backport the following commits from `main` to `8.11`:
- [Core metrics collection - collect more memory related metrics
(#172146)](#172146)

<!--- Backport version: 8.9.7 -->

### Questions ?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Pierre
Gayvallet","email":"[email protected]"},"sourceCommit":{"committedDate":"2023-11-30T10:47:46Z","message":"Core
metrics collection - collect more memory related metrics (#172146)\n\n##
Summary\r\n\r\nPart of
https://github.com/elastic/kibana/issues/171060\r\n\r\nAdd additional
memory metrics for collection:\r\n\r\n**process**:\r\n-
`process.memory.array_buffers_in_bytes`
the\r\n`process.memoryUsage().arrayBuffers` value\r\n-
`process.memory.external_in_bytes`
the\r\n`process.memoryUsage().external` value\r\n\r\n**cgroup**: (v2
only)\r\n- `os.cgroupMemory.current_in_bytes` value
from\r\n`/sys/fs/cgroup/{group}/memory.current`\r\n-
`os.cgroupMemory.swap_current_in_bytes` value
from\r\n`/sys/fs/cgroup/{group}/memory.swap.current`","sha":"b323fc90a81dd404686c1858a3c525ae1cc922ae","branchLabelMapping":{"^v8.12.0$":"main","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["Team:Core","release_note:skip","Feature:Stack
Monitoring","backport:prev-minor","v8.12.0"],"number":172146,"url":"https://github.com/elastic/kibana/pull/172146","mergeCommit":{"message":"Core
metrics collection - collect more memory related metrics (#172146)\n\n##
Summary\r\n\r\nPart of
https://github.com/elastic/kibana/issues/171060\r\n\r\nAdd additional
memory metrics for collection:\r\n\r\n**process**:\r\n-
`process.memory.array_buffers_in_bytes`
the\r\n`process.memoryUsage().arrayBuffers` value\r\n-
`process.memory.external_in_bytes`
the\r\n`process.memoryUsage().external` value\r\n\r\n**cgroup**: (v2
only)\r\n- `os.cgroupMemory.current_in_bytes` value
from\r\n`/sys/fs/cgroup/{group}/memory.current`\r\n-
`os.cgroupMemory.swap_current_in_bytes` value
from\r\n`/sys/fs/cgroup/{group}/memory.swap.current`","sha":"b323fc90a81dd404686c1858a3c525ae1cc922ae"}},"sourceBranch":"main","suggestedTargetBranches":[],"targetPullRequestStates":[{"branch":"main","label":"v8.12.0","labelRegex":"^v8.12.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/172146","number":172146,"mergeCommit":{"message":"Core
metrics collection - collect more memory related metrics (#172146)\n\n##
Summary\r\n\r\nPart of
https://github.com/elastic/kibana/issues/171060\r\n\r\nAdd additional
memory metrics for collection:\r\n\r\n**process**:\r\n-
`process.memory.array_buffers_in_bytes`
the\r\n`process.memoryUsage().arrayBuffers` value\r\n-
`process.memory.external_in_bytes`
the\r\n`process.memoryUsage().external` value\r\n\r\n**cgroup**: (v2
only)\r\n- `os.cgroupMemory.current_in_bytes` value
from\r\n`/sys/fs/cgroup/{group}/memory.current`\r\n-
`os.cgroupMemory.swap_current_in_bytes` value
from\r\n`/sys/fs/cgroup/{group}/memory.swap.current`","sha":"b323fc90a81dd404686c1858a3c525ae1cc922ae"}}]}]
BACKPORT-->

Co-authored-by: Pierre Gayvallet <[email protected]>
  • Loading branch information
kibanamachine and pgayvallet authored Nov 30, 2023
1 parent 0e1d692 commit 4131708
Show file tree
Hide file tree
Showing 19 changed files with 176 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ const mockedResponse: StatusResponse = {
total_in_bytes: 0,
},
resident_set_size_in_bytes: 1,
array_buffers_in_bytes: 1,
external_in_bytes: 1,
},
event_loop_delay: 1,
event_loop_delay_histogram: mocked.createHistogram(),
Expand All @@ -96,6 +98,8 @@ const mockedResponse: StatusResponse = {
total_in_bytes: 0,
},
resident_set_size_in_bytes: 1,
array_buffers_in_bytes: 1,
external_in_bytes: 1,
},
event_loop_delay: 1,
event_loop_delay_histogram: mocked.createHistogram(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,7 @@ describe('OsCgroupMetricsCollector', () => {
await collector.collect();

expect(gatherV2CgroupMetrics).toHaveBeenCalledTimes(1);
expect(gatherV2CgroupMetrics).toHaveBeenCalledWith({
cpuAcctPath: '/groupname',
cpuPath: '/groupname',
});
expect(gatherV2CgroupMetrics).toHaveBeenCalledWith('/groupname');
expect(gatherV1CgroupMetrics).toHaveBeenCalledTimes(0);
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,13 @@ export class OsCgroupMetricsCollector implements MetricsCollector<OsCgroupMetric
return {};
}

const args = { cpuAcctPath: this.cpuAcctPath!, cpuPath: this.cpuPath! };
// "await" to handle any errors here.
return await (this.isCgroup2 ? gatherV2CgroupMetrics(args) : gatherV1CgroupMetrics(args));
return await (this.isCgroup2
? gatherV2CgroupMetrics(this.cpuAcctPath!)
: gatherV1CgroupMetrics({
cpuAcctPath: this.cpuAcctPath!,
cpuPath: this.cpuPath!,
}));
} catch (err) {
this.noCgroupPresent = true;

Expand All @@ -67,10 +71,15 @@ export class OsCgroupMetricsCollector implements MetricsCollector<OsCgroupMetric
private async initializePaths(): Promise<void> {
if (this.hasPaths()) return;

const { data: cgroups, v2 } = await gatherInfo();
this.isCgroup2 = v2;
this.cpuPath = this.options.cpuPath || cgroups[GROUP_CPU];
this.cpuAcctPath = this.options.cpuAcctPath || cgroups[GROUP_CPUACCT];
const result = await gatherInfo();
this.isCgroup2 = result.v2;
if (result.v2) {
this.cpuPath = result.path;
this.cpuAcctPath = result.path;
} else {
this.cpuPath = this.options.cpuPath || result.data[GROUP_CPU];
this.cpuAcctPath = this.options.cpuAcctPath || result.data[GROUP_CPUACCT];
}

// prevents undefined cgroup paths
this.noCgroupPresent = Boolean(!this.cpuPath || !this.cpuAcctPath);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@ describe('gatherInfo', () => {
'/proc/self/cgroup': `0:controller:/path
1:controller2,controller3:/otherpath`,
});
const { data } = await gatherInfo();
expect(data).toEqual({
controller: '/path',
controller2: '/otherpath',
controller3: '/otherpath',
const result = await gatherInfo();
expect(result).toEqual({
v2: false,
data: {
controller: '/path',
controller2: '/otherpath',
controller3: '/otherpath',
},
});
});

Expand All @@ -30,15 +33,15 @@ describe('gatherInfo', () => {
'/proc/self/cgroup': `0:controller:/path
1:controller2,controller3:/otherpath`,
});
await expect(gatherInfo()).resolves.toMatchObject({ v2: false });
expect(await gatherInfo()).toMatchObject({ v2: false });
mockFs({
'/proc/self/cgroup': `
0::/path
`,
});
await expect(gatherInfo()).resolves.toMatchObject({ v2: true });
expect(await gatherInfo()).toMatchObject({ v2: true });
});

test('missing cgroup file', async () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
*/
import fs from 'fs/promises';

import { GROUP_CPU, GROUP_CPUACCT } from './constants';

const CONTROL_GROUP_RE = new RegExp('\\d+:([^:]+):(/.*)');
const CONTROLLER_SEPARATOR_RE = ',';
const PROC_SELF_CGROUP_FILE = '/proc/self/cgroup';
Expand All @@ -27,10 +25,15 @@ async function readProcSelf(): Promise<string[]> {
return data.split(/\n/).filter((line) => line.trim().length > 0);
}

interface Result {
data: Record<string, string>;
v2: boolean;
}
type Result =
| {
v2: true;
path: string;
}
| {
v2: false;
data: Record<string, string>;
};

export async function gatherInfo(): Promise<Result> {
const lines = await readProcSelf();
Expand All @@ -39,11 +42,8 @@ export async function gatherInfo(): Promise<Result> {
// eslint-disable-next-line prettier/prettier
const [/* '0' */, /* '' */, path] = lines[0].trim().split(':');
return {
data: {
[GROUP_CPU]: path,
[GROUP_CPUACCT]: path,
},
v2: true,
path,
};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@

import type { OpsOsMetrics } from '@kbn/core-metrics-server';

export type OsCgroupMetrics = Pick<OpsOsMetrics, 'cpu' | 'cpuacct'>;
export type OsCgroupMetrics = Pick<OpsOsMetrics, 'cpu' | 'cpuacct' | 'cgroup_memory'>;
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,17 @@ system_usec 125968
nr_periods 123
nr_throttled 1
throttled_usec 123123`,
'/sys/fs/cgroup/memory.current': '9000',
'/sys/fs/cgroup/memory.swap.current': '42',
});

expect(await gatherV2CgroupMetrics({ cpuAcctPath: '/', cpuPath: '/' })).toMatchInlineSnapshot(`
const metrics = await gatherV2CgroupMetrics('/');
expect(metrics).toMatchInlineSnapshot(`
Object {
"cgroup_memory": Object {
"current_in_bytes": 9000,
"swap_current_in_bytes": 42,
},
"cpu": Object {
"cfs_period_micros": 100000,
"cfs_quota_micros": -1,
Expand Down Expand Up @@ -54,11 +61,17 @@ system_usec 125968
nr_periods 123
nr_throttled 1
throttled_usec 123123`,
'/sys/fs/cgroup/mypath/memory.current': '9876',
'/sys/fs/cgroup/mypath/memory.swap.current': '132645',
});

expect(await gatherV2CgroupMetrics({ cpuAcctPath: '/mypath', cpuPath: '/mypath' }))
.toMatchInlineSnapshot(`
const metrics = await gatherV2CgroupMetrics('/mypath');
expect(metrics).toMatchInlineSnapshot(`
Object {
"cgroup_memory": Object {
"current_in_bytes": 9876,
"swap_current_in_bytes": 132645,
},
"cpu": Object {
"cfs_period_micros": 100000,
"cfs_quota_micros": 111,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,37 +13,60 @@ import type { OsCgroupMetrics } from './types';
const PROC_CGROUP2_DIR = '/sys/fs/cgroup';
const CPU_STATS_FILE = 'cpu.stat';
const CPU_MAX_FILE = 'cpu.max';
const MEMORY_CURRENT_FILE = 'memory.current';
const MEMORY_SWAP_CURRENT_FILE = 'memory.swap.current';

interface Arg {
cpuPath: string;
cpuAcctPath: string;
}
const getCGroupFilePath = (group: string, fileName: string): string => {
return joinPath(PROC_CGROUP2_DIR, group, fileName);
};

export async function gatherV2CgroupMetrics(arg: Arg): Promise<OsCgroupMetrics> {
const [{ usage_nanos: usageNanos, ...stat }, cpuMax] = await Promise.all([
readCPUStat(arg.cpuPath),
readCPUMax(arg.cpuPath),
]);
export async function gatherV2CgroupMetrics(group: string): Promise<OsCgroupMetrics> {
const [{ usage_nanos: usageNanos, ...stat }, cpuMax, memoryCurrent, swapCurrent] =
await Promise.all([
readCPUStat(group),
readCPUMax(group),
readMemoryCurrent(group),
readSwapCurrent(group),
]);

return {
cpu: {
...cpuMax,
control_group: arg.cpuPath,
control_group: group,
stat,
},
cpuacct: {
control_group: arg.cpuPath,
control_group: group,
usage_nanos: usageNanos,
},
cgroup_memory: {
current_in_bytes: memoryCurrent,
swap_current_in_bytes: swapCurrent,
},
};
}

interface CPUMax {
cfs_period_micros: number;
cfs_quota_micros: number;
}

async function readMemoryCurrent(group: string): Promise<number> {
const rawMemoryCurrent = (await fs.readFile(getCGroupFilePath(group, MEMORY_CURRENT_FILE)))
.toString()
.trim();
return parseInt(rawMemoryCurrent, 10);
}

async function readSwapCurrent(group: string): Promise<number> {
const rawMemoryCurrent = (await fs.readFile(getCGroupFilePath(group, MEMORY_SWAP_CURRENT_FILE)))
.toString()
.trim();
return parseInt(rawMemoryCurrent, 10);
}

async function readCPUMax(group: string): Promise<CPUMax> {
const [quota, period] = (await fs.readFile(joinPath(PROC_CGROUP2_DIR, group, CPU_MAX_FILE)))
const [quota, period] = (await fs.readFile(getCGroupFilePath(group, CPU_MAX_FILE)))
.toString()
.trim()
.split(/\s+/);
Expand All @@ -62,7 +85,7 @@ async function readCPUStat(group: string): Promise<CPUStat> {
time_throttled_nanos: -1,
usage_nanos: -1,
};
return (await fs.readFile(joinPath(PROC_CGROUP2_DIR, group, CPU_STATS_FILE)))
return (await fs.readFile(getCGroupFilePath(group, CPU_STATS_FILE)))
.toString()
.split(/\n/)
.reduce((acc, line) => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ function createMockOpsProcessMetrics(): OpsProcessMetrics {
memory: {
heap: { total_in_bytes: 1, used_in_bytes: 1, size_limit: 1 },
resident_set_size_in_bytes: 1,
array_buffers_in_bytes: 1,
external_in_bytes: 1,
},
event_loop_delay: 1,
event_loop_delay_histogram: histogram,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,15 @@ describe('ProcessMetricsCollector', () => {
const heapUsed = 4688;
const heapSizeLimit = 5788;
const rss = 5865;
const external = 9001;
const arrayBuffers = 42;

jest.spyOn(process, 'memoryUsage').mockImplementation(() => ({
rss,
heapTotal,
heapUsed,
external: 0,
arrayBuffers: 0,
external,
arrayBuffers,
}));

jest.spyOn(v8, 'getHeapStatistics').mockImplementation(
Expand All @@ -83,6 +86,8 @@ describe('ProcessMetricsCollector', () => {
expect(metrics[0].memory.heap.used_in_bytes).toEqual(heapUsed);
expect(metrics[0].memory.heap.size_limit).toEqual(heapSizeLimit);
expect(metrics[0].memory.resident_set_size_in_bytes).toEqual(rss);
expect(metrics[0].memory.external_in_bytes).toEqual(external);
expect(metrics[0].memory.array_buffers_in_bytes).toEqual(arrayBuffers);
});
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ export class ProcessMetricsCollector implements MetricsCollector<OpsProcessMetri
size_limit: heapStats.heap_size_limit,
},
resident_set_size_in_bytes: memoryUsage.rss,
array_buffers_in_bytes: memoryUsage.arrayBuffers,
external_in_bytes: memoryUsage.external,
},
pid: process.pid,
event_loop_delay: eventLoopDelayHistogram.mean,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ export function createMockOpsProcessMetrics(): OpsProcessMetrics {
memory: {
heap: { total_in_bytes: 1, used_in_bytes: 1, size_limit: 1 },
resident_set_size_in_bytes: 1,
external_in_bytes: 1,
array_buffers_in_bytes: 1,
},
event_loop_delay: 1,
event_loop_delay_histogram: histogram,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,15 @@ function createMockOpsMetrics(testMetrics: Partial<OpsMetrics>): OpsMetrics {
...testMetrics,
};
}

const testMetrics = {
process: {
memory: { heap: { used_in_bytes: 100 } },
memory: {
heap: { used_in_bytes: 100, total_in_bytes: 200, size_limit: 300 },
resident_set_size_in_bytes: 400,
external_in_bytes: 500,
array_buffers_in_bytes: 600,
},
uptime_in_millis: 1500,
event_loop_delay: 50,
event_loop_delay_histogram: { percentiles: { '50': 50, '75': 75, '95': 95, '99': 99 } },
Expand Down Expand Up @@ -127,9 +133,14 @@ describe('getEcsOpsMetricsLog', () => {
"utilization": 0.6365329598160299,
},
"memory": Object {
"arrayBuffersInBytes": 600,
"externalInBytes": 500,
"heap": Object {
"sizeLimit": 300,
"totalInBytes": 200,
"usedInBytes": 100,
},
"residentSetSizeInBytes": 400,
},
"uptime": 1,
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,12 @@ export function getEcsOpsMetricsLog(metrics: OpsMetrics) {
memory: {
heap: {
usedInBytes: processMemoryUsedInBytes,
totalInBytes: process?.memory?.heap.total_in_bytes,
sizeLimit: process?.memory?.heap.size_limit,
},
residentSetSizeInBytes: process?.memory?.resident_set_size_in_bytes,
externalInBytes: process?.memory?.external_in_bytes,
arrayBuffersInBytes: process?.memory?.array_buffers_in_bytes,
},
eventLoopDelay: eventLoopDelayVal,
eventLoopDelayHistogram: eventLoopDelayHistVals,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,14 @@ describe('MetricsService', () => {
"eventLoopDelayHistogram": undefined,
"eventLoopUtilization": undefined,
"memory": Object {
"arrayBuffersInBytes": undefined,
"externalInBytes": undefined,
"heap": Object {
"sizeLimit": undefined,
"totalInBytes": undefined,
"usedInBytes": undefined,
},
"residentSetSizeInBytes": undefined,
},
"uptime": undefined,
},
Expand Down
Loading

0 comments on commit 4131708

Please sign in to comment.