From f3b726053ea971e98527ea82d7f8cce2bfabb3e0 Mon Sep 17 00:00:00 2001 From: Frey Alfredsson Date: Tue, 2 Apr 2024 13:20:20 +0200 Subject: [PATCH] Added support for per CPU core monitoring This commit adds multicore support to the stat_iterate.sh script and to the CpuStatsRunner. The previous script only read the first entry of the /proc/stat, which gave the average CPU usage for all cores. This modified script reads all cpu[0-9]+ records, including the average. It retains the output format of the original script; however, the implementation has some minor optimizations, which are not that significant due to how seldom the script runs. 1. It primarily relies on the awk command for the heavy lifting, making it easier to read. Awk handles reading the /proc/stat instead of the cat command. 2. The previous script used the seq command, which consumes more memory when multiple iterations occur. Awk handles this instead with a loop. I tested the script using gawk with and without the Posix compliance flag (-P) and BusyBox v1.36.1 awk. Signed-off-by: Frey Alfredsson --- flent/runners.py | 15 +++++++---- flent/scripts/stat_iterate.sh | 49 +++++++++++++++++++++++++++++------ flent/tests/cpu_stats.inc | 21 +++++++++++++++ unittests/test_plotters.py | 3 ++- 4 files changed, 74 insertions(+), 14 deletions(-) diff --git a/flent/runners.py b/flent/runners.py index a5770e36..5a85a042 100644 --- a/flent/runners.py +++ b/flent/runners.py @@ -2326,9 +2326,12 @@ class CpuStatsRunner(ProcessRunner): separated by '\n---\n and a timestamp to be present in the form 'Time: xxxxxx.xxx' (e.g. the output of `date '+Time: %s.%N'`). + The first line is the total CPU load, and the following lines are the load of + each core. """ + time_re = re.compile(r"^Time: (?P\d+\.\d+)", re.MULTILINE) - value_re = re.compile(r"^\d+ \d+ (?P\d+\.\d+)$", re.MULTILINE) + value_re = re.compile(r"^cpu(?P\d+)?: (?P\d+\.\d+)", re.MULTILINE) def __init__(self, interval, length, host='localhost', **kwargs): self.interval = interval @@ -2341,8 +2344,6 @@ def parse(self, output, error): raw_values = [] metadata = {} for part in self.split_stream(output): - # Split out individual qdisc entries (in case there are more than - # one). If so, discard the root qdisc and sum the rest. timestamp = self.time_re.search(part) if timestamp is None: continue @@ -2351,10 +2352,14 @@ def parse(self, output, error): if value is None: continue + matches = {} - for k, v in list(value.groupdict().items()): - v = float(v) + for m in self.value_re.finditer(part): + core_nr = m.group("core_nr") + load = m.group("load") + k = f'cpu{core_nr}' if core_nr is not None else 'load' + v = float(load) if k not in matches: matches[k] = v else: diff --git a/flent/scripts/stat_iterate.sh b/flent/scripts/stat_iterate.sh index 087580fc..94f8afaa 100755 --- a/flent/scripts/stat_iterate.sh +++ b/flent/scripts/stat_iterate.sh @@ -14,17 +14,50 @@ done # $5 is IDLE, $6 is IOWAIT; we count both as idle time command_string=$(cat <0) {print \$5+\$6-idle " " sum-total " " 1-(\$5+\$6-idle)/(sum-total);} - idle=\$5+\$6; total=sum - } -\$1 == "Time:" { print "---\n" \$0 }' +set -o noglob +awk -v COUNT=$count -v INTERVAL=$interval ' +function get_cpu_usage(count) { + FS = " "; + IDLE_FIELD = 5; + IOWAIT_FIELD = 6; + PROC_CPU = "/proc/stat"; + while ((getline < PROC_CPU) > 0) { + if (\$0 !~ /^cpu/) + break; + cpu_idle_prev[\$1] = cpu_idle[\$1]; + cpu_total_prev[\$1] = cpu_total[\$1]; + cpu_idle[\$1] = 0; + cpu_total[\$1] = 0; + for (i = 2; i <= NF; i++) { + if (i == IDLE_FIELD || i == IOWAIT_FIELD) + cpu_idle[\$1] += \$i; + cpu_total[\$1] += \$i; + } + idle = cpu_idle[\$1] - cpu_idle_prev[\$1]; + total = cpu_total[\$1] - cpu_total_prev[\$1]; + cpu_usage = (total != 0) ? (1 - (idle / total)) : 0 + if (count) + printf("%s: %f\n", \$1, cpu_usage); + } + close(PROC_CPU); +} + +BEGIN { + date_cmd = "date \"+Time: %s.%N\"" + for (loop = 0; loop < COUNT; loop++) { + print("---"); + date_cmd | getline date; + print(date); + close(date_cmd); + get_cpu_usage(loop); + system("sleep " INTERVAL); + } +}' EOF ) if [ "$host" == "localhost" ]; then - eval $command_string + eval "$command_string" else - echo $command_string | ssh $host sh + echo "$command_string" | ssh "$host" sh fi diff --git a/flent/tests/cpu_stats.inc b/flent/tests/cpu_stats.inc index c67af13a..6875eda6 100644 --- a/flent/tests/cpu_stats.inc +++ b/flent/tests/cpu_stats.inc @@ -14,6 +14,7 @@ for host in CPU_STATS_HOSTS: if CPU_STATS_HOSTS: + # Average CPU load PLOTS['cpu'] = {'description': 'CPU stats', 'type': 'timeseries', 'axis_labels': ['Load'], @@ -26,6 +27,7 @@ if CPU_STATS_HOSTS: PLOTS['cpu_box'] = {'description': 'CPU stats (box plot)', 'type': 'box', 'parent': 'cpu'} + PLOTS['cpu_bar'] = {'description': 'CPU stats (bar plot)', 'type': 'bar', 'parent': 'cpu'} @@ -42,3 +44,22 @@ if CPU_STATS_HOSTS: PLOTS['cpu_bar_combine'] = {'description': 'CPU stats (bar combine plot)', 'type': 'bar_combine', 'parent': 'cpu_box_combine'} + + + # Per core CPU load + PLOTS['cpu_core'] = {'description': 'Per core CPU stats', + 'type': 'timeseries', + 'axis_labels': ['Load'], + 'series': [ + {'data': glob('cpu_stats_*'), + 'raw_key': glob('cpu*'), + 'label': 'CPU core load'}, + ]} + + PLOTS['cpu_core_box'] = {'description': 'Per core CPU stats (box plot)', + 'type': 'box', + 'parent': 'cpu_core'} + + PLOTS['cpu_core_bar'] = {'description': 'Per core CPU stats (bar plot)', + 'type': 'bar', + 'parent': 'cpu_core'} diff --git a/unittests/test_plotters.py b/unittests/test_plotters.py index aad9e477..fce54714 100644 --- a/unittests/test_plotters.py +++ b/unittests/test_plotters.py @@ -71,7 +71,8 @@ # Plots that may fail validation PLOTS_MAY_FAIL = set(('tcp_cwnd', 'tcp_rtt', 'tcp_rtt_cdf', 'tcp_rtt_box_combine', 'tcp_rtt_bar_combine', 'tcp_pacing', - 'all_scaled_delivery', 'tcp_delivery_rate', 'tcp_delivery_with_rtt')) + 'all_scaled_delivery', 'tcp_delivery_rate', 'tcp_delivery_with_rtt', + 'cpu_core', 'cpu_core_box', 'cpu_core_bar')) class PlottersTestCase(ForkingTestCase):