From f3b726053ea971e98527ea82d7f8cce2bfabb3e0 Mon Sep 17 00:00:00 2001
From: Frey Alfredsson <freysteinn@freysteinn.com>
Date: Tue, 2 Apr 2024 13:20:20 +0200
Subject: [PATCH] Added support for per CPU core monitoring

This commit adds multicore support to the stat_iterate.sh script and to
the CpuStatsRunner. The previous script only read the first entry of
/proc/stat, which gives the average CPU usage for all cores. The
modified script reads all cpu[0-9]+ records, including the average. It
retains the "---" and "Time:" framing of the original output, but each
load line is now printed as "<cpu name>: <load>". The implementation
also has some minor optimizations, which are not that significant given
how seldom the script runs.

1. It primarily relies on the awk command for the heavy lifting, making
   it easier to read. Awk reads /proc/stat itself instead of relying on
   the cat command.

2. The previous script used the seq command, which consumes more memory
   when multiple iterations occur. Awk handles this instead with a loop.

I tested the script using gawk with and without the POSIX compliance
flag (-P), and with BusyBox v1.36.1 awk.

Signed-off-by: Frey Alfredsson <freysteinn@freysteinn.com>
---
 flent/runners.py              | 15 +++++++----
 flent/scripts/stat_iterate.sh | 49 +++++++++++++++++++++++++++++------
 flent/tests/cpu_stats.inc     | 21 +++++++++++++++
 unittests/test_plotters.py    |  3 ++-
 4 files changed, 74 insertions(+), 14 deletions(-)
diff --git a/flent/runners.py b/flent/runners.py
index a5770e36..5a85a042 100644
--- a/flent/runners.py
+++ b/flent/runners.py
@@ -2326,9 +2326,12 @@ class CpuStatsRunner(ProcessRunner):
     separated by '\n---\n and a timestamp to be present in the form 'Time:
     xxxxxx.xxx' (e.g. the output of `date '+Time: %s.%N'`).
 
+    The first line is the total CPU load, and the following lines are the load of
+    each core.
     """
+
     time_re = re.compile(r"^Time: (?P<timestamp>\d+\.\d+)", re.MULTILINE)
-    value_re = re.compile(r"^\d+ \d+ (?P<load>\d+\.\d+)$", re.MULTILINE)
+    value_re = re.compile(r"^cpu(?P<core_nr>\d+)?: (?P<load>\d+\.\d+)", re.MULTILINE)
 
     def __init__(self, interval, length, host='localhost', **kwargs):
         self.interval = interval
@@ -2341,8 +2344,6 @@ def parse(self, output, error):
         raw_values = []
         metadata = {}
         for part in self.split_stream(output):
-            # Split out individual qdisc entries (in case there are more than
-            # one). If so, discard the root qdisc and sum the rest.
             timestamp = self.time_re.search(part)
             if timestamp is None:
                 continue
@@ -2351,10 +2352,14 @@ def parse(self, output, error):
 
             if value is None:
                 continue
+
             matches = {}
 
-            for k, v in list(value.groupdict().items()):
-                v = float(v)
+            for m in self.value_re.finditer(part):
+                core_nr = m.group("core_nr")
+                load = m.group("load")
+                k = f'cpu{core_nr}' if core_nr is not None else 'load'
+                v = float(load)
                 if k not in matches:
                     matches[k] = v
                 else:
diff --git a/flent/scripts/stat_iterate.sh b/flent/scripts/stat_iterate.sh
index 087580fc..94f8afaa 100755
--- a/flent/scripts/stat_iterate.sh
+++ b/flent/scripts/stat_iterate.sh
@@ -14,17 +14,50 @@ done
 
 # $5 is IDLE, $6 is IOWAIT; we count both as idle time
 command_string=$(cat <<EOF
-(for x in \$(seq $count); do date '+Time: %s.%N'; cat /proc/stat; sleep $interval ;done ) | awk 'BEGIN {idle=0; total=0}
-\$1 == "cpu" { sum=0; for (i=2;i<=NF;i++) { sum+=\$i };
-              if(total>0) {print \$5+\$6-idle " " sum-total " " 1-(\$5+\$6-idle)/(sum-total);}
-              idle=\$5+\$6; total=sum
-            }
-\$1 == "Time:" { print "---\n" \$0 }'
+set -o noglob
+awk -v COUNT=$count -v INTERVAL=$interval '
+function get_cpu_usage(count) {  # take one sample of every cpu record; print loads when count is non-zero
+    FS = " ";
+    IDLE_FIELD = 5;  # fields 5 and 6 are both accumulated as idle time below
+    IOWAIT_FIELD = 6;
+    PROC_CPU = "/proc/stat";
+    while ((getline < PROC_CPU) > 0) {
+        if (\$0 !~ /^cpu/)
+            break;  # stop at the first record that does not start with cpu
+        cpu_idle_prev[\$1] = cpu_idle[\$1];  # keep the previous sample, keyed by the record name in field 1
+        cpu_total_prev[\$1] = cpu_total[\$1];
+        cpu_idle[\$1] = 0;
+        cpu_total[\$1] = 0;
+        for (i = 2; i <= NF; i++) {  # field 1 is the record name; sum every remaining field
+            if (i == IDLE_FIELD || i == IOWAIT_FIELD)
+                cpu_idle[\$1] += \$i;
+            cpu_total[\$1] += \$i;
+        }
+        idle = cpu_idle[\$1] - cpu_idle_prev[\$1];  # change since the previous sample
+        total = cpu_total[\$1] - cpu_total_prev[\$1];
+        cpu_usage = (total != 0) ? (1 - (idle / total)) : 0  # non-idle fraction; 0 when the total did not change
+        if (count)
+            printf("%s: %f\n", \$1, cpu_usage);  # not printed when count is 0, i.e. on the first pass of the caller loop
+    }
+    close(PROC_CPU);  # close the file so the next call reads it from the start again
+}
+
+BEGIN {
+    date_cmd = "date \"+Time: %s.%N\""  # command that prints the Time: timestamp line
+    for (loop = 0; loop < COUNT; loop++) {  # one sample per iteration, COUNT samples in total
+        print("---");  # separator line written before each sample
+        date_cmd | getline date;
+        print(date);
+        close(date_cmd);  # close the pipe so the command is run again on the next iteration
+        get_cpu_usage(loop);  # loop is 0 on the first pass, so that pass prints no load lines
+        system("sleep " INTERVAL);  # wait INTERVAL before taking the next sample
+    }
+}'
 EOF
 )
 
 if [ "$host" == "localhost" ]; then
-    eval $command_string
+    eval "$command_string"
 else
-    echo $command_string | ssh $host sh
+    echo "$command_string" | ssh "$host" sh
 fi
diff --git a/flent/tests/cpu_stats.inc b/flent/tests/cpu_stats.inc
index c67af13a..6875eda6 100644
--- a/flent/tests/cpu_stats.inc
+++ b/flent/tests/cpu_stats.inc
@@ -14,6 +14,7 @@ for host in CPU_STATS_HOSTS:
 
 if CPU_STATS_HOSTS:
 
+    # Average CPU load
     PLOTS['cpu'] = {'description': 'CPU stats',
                     'type': 'timeseries',
                     'axis_labels': ['Load'],
@@ -26,6 +27,7 @@ if CPU_STATS_HOSTS:
     PLOTS['cpu_box'] = {'description': 'CPU stats (box plot)',
                         'type': 'box',
                         'parent': 'cpu'}
+
     PLOTS['cpu_bar'] = {'description': 'CPU stats (bar plot)',
                         'type': 'bar',
                         'parent': 'cpu'}
@@ -42,3 +44,22 @@ if CPU_STATS_HOSTS:
     PLOTS['cpu_bar_combine'] = {'description': 'CPU stats (bar combine plot)',
                                 'type': 'bar_combine',
                                 'parent': 'cpu_box_combine'}
+
+
+    # Per core CPU load
+    PLOTS['cpu_core'] = {'description': 'Per core CPU stats',
+                    'type': 'timeseries',
+                    'axis_labels': ['Load'],
+                    'series': [
+                        {'data': glob('cpu_stats_*'),
+                         'raw_key': glob('cpu*'),
+                         'label': 'CPU core load'},
+                    ]}
+
+    PLOTS['cpu_core_box'] = {'description': 'Per core CPU stats (box plot)',
+                        'type': 'box',
+                        'parent': 'cpu_core'}
+
+    PLOTS['cpu_core_bar'] = {'description': 'Per core CPU stats (bar plot)',
+                        'type': 'bar',
+                        'parent': 'cpu_core'}
diff --git a/unittests/test_plotters.py b/unittests/test_plotters.py
index aad9e477..fce54714 100644
--- a/unittests/test_plotters.py
+++ b/unittests/test_plotters.py
@@ -71,7 +71,8 @@
 # Plots that may fail validation
 PLOTS_MAY_FAIL = set(('tcp_cwnd', 'tcp_rtt', 'tcp_rtt_cdf',
                       'tcp_rtt_box_combine', 'tcp_rtt_bar_combine', 'tcp_pacing',
-                      'all_scaled_delivery', 'tcp_delivery_rate', 'tcp_delivery_with_rtt'))
+                      'all_scaled_delivery', 'tcp_delivery_rate', 'tcp_delivery_with_rtt',
+                      'cpu_core', 'cpu_core_box', 'cpu_core_bar'))
 
 class PlottersTestCase(ForkingTestCase):