From 1cb9072fa743b5579048e278a2496a94d92b5225 Mon Sep 17 00:00:00 2001
From: James Curtis <jxcurtis@amazon.co.uk>
Date: Mon, 19 Aug 2024 09:06:23 +0000
Subject: [PATCH] Create performance tests for hotplug latency

Create performance tests that measure the latency of hotplugging using
both a udev rule and a userspace agent to online vCPUs.

Signed-off-by: James Curtis <jxcurtis@amazon.co.uk>
---
 src/vmm/src/lib.rs                            |   1 -
 tests/conftest.py                             |   3 +
 tests/framework/http_api.py                   |   1 +
 tests/host_tools/1-cpu-hotplug.rules          |   1 +
 tests/host_tools/hotplug.sh                   |  17 ++
 tests/host_tools/hotplug_time.c               |  33 +++
 tests/host_tools/hotplug_udev.sh              |   9 +
 .../performance/test_vcpu_hotplug.py          | 195 ++++++++++++++++++
 8 files changed, 259 insertions(+), 1 deletion(-)
 create mode 100644 tests/host_tools/1-cpu-hotplug.rules
 create mode 100644 tests/host_tools/hotplug.sh
 create mode 100644 tests/host_tools/hotplug_time.c
 create mode 100644 tests/host_tools/hotplug_udev.sh
 create mode 100644 tests/integration_tests/performance/test_vcpu_hotplug.py
diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs
index 0d74bfd2276..f6f27081ab5 100644
--- a/src/vmm/src/lib.rs
+++ b/src/vmm/src/lib.rs
@@ -683,7 +683,6 @@ impl Vmm {
 
         self.acpi_device_manager.notify_cpu_container()?;
 
-        #[cfg(test)]
         if let Some(devices::BusDevice::BootTimer(timer)) =
             self.get_bus_device(DeviceType::BootTimer, "BootTimer")
         {
diff --git a/tests/conftest.py b/tests/conftest.py
index f7f3c8b6165..1800c684ffd 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -377,6 +377,9 @@ def rootfs_fxt(request, record_property):
 guest_kernel_linux_5_10 = pytest.fixture(
     guest_kernel_fxt, params=kernel_params("vmlinux-5.10*")
 )
+guest_kernel_linux_acpi_only = pytest.fixture(
+    guest_kernel_fxt, params=kernel_params("vmlinux-5.10.219")
+)
 # Use the unfiltered selector, since we don't officially support 6.1 yet.
 # TODO: switch to default selector once we add full 6.1 support.
 guest_kernel_linux_6_1 = pytest.fixture(
diff --git a/tests/framework/http_api.py b/tests/framework/http_api.py
index a1ee37174b0..c4a576853ad 100644
--- a/tests/framework/http_api.py
+++ b/tests/framework/http_api.py
@@ -123,3 +123,4 @@ def __init__(self, api_usocket_full_name):
         self.snapshot_load = Resource(self, "/snapshot/load")
         self.cpu_config = Resource(self, "/cpu-config")
         self.entropy = Resource(self, "/entropy")
+        self.hotplug = Resource(self, "/hotplug")
diff --git a/tests/host_tools/1-cpu-hotplug.rules b/tests/host_tools/1-cpu-hotplug.rules
new file mode 100644
index 00000000000..d791cc4802a
--- /dev/null
+++ b/tests/host_tools/1-cpu-hotplug.rules
@@ -0,0 +1 @@
+SUBSYSTEM=="cpu", ACTION=="add", ATTR{online}!="1", ATTR{online}="1"
diff --git a/tests/host_tools/hotplug.sh b/tests/host_tools/hotplug.sh
new file mode 100644
index 00000000000..c7e4c824ba4
--- /dev/null
+++ b/tests/host_tools/hotplug.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+# Usage: hotplug.sh N -- online cpu1..cpuN by hand, then run hotplug_time.o.
+until [[ -d /sys/devices/system/cpu/cpu$1 ]]; do
+  : # spin until the sysfs directory for cpu$1 exists
+done
+
+for idx in $(seq 1 $1); do
+  echo 1 >/sys/devices/system/cpu/cpu$idx/online
+done
+
+until [[ $(nproc) == $((1 + $1)) ]]; do
+  : # spin until nproc reports N + 1 CPUs
+done
+
+/home/hotplug_time.o
diff --git a/tests/host_tools/hotplug_time.c b/tests/host_tools/hotplug_time.c
new file mode 100644
index 00000000000..f91cc6adf78
--- /dev/null
+++ b/tests/host_tools/hotplug_time.c
@@ -0,0 +1,33 @@
+// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+// Signals hotplug completion: writes the magic value to the boot-timer MMIO address.
+
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+// Base address values are defined in arch/src/lib.rs as arch::MMIO_MEM_START.
+// Values are computed in arch/src/<arch>/mod.rs from the architecture layouts.
+// Position on the bus is defined by MMIO_LEN increments, where MMIO_LEN is
+// defined as 0x1000 in vmm/src/device_manager/mmio.rs.
+#ifdef __x86_64__
+#define MAGIC_MMIO_SIGNAL_GUEST_BOOT_COMPLETE 0xd0000000
+#endif
+#ifdef __aarch64__
+#define MAGIC_MMIO_SIGNAL_GUEST_BOOT_COMPLETE 0x40000000
+#endif
+
+#define MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE 123
+
+int main() {  // Returns 0 after writing the magic byte, 1 if /dev/mem is unusable.
+  int fd = open("/dev/mem", (O_RDWR | O_SYNC | O_CLOEXEC));
+  int mapped_size = getpagesize();
+  if (fd < 0) return 1;  // cannot open /dev/mem, so there is nothing to map
+  char *map_base = mmap(NULL, mapped_size, PROT_WRITE, MAP_SHARED, fd,
+                        MAGIC_MMIO_SIGNAL_GUEST_BOOT_COMPLETE);
+  if (map_base == MAP_FAILED) return 1;  // never dereference MAP_FAILED
+  *map_base = MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE;
+  msync(map_base, mapped_size, MS_ASYNC);
+}
diff --git a/tests/host_tools/hotplug_udev.sh b/tests/host_tools/hotplug_udev.sh
new file mode 100644
index 00000000000..34a19d19943
--- /dev/null
+++ b/tests/host_tools/hotplug_udev.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+# Usage: hotplug_udev.sh N -- wait for N + 1 online CPUs, then run hotplug_time.o.
+until [[ $(nproc) == $((1 + $1)) ]]; do
+  : # spin; this script does not online any CPU itself
+done
+
+/home/hotplug_time.o
diff --git a/tests/integration_tests/performance/test_vcpu_hotplug.py b/tests/integration_tests/performance/test_vcpu_hotplug.py
new file mode 100644
index 00000000000..376dbc5233e
--- /dev/null
+++ b/tests/integration_tests/performance/test_vcpu_hotplug.py
@@ -0,0 +1,195 @@
+# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Testing hotplug performance"""
+
+import os
+import platform
+import re
+import time
+from pathlib import Path
+
+import pandas
+import pytest
+
+from framework.utils_cpuid import check_guest_cpuid_output
+from host_tools.cargo_build import gcc_compile
+
+
+@pytest.mark.nonci
+@pytest.mark.skipif(
+    platform.machine() != "x86_64", reason="Hotplug only enabled on x86_64."
+)
+@pytest.mark.parametrize(
+    "vcpu_count", [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+)
+def test_custom_udev_rule_latency(
+    microvm_factory,
+    guest_kernel_linux_6_1,
+    rootfs_rw,
+    vcpu_count,
+    results_dir,
+    test_fc_session_root_path,
+):
+    """Measure vCPU hotplug latency when a guest udev rule onlines the new CPUs"""
+    hotplug_time_path = os.path.join(test_fc_session_root_path, "hotplug_time.o")
+    gcc_compile(Path("./host_tools/hotplug_time.c"), hotplug_time_path)
+    data = []  # one row per iteration: vcpus, api (ms), onlining (ms or None)
+    for _ in range(20):  # every sample uses a freshly built microVM
+        uvm_hotplug = microvm_factory.build(guest_kernel_linux_6_1, rootfs_rw)
+        uvm_hotplug.jailer.extra_args.update({"boot-timer": None, "no-seccomp": None})
+        uvm_hotplug.help.enable_console()
+        uvm_hotplug.spawn()
+        uvm_hotplug.basic_config(vcpu_count=1, mem_size_mib=128)
+        uvm_hotplug.add_net_iface()
+        uvm_hotplug.start()
+        uvm_hotplug.ssh.scp_put(
+            Path("./host_tools/hotplug_udev.sh"), Path("/home/hotplug_udev.sh")
+        )
+        uvm_hotplug.ssh.scp_put(hotplug_time_path, Path("/home/hotplug_time.o"))
+        uvm_hotplug.ssh.scp_put(
+            Path("./host_tools/1-cpu-hotplug.rules"),
+            Path("/usr/lib/udev/rules.d/1-cpu-hotplug.rules"),
+        )
+        uvm_hotplug.ssh.run(
+            f"udevadm control --reload-rules && tmux new-session -d /bin/bash /home/hotplug_udev.sh {vcpu_count}"
+        )
+
+        uvm_hotplug.api.hotplug.put(Vcpu={"add": vcpu_count})
+        time.sleep(5)  # fixed wait for the guest to online the vCPUs and signal
+
+        # Extract API call duration
+        api_duration = (
+            float(
+                re.findall(
+                    r"Total previous API call duration: (\d+) us\.",
+                    uvm_hotplug.log_data,
+                )[-1]
+            )
+            / 1000
+        )
+        try:
+            timestamp = (
+                float(
+                    re.findall(
+                        r"Guest-boot-time\s+\=\s+(\d+)\s+us", uvm_hotplug.log_data
+                    )[0]
+                )
+                / 1000
+            )
+        except IndexError:  # no Guest-boot-time line was logged for this run
+            uvm_hotplug.kill()
+            data.append({"vcpus": vcpu_count, "api": api_duration, "onlining": None})
+            continue
+
+        data.append({"vcpus": vcpu_count, "api": api_duration, "onlining": timestamp})
+
+        check_guest_cpuid_output(
+            uvm_hotplug,
+            "lscpu",
+            None,
+            ":",
+            {
+                "CPU(s)": str(1 + vcpu_count),
+                "On-line CPU(s) list": f"0-{vcpu_count}",
+            },
+        )
+        uvm_hotplug.kill()
+
+    # Test-specific name so the other hotplug test cannot overwrite this CSV
+    output_file = results_dir / f"hotplug-udev-{vcpu_count}.csv"
+    csv_data = pandas.DataFrame.from_dict(data).to_csv(
+        index=False,
+        float_format="%.3f",
+    )
+
+    output_file.write_text(csv_data)
+
+
+@pytest.mark.nonci
+@pytest.mark.skipif(
+    platform.machine() != "x86_64", reason="Hotplug only enabled on x86_64."
+)
+@pytest.mark.parametrize(
+    "vcpu_count", [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+)
+def test_manual_latency(
+    microvm_factory,
+    guest_kernel_linux_6_1,
+    rootfs_rw,
+    vcpu_count,
+    results_dir,
+    test_fc_session_root_path,
+):
+    """Measure vCPU hotplug latency when a guest script onlines the new CPUs"""
+
+    hotplug_time_path = os.path.join(test_fc_session_root_path, "hotplug_time.o")
+    gcc_compile(Path("./host_tools/hotplug_time.c"), hotplug_time_path)
+    data = []  # one row per iteration: vcpus, api (ms), onlining (ms or None)
+    for _ in range(20):  # every sample uses a freshly built microVM
+        uvm_hotplug = microvm_factory.build(guest_kernel_linux_6_1, rootfs_rw)
+        uvm_hotplug.jailer.extra_args.update({"boot-timer": None, "no-seccomp": None})
+        uvm_hotplug.help.enable_console()
+        uvm_hotplug.spawn()
+        uvm_hotplug.basic_config(vcpu_count=1, mem_size_mib=128)
+        uvm_hotplug.add_net_iface()
+        uvm_hotplug.start()
+
+        uvm_hotplug.ssh.scp_put(
+            Path("./host_tools/hotplug.sh"), Path("/home/hotplug.sh")
+        )
+        uvm_hotplug.ssh.scp_put(hotplug_time_path, Path("/home/hotplug_time.o"))
+        uvm_hotplug.ssh.run(
+            f"tmux new-session -d /bin/bash /home/hotplug.sh {vcpu_count}"
+        )
+
+        uvm_hotplug.api.hotplug.put(Vcpu={"add": vcpu_count})
+
+        time.sleep(5)  # fixed wait for the guest to online the vCPUs and signal
+        # Extract API call duration
+        api_duration = (
+            float(
+                re.findall(
+                    r"Total previous API call duration: (\d+) us\.",
+                    uvm_hotplug.log_data,
+                )[-1]
+            )
+            / 1000
+        )
+        try:
+            timestamp = (
+                float(
+                    re.findall(
+                        r"Guest-boot-time\s+\=\s+(\d+)\s+us", uvm_hotplug.log_data
+                    )[0]
+                )
+                / 1000
+            )
+        except IndexError:  # no Guest-boot-time line was logged for this run
+            data.append({"vcpus": vcpu_count, "api": api_duration, "onlining": None})
+            uvm_hotplug.kill()
+            continue
+
+        data.append({"vcpus": vcpu_count, "api": api_duration, "onlining": timestamp})
+
+        check_guest_cpuid_output(
+            uvm_hotplug,
+            "lscpu",
+            None,
+            ":",
+            {
+                "CPU(s)": str(1 + vcpu_count),
+                "On-line CPU(s) list": f"0-{vcpu_count}",
+            },
+        )
+
+        uvm_hotplug.kill()
+
+    # Test-specific name so the other hotplug test cannot overwrite this CSV
+    output_file = results_dir / f"hotplug-manual-{vcpu_count}.csv"
+    csv_data = pandas.DataFrame.from_dict(data).to_csv(
+        index=False,
+        float_format="%.3f",
+    )
+
+    output_file.write_text(csv_data)