diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs
index f64e2cce877a..5b3f2b6f5eac 100644
--- a/src/vmm/src/builder.rs
+++ b/src/vmm/src/builder.rs
@@ -185,7 +185,28 @@ fn create_vmm_and_vcpus(
-    let resource_allocator = ResourceAllocator::new()?;
+    let mut resource_allocator = ResourceAllocator::new()?;
 
     // Instantiate the MMIO device manager.
-    let mmio_device_manager = MMIODeviceManager::new();
+    let mut mmio_device_manager = MMIODeviceManager::new();
+
+    #[cfg(target_arch = "x86_64")]
+    {
+        // For x86, we need to create the interrupt controller before calling
+        // `KVM_CREATE_VCPUS`. We also need it before instantiating the ACPI
+        // device manager, because the CpuContainer needs to create and register IRQs.
+        setup_interrupt_controller(&mut vm)?;
+
+        // The boot timer device needs to be the first device attached in order
+        // to maintain the same MMIO address referenced in the documentation
+        // and tests.
+        // It has to be instantiated here, before the CpuContainer, to ensure
+        // that it gets the correct address: the first page of MMIO memory.
+        if boot_timer_enabled {
+            let boot_timer = crate::devices::pseudo::BootTimer::new(TimestampUs::default());
+
+            mmio_device_manager
+                .register_mmio_boot_timer(&mut resource_allocator, boot_timer)
+                .map_err(RegisterMmioDevice)?;
+        }
+    }
 
     // Instantiate ACPI device manager.
     #[cfg(target_arch = "x86_64")]
diff --git a/src/vmm/src/devices/pseudo/boot_timer.rs b/src/vmm/src/devices/pseudo/boot_timer.rs
index ba16e92355f0..255ec6f28017 100644
--- a/src/vmm/src/devices/pseudo/boot_timer.rs
+++ b/src/vmm/src/devices/pseudo/boot_timer.rs
@@ -10,7 +10,8 @@ const MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE: u8 = 123;
 /// Pseudo device to record the kernel boot time.
 #[derive(Debug)]
 pub struct BootTimer {
-    start_ts: TimestampUs,
+    /// Start timestamp, public so that vCPU hotplug can re-arm the timer.
+    pub start_ts: TimestampUs,
 }
 
 impl BootTimer {
diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs
index 944bc1929df8..25758506e586 100644
--- a/src/vmm/src/lib.rs
+++ b/src/vmm/src/lib.rs
@@ -621,6 +621,8 @@ impl Vmm {
         &mut self,
         config: HotplugVcpuConfig,
     ) -> Result<MachineConfigUpdate, HotplugVcpuError> {
+        use utils::time::TimestampUs;
+        use crate::logger::IncMetric;
         if config.add < 1 {
             return Err(HotplugVcpuError::VcpuCountTooLow);
@@ -689,6 +691,14 @@ impl Vmm {
         self.resume_vcpu_threads(start_idx.into())?;
         self.acpi_device_manager.notify_cpu_container()?;
 
+        // Re-arm the boot timer so the guest can signal, with the same magic
+        // MMIO write used at boot, once the hotplugged vCPUs are online.
+        if let Some(devices::BusDevice::BootTimer(dev)) = self
+            .mmio_device_manager
+            .get_device(DeviceType::BootTimer, "BootTimer")
+        {
+            dev.lock().unwrap().start_ts = TimestampUs::default();
+        }
         Ok(new_machine_config)
     }
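Taken together, the three Rust changes reuse the boot timer for hotplug timing: the device always occupies the first MMIO slot, and hotplug_vcpus re-arms start_ts so the guest's next magic write produces a fresh "Guest-boot-time" log line. A small sketch of the address arithmetic, with constants mirroring the arch::MMIO_MEM_START and MMIO_LEN values quoted in hotplug_time.c further down (boot_timer_address itself is a hypothetical helper, not part of this diff):

    # Where the boot timer lands on the MMIO bus (sketch; values taken from
    # the source comments in hotplug_time.c below).
    MMIO_MEM_START = {"x86_64": 0xD000_0000, "aarch64": 0x4000_0000}
    MMIO_LEN = 0x1000  # one 4 KiB slot per MMIO device

    def boot_timer_address(arch: str) -> int:
        """First device attached -> slot 0, i.e. the first page of MMIO memory."""
        return MMIO_MEM_START[arch] + 0 * MMIO_LEN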
diff --git a/tests/conftest.py b/tests/conftest.py
index bcd9c1f69802..1e1766d8df0b 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -361,6 +361,9 @@ def rootfs_fxt(request, record_property):
 guest_kernel_linux_5_10 = pytest.fixture(
     guest_kernel_fxt, params=kernel_params("vmlinux-5.10*")
 )
+guest_kernel_linux_acpi_only = pytest.fixture(
+    guest_kernel_fxt, params=kernel_params("vmlinux-5.10.221")
+)
 # Use the unfiltered selector, since we don't officially support 6.1 yet.
 # TODO: switch to default selector once we add full 6.1 support.
 guest_kernel_linux_6_1 = pytest.fixture(
@@ -394,6 +397,12 @@ def uvm_plain_rw(microvm_factory, guest_kernel_linux_5_10, rootfs_rw):
     return microvm_factory.build(guest_kernel_linux_5_10, rootfs_rw)
 
 
+@pytest.fixture
+def uvm_hotplug(microvm_factory, guest_kernel_linux_acpi_only, rootfs_rw):
+    """A plain microvm with an ACPI-enabled 5.10 guest kernel, for vCPU hotplug tests."""
+    return microvm_factory.build(guest_kernel_linux_acpi_only, rootfs_rw)
+
+
 @pytest.fixture
 def uvm_nano(uvm_plain):
     """A preconfigured uvm with 2vCPUs and 256MiB of memory
diff --git a/tests/host_tools/1-cpu-hotplug.rules b/tests/host_tools/1-cpu-hotplug.rules
new file mode 100644
index 000000000000..d791cc4802a7
--- /dev/null
+++ b/tests/host_tools/1-cpu-hotplug.rules
@@ -0,0 +1 @@
+SUBSYSTEM=="cpu", ACTION=="add", ATTR{online}!="1", ATTR{online}="1"
diff --git a/tests/host_tools/hotplug.py b/tests/host_tools/hotplug.py
new file mode 100644
index 000000000000..1f2d0743e28a
--- /dev/null
+++ b/tests/host_tools/hotplug.py
@@ -0,0 +1,70 @@
+# import pandas
+# import re
+# from framework.microvm import MicroVMFactory
+#
+# KERNEL = "vmlinux-5.10.221"
+# ROOTFS = "ubuntu-22.04.ext4"
+#
+#
+# def run_tests():
+#     factory = MicroVMFactory(fc_binary_path, jailer_binary_path)
+#     manual_data = test_manual_latency(factory)
+#     manual_data.to_csv("~/dev/results/manual_hotplug_data.csv")
+#
+# def test_manual_latency(microvm_factory):
+#     """Test the latency for hotplugging and booting CPUs in the guest"""
+#     fc_binary_path, jailer_binary_path = build_tools.get_firecracker_binaries()
+#     df = pandas.DataFrame(columns=["vcpus", "api", "onlining"])
+#     gcc_compile(Path("./hotplug_time.c"), Path("./hotplug_time.o"))
+#     data = []
+#     for vcpu_count in range(2, 30, 2):
+#         for i in range(50):
+#             uvm_hotplug = microvm_factory.build(KERNEL, ROOTFS)
+#             uvm_hotplug.jailer.extra_args.update({"boot-timer": None, "no-seccomp": None})
+#             uvm_hotplug.help.enable_console()
+#             uvm_hotplug.spawn()
+#             uvm_hotplug.basic_config(vcpu_count=1, mem_size_mib=128)
+#             uvm_hotplug.add_net_iface()
+#             uvm_hotplug.start()
+#             uvm_hotplug.ssh.scp_put(Path("./host_tools/hotplug.sh"), Path("/home/hotplug.sh"))
+#             uvm_hotplug.ssh.scp_put(Path("./host_tools/hotplug_time.o"), Path("/home/hotplug_time.o"))
+#             uvm_hotplug.ssh.run("tmux new-session -d /bin/bash /home/hotplug.sh > /home/test 2>&1")
+#
+#
+#             uvm_hotplug.api.hotplug.put(Vcpu={"add": vcpu_count})
+#
+#             time.sleep(0.25)
+#             # Extract API call duration
+#             api_duration = float(re.findall(r"Total previous API call duration: (\d+) us\.", uvm_hotplug.log_data)[-1]) / 1000
+#             timestamp = float(re.findall(r"Guest-boot-time\s+\=\s+(\d+)\s+us", uvm_hotplug.log_data)[0]) / 1000
+#             data.append({"vcpus": vcpu_count, "api": api_duration, "onlining": timestamp})
+#     return pandas.DataFrame.from_dict(data)
+
+# def test_custom_udev_latency():
+#     """Test the latency for hotplugging and booting CPUs in the guest"""
+#     fc_binary_path, jailer_binary_path = build_tools.get_firecracker_binaries()
+#     df = pandas.DataFrame(columns=["vcpus", "api", "onlining"])
+#     gcc_compile(Path("./hotplug_time.c"), Path("./hotplug_time.o"))
+#     data = []
+#     for vcpu_count in range(2, 30, 2):
+#         for i in range(50):
+#             uvm_hotplug = microvm_factory.build(KERNEL, ROOTFS)
+#             uvm_hotplug.jailer.extra_args.update({"boot-timer": None, "no-seccomp": None})
+#             uvm_hotplug.help.enable_console()
+#             uvm_hotplug.spawn()
+#             uvm_hotplug.basic_config(vcpu_count=1, mem_size_mib=128)
+#             uvm_hotplug.add_net_iface()
+#             uvm_hotplug.start()
+#             uvm_hotplug.ssh.scp_put(Path("./host_tools/hotplug.sh"), Path("/home/hotplug.sh"))
+#             uvm_hotplug.ssh.scp_put(Path("./host_tools/hotplug_time.o"), Path("/home/hotplug_time.o"))
+#             uvm_hotplug.ssh.run("tmux new-session -d /bin/bash /home/hotplug.sh > /home/test 2>&1")
+#
+#
+#             uvm_hotplug.api.hotplug.put(Vcpu={"add": vcpu_count})
+#
+#             time.sleep(0.25)
+#             # Extract API call duration
+#             api_duration = float(re.findall(r"Total previous API call duration: (\d+) us\.", uvm_hotplug.log_data)[-1]) / 1000
+#             timestamp = float(re.findall(r"Guest-boot-time\s+\=\s+(\d+)\s+us", uvm_hotplug.log_data)[0]) / 1000
+#             data.append({"vcpus": vcpu_count, "api": api_duration, "onlining": timestamp})
+#
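The udev rule above onlines every hot-added CPU as soon as the kernel announces it, which is what the "custom udev rule" test below measures. A guest-side sanity check along these lines could confirm the rule fired (a hypothetical helper built on the same ssh plumbing the tests use; nproc counts online CPUs only):

    import time

    def assert_all_cpus_online(uvm, expected_count, timeout=5.0):
        """Poll the guest until `nproc` reports all hotplugged CPUs online."""
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            _, stdout, _ = uvm.ssh.run("nproc")
            if int(stdout.strip()) == expected_count:
                return
            time.sleep(0.1)
        raise AssertionError(f"expected {expected_count} online CPUs in the guest")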
diff --git a/tests/host_tools/hotplug.sh b/tests/host_tools/hotplug.sh
new file mode 100644
index 000000000000..fdf20f3a8dba
--- /dev/null
+++ b/tests/host_tools/hotplug.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+# Busy-wait (deliberately without sleeping, to keep the measured latency low)
+# until the first hotplugged CPU shows up in sysfs.
+while :; do
+    [[ -d /sys/devices/system/cpu/cpu1 ]] && break
+done
+
+readarray -t offline_cpus < <(lscpu -p=cpu --offline | sed '/^#/d')
+
+for cpu_idx in "${offline_cpus[@]}"; do
+    echo 1 >"/sys/devices/system/cpu/cpu${cpu_idx}/online"
+done
+
+/home/hotplug_time.o
diff --git a/tests/host_tools/hotplug_time.c b/tests/host_tools/hotplug_time.c
new file mode 100644
index 000000000000..f91cc6adf78c
--- /dev/null
+++ b/tests/host_tools/hotplug_time.c
@@ -0,0 +1,34 @@
+// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+// Signals vCPU-onlining completion by writing the boot timer's magic value
+// to its MMIO page. Run from the guest after onlining hotplugged CPUs.
+
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+// Base address values are defined in arch/src/lib.rs as arch::MMIO_MEM_START.
+// Values are computed in arch/src/<arch>/mod.rs from the architecture layouts.
+// Position on the bus is defined by MMIO_LEN increments, where MMIO_LEN is
+// defined as 0x1000 in vmm/src/device_manager/mmio.rs.
+#ifdef __x86_64__
+#define MAGIC_MMIO_SIGNAL_GUEST_BOOT_COMPLETE 0xd0000000
+#endif
+#ifdef __aarch64__
+#define MAGIC_MMIO_SIGNAL_GUEST_BOOT_COMPLETE 0x40000000
+#endif
+
+#define MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE 123
+
+int main() {
+  int fd = open("/dev/mem", (O_RDWR | O_SYNC | O_CLOEXEC));
+  int mapped_size = getpagesize();
+
+  char *map_base = mmap(NULL, mapped_size, PROT_WRITE, MAP_SHARED, fd,
+                        MAGIC_MMIO_SIGNAL_GUEST_BOOT_COMPLETE);
+
+  *map_base = MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE;
+  msync(map_base, mapped_size, MS_ASYNC);
+}
diff --git a/tests/host_tools/hotplug_time.o b/tests/host_tools/hotplug_time.o
new file mode 100755
index 000000000000..272f5f414043
Binary files /dev/null and b/tests/host_tools/hotplug_time.o differ
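hotplug.sh and hotplug_time.c are the guest half of the manual measurement: the script spins until cpu1 appears, onlines every offline CPU, then runs the compiled helper, whose single MMIO write makes the re-armed boot timer log "Guest-boot-time = N us" on the host. Assuming the guest image ships busybox's devmem applet (it may not; the static helper avoids that dependency), the same signal could be sent without the C program:

    def signal_boot_complete_via_devmem(uvm):
        """Write the magic byte 123 to the x86_64 boot timer page via busybox devmem."""
        # devmem ADDRESS WIDTH_BITS VALUE; requires /dev/mem access, like the C helper.
        uvm.ssh.run("devmem 0xd0000000 8 123")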
diff --git a/tests/integration_tests/performance/test_vcpu_hotplug.py b/tests/integration_tests/performance/test_vcpu_hotplug.py
new file mode 100644
index 000000000000..ce3746f0925e
--- /dev/null
+++ b/tests/integration_tests/performance/test_vcpu_hotplug.py
@@ -0,0 +1,196 @@
+# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Testing vCPU hotplug performance."""
+
+import re
+import time
+from pathlib import Path
+
+import pandas
+import pytest
+
+from host_tools.cargo_build import gcc_compile
+
+
+@pytest.mark.parametrize(
+    "vcpu_count", [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+)
+def test_custom_udev_rule_latency(
+    microvm_factory, guest_kernel_linux_acpi_only, rootfs_rw, vcpu_count
+):
+    """Test the latency of hotplugging and onlining vCPUs with a custom udev rule."""
+    api_durations = []
+    onlining_durations = []
+    print(f"Vcpu count: {vcpu_count}")
+    for _ in range(5):
+        uvm_hotplug = microvm_factory.build(guest_kernel_linux_acpi_only, rootfs_rw)
+        uvm_hotplug.jailer.extra_args.update({"no-seccomp": None})
+        uvm_hotplug.help.enable_console()
+        uvm_hotplug.spawn()
+        uvm_hotplug.basic_config(vcpu_count=1, mem_size_mib=128)
+        uvm_hotplug.add_net_iface()
+        uvm_hotplug.start()
+        uvm_hotplug.ssh.run("rm /usr/lib/udev/rules.d/40-vm-hotadd.rules")
+        uvm_hotplug.ssh.scp_put(
+            Path("./host_tools/1-cpu-hotplug.rules"),
+            Path("/usr/lib/udev/rules.d/1-cpu-hotplug.rules"),
+        )
+
+        time.sleep(0.25)
+
+        uvm_hotplug.api.hotplug.put(Vcpu={"add": vcpu_count})
+        time.sleep(0.25)
+        _, stdout, _ = uvm_hotplug.ssh.run("dmesg")
+
+        # Extract API call duration
+        api_duration = (
+            float(
+                re.findall(
+                    r"Total previous API call duration: (\d+) us\.",
+                    uvm_hotplug.log_data,
+                )[-1]
+            )
+            / 1000
+        )
+
+        # Extract onlining timings
+        start = float(
+            re.findall(r"\[\s+(\d+\.\d+)\] CPU1 has been hot-added\n", stdout)[0]
+        )
+        end = float(re.findall(r"\[\s+(\d+\.\d+)\] \w+", stdout)[-1])
+        elapsed_time = (end - start) * 1000
+        print(f"Api call duration: {api_duration} ms")
+        print(f"Onlining duration: {elapsed_time} ms")
+        api_durations.append(api_duration)
+        onlining_durations.append(elapsed_time)
+        uvm_hotplug.kill()
+        time.sleep(1)
+
+    avg_api_duration = sum(api_durations) / 5
+    avg_onlining_duration = sum(onlining_durations) / 5
+    print(f"Averages for {vcpu_count} hotplugged vcpus:")
+    print(f"\tAverage API call duration: {avg_api_duration} ms")
+    print(f"\tAverage onlining duration: {avg_onlining_duration} ms")
+
+
+@pytest.mark.parametrize(
+    "vcpu_count", [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+)
+def test_default_udev_rule_latency(
+    microvm_factory, guest_kernel_linux_acpi_only, rootfs_rw, vcpu_count
+):
+    """Test the latency of hotplugging and onlining vCPUs with the guest's default udev rule."""
+    api_durations = []
+    onlining_durations = []
+    print(f"Vcpu count: {vcpu_count}")
+    for _ in range(5):
+        uvm_hotplug = microvm_factory.build(guest_kernel_linux_acpi_only, rootfs_rw)
+        uvm_hotplug.jailer.extra_args.update({"no-seccomp": None})
+        uvm_hotplug.help.enable_console()
+        uvm_hotplug.spawn()
+        uvm_hotplug.basic_config(vcpu_count=1, mem_size_mib=128)
+        uvm_hotplug.add_net_iface()
+        uvm_hotplug.start()
+
+        time.sleep(0.25)
+
+        _, stdout, _ = uvm_hotplug.ssh.run("ls /usr/lib/udev/rules.d")
+        default_rule = re.search(r"40-vm-hotadd\.rules", stdout)
+        assert default_rule is not None
+
+        uvm_hotplug.api.hotplug.put(Vcpu={"add": vcpu_count})
+        time.sleep(0.25)
+        _, stdout, _ = uvm_hotplug.ssh.run("dmesg")
+
+        # Extract API call duration
+        api_duration = (
+            float(
+                re.findall(
+                    r"Total previous API call duration: (\d+) us\.",
+                    uvm_hotplug.log_data,
+                )[-1]
+            )
+            / 1000
+        )
+
+        # Extract onlining timings
+        start = float(
+            re.findall(r"\[\s+(\d+\.\d+)\] CPU1 has been hot-added\n", stdout)[0]
+        )
+        end = float(re.findall(r"\[\s+(\d+\.\d+)\] \w+", stdout)[-1])
+        elapsed_time = (end - start) * 1000
+        print(f"Api call duration: {api_duration} ms")
+        print(f"Onlining duration: {elapsed_time} ms")
+        api_durations.append(api_duration)
+        onlining_durations.append(elapsed_time)
+        uvm_hotplug.kill()
+        time.sleep(1)
+
+    avg_api_duration = sum(api_durations) / 5
+    avg_onlining_duration = sum(onlining_durations) / 5
+    print(f"Averages for {vcpu_count} hotplugged vcpus:")
+    print(f"\tAverage API call duration: {avg_api_duration} ms")
+    print(f"\tAverage onlining duration: {avg_onlining_duration} ms")
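The two udev tests above repeat the same log scraping; if this graduates from a draft, that block could be factored into a shared helper (a sketch, not part of the diff, reusing the module's re import and the exact regexes from the tests):

    def parse_hotplug_latencies(log_data, dmesg):
        """Return (api_ms, onlining_ms) from the Firecracker log and guest dmesg."""
        api_us = float(
            re.findall(r"Total previous API call duration: (\d+) us\.", log_data)[-1]
        )
        start = float(
            re.findall(r"\[\s+(\d+\.\d+)\] CPU1 has been hot-added\n", dmesg)[0]
        )
        end = float(re.findall(r"\[\s+(\d+\.\d+)\] \w+", dmesg)[-1])
        return api_us / 1000, (end - start) * 1000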
+
+
+@pytest.mark.parametrize(
+    "vcpu_count", [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+)
+def test_manual_latency(
+    microvm_factory, guest_kernel_linux_acpi_only, rootfs_rw, vcpu_count
+):
+    """Test the latency of hotplugging vCPUs and onlining them manually in the guest."""
+    gcc_compile(
+        Path("./host_tools/hotplug_time.c"), Path("./host_tools/hotplug_time.o")
+    )
+    data = []
+    for _ in range(50):
+        uvm_hotplug = microvm_factory.build(guest_kernel_linux_acpi_only, rootfs_rw)
+        uvm_hotplug.jailer.extra_args.update({"boot-timer": None, "no-seccomp": None})
+        uvm_hotplug.help.enable_console()
+        uvm_hotplug.spawn()
+        uvm_hotplug.basic_config(vcpu_count=1, mem_size_mib=128)
+        uvm_hotplug.add_net_iface()
+        uvm_hotplug.start()
+        uvm_hotplug.ssh.scp_put(
+            Path("./host_tools/hotplug.sh"), Path("/home/hotplug.sh")
+        )
+        uvm_hotplug.ssh.scp_put(
+            Path("./host_tools/hotplug_time.o"), Path("/home/hotplug_time.o")
+        )
+        uvm_hotplug.ssh.run(
+            "tmux new-session -d /bin/bash /home/hotplug.sh > /home/test 2>&1"
+        )
+
+        uvm_hotplug.api.hotplug.put(Vcpu={"add": vcpu_count})
+
+        time.sleep(1.5)
+        # Extract API call duration
+        api_duration = (
+            float(
+                re.findall(
+                    r"Total previous API call duration: (\d+) us\.",
+                    uvm_hotplug.log_data,
+                )[-1]
+            )
+            / 1000
+        )
+        try:
+            timestamp = (
+                float(
+                    re.findall(
+                        r"Guest-boot-time\s+\=\s+(\d+)\s+us", uvm_hotplug.log_data
+                    )[0]
+                )
+                / 1000
+            )
+        except IndexError:
+            # The guest never signalled boot-complete; record the API time only.
+            data.append({"vcpus": vcpu_count, "api": api_duration, "onlining": None})
+            continue
+        data.append({"vcpus": vcpu_count, "api": api_duration, "onlining": timestamp})
+
+    pandas.DataFrame.from_dict(data).to_csv(
+        f"../test_results/manual-hotplug_{vcpu_count}.csv",
+        index=False,
+        float_format="%.3f",
+    )
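test_manual_latency emits one CSV per vCPU count. A possible post-processing step to aggregate them into a single table (hypothetical; assumes the ../test_results layout and column names used above):

    import glob

    import pandas

    # Collect the per-vCPU-count CSVs and average the 50 iterations of each.
    frames = [
        pandas.read_csv(path)
        for path in glob.glob("../test_results/manual-hotplug_*.csv")
    ]
    summary = pandas.concat(frames).groupby("vcpus")[["api", "onlining"]].mean()
    print(summary)  # mean API and onlining latency (ms) per hotplugged vCPU count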