diff --git a/Cargo.lock b/Cargo.lock index bbc676a90a3..cddc66121b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -805,11 +805,11 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "linux-loader" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "132a531b85b3a164012ab682c72f8f2cce7757f187be5f60782fd2b4cda9cb34" +checksum = "eb68dd3452f25a8defaf0ae593509cff0c777683e4d8924f59ac7c5f89267a83" dependencies = [ - "vm-memory 0.13.1", + "vm-memory 0.14.0", ] [[package]] @@ -1556,7 +1556,7 @@ dependencies = [ "vhost", "vm-allocator", "vm-fdt", - "vm-memory 0.13.1", + "vm-memory 0.14.0", "vm-superio", ] diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index f3b63a40455..5ffcdabdb12 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -18,7 +18,7 @@ kvm-ioctls = "0.16.0" lazy_static = "1.4.0" libc = "0.2.117" memfd = "0.6.3" -linux-loader = "0.10.0" +linux-loader = "0.11.0" serde = { version = "1.0.136", features = ["derive", "rc"] } semver = { version = "1.0.17", features = ["serde"] } serde_json = "1.0.78" @@ -26,10 +26,10 @@ timerfd = "1.5.0" thiserror = "1.0.32" displaydoc = "0.2.4" userfaultfd = "0.7.0" -vhost = { version = "0.10.0", features = ["vhost-user-frontend"] } +vhost = { version = "0.10", features = ["vhost-user-frontend", "vhost-net", "vhost-user"] } vm-allocator = "0.1.0" vm-superio = "0.7.0" -vm-memory = { version = "0.13.1", features = ["backend-mmap", "backend-bitmap"] } +vm-memory = { version = "0.14.0", features = ["backend-mmap", "backend-bitmap"] } log = { version = "0.4.17", features = ["std", "serde"] } aes-gcm = { version = "0.10.1", default-features = false, features = ["aes"] } base64 = "0.21.0" diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index cac6580d319..50fd174fd00 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -883,9 +883,19 @@ fn attach_net_devices<'a, I: Iterator>> + Debug>( 
event_manager: &mut EventManager, ) -> Result<(), StartMicrovmError> { for net_device in net_devices { - let id = net_device.lock().expect("Poisoned lock").id().clone(); + let (id, is_vhost) = { + let locked = net_device.lock().expect("Poisoned lock"); + (locked.id().clone(), locked.is_vhost()) + }; // The device mutex mustn't be locked here otherwise it will deadlock. - attach_virtio_device(event_manager, vmm, id, net_device.clone(), cmdline, false)?; + attach_virtio_device( + event_manager, + vmm, + id, + net_device.clone(), + cmdline, + is_vhost, + )?; } Ok(()) } diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs index 0a72e0814c3..7681e7a6714 100644 --- a/src/vmm/src/device_manager/persist.rs +++ b/src/vmm/src/device_manager/persist.rs @@ -290,19 +290,34 @@ impl<'a> Persist<'a> for MMIODeviceManager { } TYPE_NET => { let net = locked_device.as_any().downcast_ref::().unwrap(); - if let (Some(mmds_ns), None) = - (net.mmds_ns.as_ref(), states.mmds_version.as_ref()) - { - states.mmds_version = - Some(mmds_ns.mmds.lock().expect("Poisoned lock").version().into()); + if net.is_vhost() { + warn!("skipping vhost-net device. 
It doesn't support snapshotting yet"); + } else { + match net { + Net::Virtio(virtionet) => { + if let (Some(mmds_ns), None) = + (virtionet.mmds_ns.as_ref(), states.mmds_version.as_ref()) + { + states.mmds_version = Some( + mmds_ns + .mmds + .lock() + .expect("Poisoned lock") + .version() + .into(), + ); + } + + states.net_devices.push(ConnectedNetState { + device_id: devid.clone(), + device_state: net.save(), + transport_state, + device_info: device_info.clone(), + }) + } + Net::Vhost(_) => panic!(), + } } - - states.net_devices.push(ConnectedNetState { - device_id: devid.clone(), - device_state: net.save(), - transport_state, - device_info: device_info.clone(), - }); } TYPE_VSOCK => { let vsock = locked_device @@ -506,7 +521,7 @@ impl<'a> Persist<'a> for MMIODeviceManager { } else if state .net_devices .iter() - .any(|dev| dev.device_state.mmds_ns.is_some()) + .any(|dev| dev.device_state.mmds_ns().is_some()) { // If there's at least one network device having an mmds_ns, it means // that we are restoring from a version that did not persist the `MmdsVersionState`. 
diff --git a/src/vmm/src/devices/virtio/mod.rs b/src/vmm/src/devices/virtio/mod.rs index 23ab2914401..bcab1387252 100644 --- a/src/vmm/src/devices/virtio/mod.rs +++ b/src/vmm/src/devices/virtio/mod.rs @@ -10,6 +10,8 @@ use std::any::Any; use std::io::Error as IOError; +pub use ::vhost::Error as VhostError; + pub mod balloon; pub mod block; pub mod device; @@ -66,6 +68,9 @@ pub enum ActivateError { BadActivate, /// Vhost user: {0} VhostUser(vhost_user::VhostUserError), + + /// Vhost errored on one of the ioctls + Vhost(VhostError), } /// Trait that helps in upcasting an object to Any diff --git a/src/vmm/src/devices/virtio/net/device.rs b/src/vmm/src/devices/virtio/net/device.rs index 9d8cdfd7372..a887962830d 100755 --- a/src/vmm/src/devices/virtio/net/device.rs +++ b/src/vmm/src/devices/virtio/net/device.rs @@ -13,6 +13,7 @@ use std::sync::atomic::AtomicU32; use std::sync::{Arc, Mutex}; use std::{cmp, mem}; +use event_manager::{EventOps, Events, MutEventSubscriber}; use libc::EAGAIN; use log::{error, warn}; use utils::eventfd::EventFd; @@ -20,6 +21,7 @@ use utils::net::mac::MacAddr; use utils::u64_to_usize; use vm_memory::GuestMemoryError; +use super::vhost::device::VhostNet; use crate::devices::virtio::device::{DeviceState, IrqTrigger, IrqType, VirtioDevice}; use crate::devices::virtio::gen::virtio_blk::VIRTIO_F_VERSION_1; use crate::devices::virtio::gen::virtio_net::{ @@ -103,12 +105,131 @@ pub struct ConfigSpace { // SAFETY: `ConfigSpace` contains only PODs. 
unsafe impl ByteValued for ConfigSpace {} +#[derive(Debug)] +pub enum Net { + Virtio(VirtioNet), + Vhost(VhostNet), +} + +impl Net { + pub fn new( + id: String, + tap_if_name: &str, + guest_mac: Option, + rx_rate_limiter: RateLimiter, + tx_rate_limiter: RateLimiter, + vhost: Option, + ) -> Result { + if let Some(true) = vhost { + if rx_rate_limiter != RateLimiter::default() + || tx_rate_limiter != RateLimiter::default() + { + unimplemented!(); + } + Ok(Self::Vhost(VhostNet::new(id, tap_if_name, guest_mac)?)) + } else { + Ok(Self::Virtio(VirtioNet::new( + id, + tap_if_name, + guest_mac, + rx_rate_limiter, + tx_rate_limiter, + )?)) + } + } + + pub fn process_virtio_queues(&mut self) { + match self { + Self::Virtio(n) => n.process_virtio_queues(), + Self::Vhost(_) => unimplemented!(), + } + } + + pub fn id(&self) -> &String { + match self { + Self::Virtio(n) => &n.id, + Self::Vhost(n) => &n.id, + } + } + + /// Provides the MAC of this net device. + pub fn guest_mac(&self) -> Option<&MacAddr> { + match self { + Self::Virtio(n) => n.guest_mac.as_ref(), + Self::Vhost(n) => n.guest_mac.as_ref(), + } + } + + /// Provides the host IFACE name of this net device. + pub fn iface_name(&self) -> String { + match self { + Self::Virtio(b) => b.iface_name(), + Self::Vhost(b) => b.iface_name(), + } + } + + /// Provides the MmdsNetworkStack of this net device. 
+ pub fn mmds_ns(&self) -> Option<&MmdsNetworkStack> { + match self { + Self::Virtio(b) => b.mmds_ns(), + Self::Vhost(_) => None, + } + } + + pub fn configure_mmds_network_stack(&mut self, ipv4_addr: Ipv4Addr, mmds: Arc>) { + match self { + Self::Virtio(b) => b.configure_mmds_network_stack(ipv4_addr, mmds), + Self::Vhost(_) => unimplemented!(), + } + } + + pub fn disable_mmds_network_stack(&mut self) { + match self { + Self::Virtio(b) => b.disable_mmds_network_stack(), + Self::Vhost(_) => unimplemented!(), + } + } + + pub fn rx_rate_limiter(&self) -> &RateLimiter { + match self { + Self::Virtio(b) => b.rx_rate_limiter(), + Self::Vhost(_) => unimplemented!(), + } + } + + pub fn tx_rate_limiter(&self) -> &RateLimiter { + match self { + Self::Virtio(b) => b.tx_rate_limiter(), + Self::Vhost(_) => unimplemented!(), + } + } + + pub fn patch_rate_limiters( + &mut self, + rx_bytes: BucketUpdate, + rx_ops: BucketUpdate, + tx_bytes: BucketUpdate, + tx_ops: BucketUpdate, + ) { + match self { + Self::Virtio(b) => b.patch_rate_limiters(rx_bytes, rx_ops, tx_bytes, tx_ops), + Self::Vhost(_) => unimplemented!(), + } + } + pub fn is_vhost(&self) -> bool { + match self { + Self::Virtio(_) => false, + Self::Vhost(_) => true, + } + } +} + /// VirtIO network device. /// /// It emulates a network device able to exchange L2 frames between the guest /// and a host-side tap device. #[derive(Debug)] -pub struct Net { +pub struct VirtioNet { pub(crate) id: String, /// The backend for this device: a tap. @@ -144,7 +265,7 @@ pub struct Net { pub(crate) metrics: Arc, } -impl Net { +impl VirtioNet { /// Create a new virtio network device with the given TAP interface. pub fn new_with_tap( id: String, @@ -177,7 +298,7 @@ impl Net { queues.push(Queue::new(size)); } - Ok(Net { + Ok(VirtioNet { id: id.clone(), tap, avail_features, @@ -221,16 +342,6 @@ impl Net { Self::new_with_tap(id, tap, guest_mac, rx_rate_limiter, tx_rate_limiter) } - /// Provides the ID of this net device. 
- pub fn id(&self) -> &String { - &self.id - } - - /// Provides the MAC of this net device. - pub fn guest_mac(&self) -> Option<&MacAddr> { - self.guest_mac.as_ref() - } - /// Provides the host IFACE name of this net device. pub fn iface_name(&self) -> String { self.tap.if_name_as_str().to_string() @@ -780,6 +891,94 @@ impl Net { } impl VirtioDevice for Net { + fn avail_features(&self) -> u64 { + match self { + Self::Virtio(n) => n.avail_features, + Self::Vhost(n) => n.avail_features, + } + } + + fn acked_features(&self) -> u64 { + match self { + Self::Virtio(n) => n.acked_features, + Self::Vhost(n) => n.acked_features, + } + } + + fn set_acked_features(&mut self, acked_features: u64) { + match self { + Self::Virtio(n) => n.acked_features = acked_features, + Self::Vhost(n) => n.acked_features = acked_features, + } + } + fn device_type(&self) -> u32 { + TYPE_NET + } + fn queues(&self) -> &[Queue] { + match self { + Self::Virtio(n) => &n.queues, + Self::Vhost(n) => &n.queues, + } + } + + fn queues_mut(&mut self) -> &mut [Queue] { + match self { + Self::Virtio(n) => &mut n.queues, + Self::Vhost(n) => &mut n.queues, + } + } + + fn queue_events(&self) -> &[EventFd] { + match self { + Self::Virtio(n) => &n.queue_evts, + Self::Vhost(n) => &n.queue_evts, + } + } + + fn interrupt_evt(&self) -> &EventFd { + match self { + Self::Virtio(n) => &n.irq_trigger.irq_evt, + Self::Vhost(n) => &n.irq_trigger.irq_evt, + } + } + + fn interrupt_status(&self) -> Arc { + match self { + Self::Virtio(n) => n.irq_trigger.irq_status.clone(), + Self::Vhost(n) => n.irq_trigger.irq_status.clone(), + } + } + + fn read_config(&self, offset: u64, data: &mut [u8]) { + match self { + Self::Virtio(n) => n.read_config(offset, data), + Self::Vhost(n) => n.read_config(offset, data), + } + } + + fn write_config(&mut self, offset: u64, data: &[u8]) { + match self { + Self::Virtio(n) => n.write_config(offset, data), + Self::Vhost(n) => n.write_config(offset, data), + } + } + + fn activate(&mut self, mem: 
GuestMemoryMmap) -> Result<(), ActivateError> { + match self { + Self::Virtio(n) => n.activate(mem), + Self::Vhost(n) => n.activate(mem), + } + } + + fn is_activated(&self) -> bool { + match self { + Self::Virtio(n) => n.device_state.is_activated(), + Self::Vhost(n) => n.device_state.is_activated(), + } + } +} + +impl VirtioDevice for VirtioNet { fn avail_features(&self) -> u64 { self.avail_features } @@ -872,6 +1071,22 @@ impl VirtioDevice for Net { } } +impl MutEventSubscriber for Net { + fn process(&mut self, event: Events, ops: &mut EventOps) { + match self { + Self::Virtio(n) => n.process(event, ops), + Self::Vhost(n) => n.process(event, ops), + } + } + + fn init(&mut self, ops: &mut EventOps) { + match self { + Self::Virtio(n) => n.init(ops), + Self::Vhost(n) => n.init(ops), + } + } +} + #[cfg(test)] #[macro_use] pub mod tests { diff --git a/src/vmm/src/devices/virtio/net/event_handler.rs b/src/vmm/src/devices/virtio/net/event_handler.rs index 9bc0632ea2c..00c103c55cf 100644 --- a/src/vmm/src/devices/virtio/net/event_handler.rs +++ b/src/vmm/src/devices/virtio/net/event_handler.rs @@ -1,15 +1,15 @@ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 -use event_manager::{EventOps, Events, MutEventSubscriber}; +use event_manager::{EventOps, Events}; use utils::epoll::EventSet; use crate::devices::virtio::device::VirtioDevice; -use crate::devices::virtio::net::device::Net; +use crate::devices::virtio::net::device::VirtioNet; use crate::devices::virtio::net::{RX_INDEX, TX_INDEX}; use crate::logger::{error, warn, IncMetric}; -impl Net { +impl VirtioNet { const PROCESS_ACTIVATE: u32 = 0; const PROCESS_VIRTQ_RX: u32 = 1; const PROCESS_VIRTQ_TX: u32 = 2; @@ -78,10 +78,8 @@ impl Net { error!("Failed to un-register activate event: {}", err); } } -} -impl MutEventSubscriber for Net { - fn process(&mut self, event: Events, ops: &mut EventOps) { + pub(crate) fn process(&mut self, event: Events, ops: &mut EventOps) { let source = event.data(); let event_set = event.event_set(); @@ -117,7 +115,7 @@ impl MutEventSubscriber for Net { } } - fn init(&mut self, ops: &mut EventOps) { + pub(crate) fn init(&mut self, ops: &mut EventOps) { // This function can be called during different points in the device lifetime: // - shortly after device creation, // - on device activation (is-activated already true at this point), diff --git a/src/vmm/src/devices/virtio/net/mod.rs b/src/vmm/src/devices/virtio/net/mod.rs index 52d4cb92d7b..8b6fd0df598 100644 --- a/src/vmm/src/devices/virtio/net/mod.rs +++ b/src/vmm/src/devices/virtio/net/mod.rs @@ -26,6 +26,8 @@ pub mod test_utils; mod gen; +pub mod vhost; + pub use tap::{Tap, TapError}; pub use self::device::Net; diff --git a/src/vmm/src/devices/virtio/net/persist.rs b/src/vmm/src/devices/virtio/net/persist.rs index 271977a4792..394b4e26970 100644 --- a/src/vmm/src/devices/virtio/net/persist.rs +++ b/src/vmm/src/devices/virtio/net/persist.rs @@ -13,6 +13,7 @@ use utils::net::mac::MacAddr; use super::device::Net; use super::NET_NUM_QUEUES; use crate::devices::virtio::device::DeviceState; +use crate::devices::virtio::net::device::VirtioNet; use 
crate::devices::virtio::persist::{PersistError as VirtioStateError, VirtioDeviceState}; use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE; use crate::devices::virtio::TYPE_NET; @@ -31,10 +32,23 @@ pub struct NetConfigSpaceState { guest_mac: Option, } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum NetState { + Virtio(VirtioNetState), +} + +impl NetState { + pub fn mmds_ns(&self) -> Option<&MmdsNetworkStackState> { + match self { + Self::Virtio(b) => b.mmds_ns.as_ref(), + } + } +} + /// Information about the network device that are saved /// at snapshot. #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct NetState { +pub struct VirtioNetState { id: String, tap_if_name: String, rx_rate_limiter_state: RateLimiterState, @@ -73,8 +87,30 @@ impl Persist<'_> for Net { type Error = NetPersistError; fn save(&self) -> Self::State { - NetState { - id: self.id().clone(), + match self { + Self::Virtio(b) => NetState::Virtio(b.save()), + Self::Vhost(_) => unimplemented!(), + } + } + + fn restore( + constructor_args: Self::ConstructorArgs, + state: &Self::State, + ) -> Result { + match state { + NetState::Virtio(s) => Ok(Self::Virtio(VirtioNet::restore(constructor_args, s)?)), + } + } +} + +impl Persist<'_> for VirtioNet { + type State = VirtioNetState; + type ConstructorArgs = NetConstructorArgs; + type Error = NetPersistError; + + fn save(&self) -> Self::State { + VirtioNetState { + id: self.id.clone(), tap_if_name: self.iface_name(), rx_rate_limiter_state: self.rx_rate_limiter.save(), tx_rate_limiter_state: self.tx_rate_limiter.save(), @@ -93,7 +129,7 @@ impl Persist<'_> for Net { // RateLimiter::restore() can fail at creating a timerfd. 
let rx_rate_limiter = RateLimiter::restore((), &state.rx_rate_limiter_state)?; let tx_rate_limiter = RateLimiter::restore((), &state.tx_rate_limiter_state)?; - let mut net = Net::new( + let mut net = VirtioNet::new( state.id.clone(), &state.tap_if_name, state.config_space.guest_mac, diff --git a/src/vmm/src/devices/virtio/net/tap.rs b/src/vmm/src/devices/virtio/net/tap.rs index 5c8151f27ea..8b305fa3ef4 100644 --- a/src/vmm/src/devices/virtio/net/tap.rs +++ b/src/vmm/src/devices/virtio/net/tap.rs @@ -51,7 +51,7 @@ ioctl_iow_nr!(TUNSETVNETHDRSZ, TUNTAP, 216, ::std::os::raw::c_int); /// Tap goes out of scope, and the kernel will clean up the interface automatically. #[derive(Debug)] pub struct Tap { - tap_file: File, + pub(crate) tap_file: File, pub(crate) if_name: [u8; IFACE_NAME_MAX_LEN], #[cfg(test)] diff --git a/src/vmm/src/devices/virtio/net/test_utils.rs b/src/vmm/src/devices/virtio/net/test_utils.rs index b4222a8702d..78daf274b76 100644 --- a/src/vmm/src/devices/virtio/net/test_utils.rs +++ b/src/vmm/src/devices/virtio/net/test_utils.rs @@ -17,8 +17,8 @@ use utils::net::mac::MacAddr; #[cfg(test)] use crate::devices::virtio::net::device::vnet_hdr_len; +use crate::devices::virtio::net::device::VirtioNet; use crate::devices::virtio::net::tap::{IfReqBuilder, Tap}; -use crate::devices::virtio::net::Net; use crate::devices::virtio::queue::{Queue, QueueError}; use crate::devices::virtio::test_utils::VirtQueue; use crate::devices::DeviceError; @@ -29,7 +29,7 @@ use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; static NEXT_INDEX: AtomicUsize = AtomicUsize::new(1); -pub fn default_net() -> Net { +pub fn default_net() -> VirtioNet { let next_tap = NEXT_INDEX.fetch_add(1, Ordering::SeqCst); // Id is the firecracker-facing identifier, e.g. local to the FC process. 
We thus do not need to // make sure it is globally unique @@ -41,7 +41,7 @@ pub fn default_net() -> Net { let guest_mac = default_guest_mac(); - let mut net = Net::new( + let mut net = VirtioNet::new( tap_device_id, tap_if_name, Some(guest_mac), @@ -58,13 +58,13 @@ pub fn default_net() -> Net { net } -pub fn default_net_no_mmds() -> Net { +pub fn default_net_no_mmds() -> VirtioNet { let next_tap = NEXT_INDEX.fetch_add(1, Ordering::SeqCst); let tap_device_id = format!("net-device{}", next_tap); let guest_mac = default_guest_mac(); - let net = Net::new( + let net = VirtioNet::new( tap_device_id, "net-device%d", Some(guest_mac), @@ -309,7 +309,7 @@ pub(crate) fn inject_tap_tx_frame(net: &Net, len: usize) -> Vec { frame } -pub fn write_element_in_queue(net: &Net, idx: u16, val: u64) -> Result<(), DeviceError> { +pub fn write_element_in_queue(net: &VirtioNet, idx: u16, val: u64) -> Result<(), DeviceError> { if idx as usize > net.queue_evts.len() { return Err(DeviceError::QueueError(QueueError::DescIndexOutOfBounds( idx, @@ -319,7 +319,7 @@ pub fn write_element_in_queue(net: &Net, idx: u16, val: u64) -> Result<(), Devic Ok(()) } -pub fn get_element_from_queue(net: &Net, idx: u16) -> Result { +pub fn get_element_from_queue(net: &VirtioNet, idx: u16) -> Result { if idx as usize > net.queue_evts.len() { return Err(DeviceError::QueueError(QueueError::DescIndexOutOfBounds( idx, @@ -332,13 +332,13 @@ pub fn default_guest_mac() -> MacAddr { MacAddr::from_str("11:22:33:44:55:66").unwrap() } -pub fn set_mac(net: &mut Net, mac: MacAddr) { +pub fn set_mac(net: &mut VirtioNet, mac: MacAddr) { net.guest_mac = Some(mac); net.config_space.guest_mac = mac; } // Assigns "guest virtio driver" activated queues to the net device. 
-pub fn assign_queues(net: &mut Net, rxq: Queue, txq: Queue) {
+pub fn assign_queues(net: &mut VirtioNet, rxq: Queue, txq: Queue) {
     net.queues.clear();
     net.queues.push(rxq);
     net.queues.push(txq);
diff --git a/src/vmm/src/devices/virtio/net/vhost/device.rs b/src/vmm/src/devices/virtio/net/vhost/device.rs
new file mode 100644
index 00000000000..74f60233c9e
--- /dev/null
+++ b/src/vmm/src/devices/virtio/net/vhost/device.rs
@@ -0,0 +1,347 @@
+//! Virtio network device whose queues are served by the kernel `vhost-net` driver.
+//!
+//! Feature negotiation and the config space stay in userspace; on activation the virtqueues,
+//! their eventfds and the tap file are handed to a `vhost_kern::net::Net` handle.
+
+use std::cmp;
+use std::io::Write;
+use std::sync::atomic::AtomicU32;
+use std::sync::Arc;
+
+use log::error;
+use utils::eventfd::EventFd;
+use utils::net::mac::MacAddr;
+use utils::u64_to_usize;
+use vhost::net::VhostNet as vhost_VhostNet;
+use vhost::{vhost_kern, VhostBackend, VhostUserMemoryRegionInfo, VringConfigData};
+use vm_memory::{Address, GuestMemory, GuestMemoryRegion};
+
+use crate::devices::virtio::device::{DeviceState, IrqTrigger, VirtioDevice};
+use crate::devices::virtio::gen::virtio_net::{
+    VIRTIO_F_VERSION_1, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4,
+    VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_MAC,
+    VIRTIO_NET_F_MRG_RXBUF,
+};
+use crate::devices::virtio::gen::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
+use crate::devices::virtio::net::device::ConfigSpace;
+use crate::devices::virtio::net::{gen, NetError, Tap, NET_QUEUE_SIZES, RX_INDEX, TX_INDEX};
+use crate::devices::virtio::queue::Queue;
+use crate::devices::virtio::{ActivateError, TYPE_NET};
+use crate::vstate::memory::{ByteValued, GuestMemoryMmap};
+
+/// Virtio-net feature bit numbers for UDP segmentation offload, declared locally.
+pub const VIRTIO_NET_F_GUEST_USO4: u32 = 54;
+pub const VIRTIO_NET_F_GUEST_USO6: u32 = 55;
+pub const VIRTIO_NET_F_HOST_USO: u32 = 56;
+/// Tap offload flags paired with the USO feature bits above.
+pub const TUN_F_USO4: u32 = 0x20;
+pub const TUN_F_USO6: u32 = 0x40;
+/// Virtio ring feature bit number for indirect descriptors.
+pub const VIRTIO_RING_F_INDIRECT_DESC: u64 = 28;
+
+/// Former local `xdp` switch of `new_with_tap`: while `true`, no segmentation offload is offered.
+const XDP_ENABLED: bool = true;
+/// Former local `uso` switch of `new_with_tap`: offers guest USO when XDP is off.
+const USO_ENABLED: bool = false;
+
+/// Network device backed by a tap whose virtqueues are processed by a vhost handle.
+pub struct VhostNet {
+    pub(crate) id: String,
+
+    /// The backend for this device: a tap.
+    pub tap: Tap,
+
+    pub(crate) avail_features: u64,
+    pub(crate) acked_features: u64,
+
+    pub(crate) queues: Vec<Queue>,
+    pub(crate) queue_evts: Vec<EventFd>,
+
+    pub(crate) irq_trigger: IrqTrigger,
+
+    pub(crate) config_space: ConfigSpace,
+    pub(crate) guest_mac: Option<MacAddr>,
+
+    pub(crate) device_state: DeviceState,
+    pub(crate) activate_evt: EventFd,
+
+    /// Vhost handle; `None` until `activate` has configured it.
+    pub(crate) vhost: Option<vhost_kern::net::Net<Arc<GuestMemoryMmap>>>,
+
+    /// Feature bits handed to the vhost handle via `set_features`.
+    pub(crate) features: u64,
+}
+
+impl core::fmt::Debug for VhostNet {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        // Same field selection as before: queues, eventfds, irq trigger and vhost are omitted.
+        f.debug_struct("VhostNet")
+            .field("id", &self.id)
+            .field("tap", &self.tap)
+            .field("avail_features", &self.avail_features)
+            .field("acked_features", &self.acked_features)
+            .field("config_space", &self.config_space)
+            .field("guest_mac", &self.guest_mac)
+            .field("device_state", &self.device_state)
+            .field("activate_evt", &self.activate_evt)
+            .field("features", &self.features)
+            .finish()
+    }
+}
+
+impl VhostNet {
+    /// Provides the host IFACE name of this net device.
+    pub fn iface_name(&self) -> String {
+        self.tap.if_name_as_str().to_string()
+    }
+
+    /// Tap offload flags that correspond to the receive offloads offered in `new_with_tap`.
+    fn tap_offload_flags() -> u32 {
+        let mut flags = gen::TUN_F_CSUM;
+        if !XDP_ENABLED {
+            flags |= gen::TUN_F_TSO4 | gen::TUN_F_TSO6;
+            if USO_ENABLED {
+                flags |= TUN_F_USO4 | TUN_F_USO6;
+            }
+        }
+        flags
+    }
+
+    /// Create a new vhost network device with the given TAP interface.
+    pub fn new_with_tap(
+        id: String,
+        tap: Tap,
+        guest_mac: Option<MacAddr>,
+    ) -> Result<Self, NetError> {
+        let mut avail_features: u64 = 1 << VIRTIO_NET_F_GUEST_CSUM
+            | 1 << VIRTIO_NET_F_CSUM
+            | 1 << VIRTIO_F_VERSION_1
+            | 1 << VIRTIO_RING_F_EVENT_IDX;
+
+        if !XDP_ENABLED {
+            avail_features |= 1 << VIRTIO_NET_F_GUEST_TSO4
+                | 1 << VIRTIO_NET_F_HOST_TSO4
+                | 1 << VIRTIO_NET_F_GUEST_TSO6
+                | 1 << VIRTIO_NET_F_HOST_TSO6
+                | 1 << VIRTIO_NET_F_HOST_USO;
+            if USO_ENABLED {
+                avail_features |= 1 << VIRTIO_NET_F_GUEST_USO4 | 1 << VIRTIO_NET_F_GUEST_USO6;
+            }
+        }
+
+        // We could announce VIRTIO_RING_F_INDIRECT_DESC and
+        // VIRTIO_NET_F_MRG_RXBUF but this is not needed at this
+        // point.
+
+        let mut config_space = ConfigSpace::default();
+        if let Some(mac) = guest_mac {
+            config_space.guest_mac = mac;
+            // Enabling feature for MAC address configuration
+            // If not set, the driver will generate a random MAC address
+            avail_features |= 1 << VIRTIO_NET_F_MAC;
+        }
+
+        let mut queue_evts = Vec::new();
+        let mut queues = Vec::new();
+        for size in NET_QUEUE_SIZES {
+            queue_evts.push(EventFd::new(libc::EFD_NONBLOCK).map_err(NetError::EventFd)?);
+            queues.push(Queue::new(size));
+        }
+
+        // NOTE(review): INDIRECT_DESC and MRG_RXBUF are enabled on the vhost side although they
+        // are never part of `avail_features`, so the guest cannot acknowledge them — confirm.
+        let features: u64 = 1 << VIRTIO_F_VERSION_1
+            | 1 << VIRTIO_RING_F_EVENT_IDX
+            | 1 << VIRTIO_RING_F_INDIRECT_DESC
+            | 1 << VIRTIO_NET_F_MRG_RXBUF;
+
+        Ok(VhostNet {
+            id,
+            tap,
+            avail_features,
+            acked_features: 0u64,
+            queues,
+            queue_evts,
+            irq_trigger: IrqTrigger::new().map_err(NetError::EventFd)?,
+            config_space,
+            guest_mac,
+            device_state: DeviceState::Inactive,
+            activate_evt: EventFd::new(libc::EFD_NONBLOCK).map_err(NetError::EventFd)?,
+            vhost: None,
+            features,
+        })
+    }
+
+    /// Create a new vhost network device given the interface name.
+    pub fn new(
+        id: String,
+        tap_if_name: &str,
+        guest_mac: Option<MacAddr>,
+    ) -> Result<Self, NetError> {
+        let tap = Tap::open_named(tap_if_name).map_err(NetError::TapOpen)?;
+
+        // Enable only the tap offloads whose virtio counterparts are offered to the guest.
+        tap.set_offload(Self::tap_offload_flags())
+            .map_err(NetError::TapSetOffload)?;
+
+        let vnet_hdr_size = i32::try_from(super::super::device::vnet_hdr_len()).unwrap();
+        tap.set_vnet_hdr_size(vnet_hdr_size)
+            .map_err(NetError::TapSetVnetHdrSize)?;
+
+        Self::new_with_tap(id, tap, guest_mac)
+    }
+
+    /// Logs an error when guest USO was offered but the driver did not acknowledge it.
+    fn report_missing_uso(&self) {
+        let uso4 = 1u64 << VIRTIO_NET_F_GUEST_USO4;
+        if self.avail_features & uso4 != 0 && self.acked_features & uso4 == 0 {
+            error!("please use a guest kernel with USO support (patched 6.1 or 6.2)");
+        }
+    }
+
+    /// Opens a vhost handle and configures it with guest memory, both vrings and the tap.
+    ///
+    /// On success the handle is stored in `self.vhost`; on error nothing is stored.
+    fn setup_vhost_handle(&mut self, mem: &GuestMemoryMmap) -> Result<(), ::vhost::Error> {
+        let vhost = vhost_kern::net::Net::new(Arc::new(mem.clone()))?;
+
+        vhost.set_owner()?;
+        vhost.set_features(self.features)?;
+
+        // Every region is described by the host address of its own start, not of guest
+        // address 0, and an unmapped address is reported instead of panicking.
+        let regions = mem
+            .iter()
+            .map(|region| {
+                let host_addr = mem
+                    .get_host_address(region.start_addr())
+                    .map_err(|_| ::vhost::Error::InvalidGuestMemoryRegion)?;
+                Ok(VhostUserMemoryRegionInfo {
+                    guest_phys_addr: region.start_addr().raw_value(),
+                    memory_size: region.len(),
+                    userspace_addr: host_addr as u64,
+                    mmap_offset: 0,
+                    mmap_handle: -1,
+                })
+            })
+            .collect::<Result<Vec<_>, ::vhost::Error>>()?;
+        vhost.set_mem_table(&regions)?;
+
+        vhost.set_vring_call(RX_INDEX, &self.irq_trigger.irq_evt)?;
+        vhost.set_vring_call(TX_INDEX, &self.irq_trigger.irq_evt)?;
+
+        vhost.set_vring_kick(RX_INDEX, &self.queue_evts[RX_INDEX])?;
+        vhost.set_vring_kick(TX_INDEX, &self.queue_evts[TX_INDEX])?;
+
+        for (queue_index, queue) in self.queues().iter().enumerate() {
+            let qsize = queue.actual_size();
+            vhost.set_vring_num(queue_index, qsize)?;
+
+            let vring = VringConfigData {
+                flags: 0,
+                queue_max_size: qsize,
+                queue_size: qsize,
+                desc_table_addr: queue.desc_table.raw_value(),
+                used_ring_addr: queue.used_ring.raw_value(),
+                avail_ring_addr: queue.avail_ring.raw_value(),
+                log_addr: None,
+            };
+
+            vhost.set_vring_addr(queue_index, &vring)?;
+            vhost.set_backend(queue_index, Some(&self.tap.tap_file))?;
+        }
+        self.vhost = Some(vhost);
+        Ok(())
+    }
+}
+
+impl VirtioDevice for VhostNet {
+    fn avail_features(&self) -> u64 {
+        self.avail_features
+    }
+
+    fn acked_features(&self) -> u64 {
+        self.acked_features
+    }
+
+    fn set_acked_features(&mut self, acked_features: u64) {
+        self.acked_features = acked_features;
+    }
+
+    fn device_type(&self) -> u32 {
+        TYPE_NET
+    }
+
+    fn queues(&self) -> &[Queue] {
+        &self.queues
+    }
+
+    fn queues_mut(&mut self) -> &mut [Queue] {
+        &mut self.queues
+    }
+
+    fn queue_events(&self) -> &[EventFd] {
+        &self.queue_evts
+    }
+
+    fn interrupt_evt(&self) -> &EventFd {
+        &self.irq_trigger.irq_evt
+    }
+
+    fn interrupt_status(&self) -> Arc<AtomicU32> {
+        self.irq_trigger.irq_status.clone()
+    }
+
+    fn read_config(&self, offset: u64, mut data: &mut [u8]) {
+        let config_space_bytes = self.config_space.as_slice();
+        let config_len = config_space_bytes.len() as u64;
+        if offset >= config_len {
+            error!("Failed to read config space");
+            return;
+        }
+        if let Some(end) = offset.checked_add(data.len() as u64) {
+            // This write can't fail, offset and end are checked against config_len.
+            data.write_all(
+                &config_space_bytes[u64_to_usize(offset)..u64_to_usize(cmp::min(end, config_len))],
+            )
+            .unwrap();
+        }
+    }
+
+    fn write_config(&mut self, offset: u64, data: &[u8]) {
+        let config_space_bytes = self.config_space.as_mut_slice();
+        let start = usize::try_from(offset).ok();
+        let end = start.and_then(|s| s.checked_add(data.len()));
+        let Some(dst) = start
+            .zip(end)
+            .and_then(|(start, end)| config_space_bytes.get_mut(start..end))
+        else {
+            error!("Failed to write config space");
+            return;
+        };
+
+        dst.copy_from_slice(data);
+        self.guest_mac = Some(self.config_space.guest_mac);
+    }
+
+    fn activate(&mut self, mem: GuestMemoryMmap) -> Result<(), ActivateError> {
+        self.setup_vhost_handle(&mem)
+            .map_err(ActivateError::Vhost)?;
+
+        if self.activate_evt.write(1).is_err() {
+            error!("Net: Cannot write to activate_evt");
+            // Do not keep a configured vhost handle around for a device that stays inactive.
+            self.vhost = None;
+            return Err(ActivateError::BadActivate);
+        }
+        // Features are final at this point, so the USO diagnostic is emitted once here
+        // instead of on every `acked_features()` call.
+        self.report_missing_uso();
+        self.device_state = DeviceState::Activated(mem);
+        Ok(())
+    }
+
+    fn is_activated(&self) -> bool {
+        self.device_state.is_activated()
+    }
+}
diff --git
a/src/vmm/src/devices/virtio/net/vhost/event_handler.rs b/src/vmm/src/devices/virtio/net/vhost/event_handler.rs
new file mode 100644
index 00000000000..6a5f11d06f7
--- /dev/null
+++ b/src/vmm/src/devices/virtio/net/vhost/event_handler.rs
@@ -0,0 +1,64 @@
+use event_manager::{EventOps, Events};
+use log::{error, warn};
+use utils::epoll::EventSet;
+
+use super::device::VhostNet;
+use crate::devices::virtio::device::VirtioDevice;
+
+impl VhostNet {
+    /// Event data value that tags the activation eventfd.
+    const PROCESS_ACTIVATE: u32 = 0;
+
+    /// Consumes the activation event and removes its eventfd from the event loop.
+    fn process_activate_event(&self, ops: &mut EventOps) {
+        if let Err(err) = self.activate_evt.read() {
+            error!("Failed to consume net activate event: {:?}", err);
+        }
+        if let Err(err) = ops.remove(Events::with_data(
+            &self.activate_evt,
+            Self::PROCESS_ACTIVATE,
+            EventSet::IN,
+        )) {
+            error!("Failed to un-register activate event: {}", err);
+        }
+    }
+
+    /// Registers the activation eventfd with the event loop.
+    fn register_activate_event(&self, ops: &mut EventOps) {
+        if let Err(err) = ops.add(Events::with_data(
+            &self.activate_evt,
+            Self::PROCESS_ACTIVATE,
+            EventSet::IN,
+        )) {
+            error!("Failed to register activate event: {}", err);
+        }
+    }
+
+    /// Subscribes to the activation event unless the device is already activated.
+    pub(crate) fn init(&mut self, ops: &mut EventOps) {
+        if !self.is_activated() {
+            self.register_activate_event(ops);
+        }
+    }
+
+    /// Handles an event delivered for this device.
+    ///
+    /// Only the activation event is expected; anything else is logged and ignored.
+    pub(crate) fn process(&mut self, event: Events, ops: &mut EventOps) {
+        let source = event.data();
+
+        if !self.is_activated() {
+            // Previously dropped without a trace; log it so stray wake-ups are visible.
+            warn!(
+                "Net: The device is not yet activated. Spurious event received: {:?}",
+                source
+            );
+            return;
+        }
+
+        match source {
+            Self::PROCESS_ACTIVATE => self.process_activate_event(ops),
+            _ => warn!("Net: Spurious event received: {:?}", source),
+        }
+    }
+}
diff --git a/src/vmm/src/devices/virtio/net/vhost/mod.rs b/src/vmm/src/devices/virtio/net/vhost/mod.rs
new file mode 100644
index 00000000000..a6cd76f1d62
--- /dev/null
+++ b/src/vmm/src/devices/virtio/net/vhost/mod.rs
@@ -0,0 +1,3 @@
+pub mod device;
+
+mod event_handler;
diff --git a/src/vmm/src/vmm_config/net.rs b/src/vmm/src/vmm_config/net.rs
index c4e835e1ad9..f248445696b 100644
--- a/src/vmm/src/vmm_config/net.rs
+++ b/src/vmm/src/vmm_config/net.rs
@@ -27,6 +27,8 @@ pub struct NetworkInterfaceConfig {
     pub rx_rate_limiter: Option,
     /// Rate Limiter for transmitted packages.
     pub tx_rate_limiter: Option,
+    /// Enables vhost net backend. Requires access to /dev/vhost-net.
+    pub vhost: Option<bool>,
 }
 
 impl From<&Net> for NetworkInterfaceConfig {
@@ -39,6 +41,8 @@ impl From<&Net> for NetworkInterfaceConfig {
             guest_mac: net.guest_mac().copied(),
             rx_rate_limiter: rx_rl.into_option(),
             tx_rate_limiter: tx_rl.into_option(),
+            // Report the backend in use: `Some(true)` for vhost, `None` (as before) otherwise.
+            vhost: net.is_vhost().then_some(true),
         }
     }
 }
@@ -161,6 +165,7 @@ impl NetBuilder {
             cfg.guest_mac,
             rx_rate_limiter.unwrap_or_default(),
             tx_rate_limiter.unwrap_or_default(),
+            cfg.vhost,
         )
         .map_err(NetworkInterfaceError::CreateNetworkDevice)
     }