From dcdd0b1bd6c211c968512e4695089b66813cdb18 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 20 Aug 2024 21:38:55 +0100 Subject: [PATCH] Move underlay NICs back into H/W Classification (#504) Today, we get our TX and RX pathways on underlay devices for XDE by creating a secondary MAC client on each device. As part of this process we must attach a unicast MAC address (or specify `MAC_OPEN_FLAGS_NO_UNICAST_ADDR`) during creation to spin up a valid datapath, otherwise we can receive packets on our promiscuous mode handler but any sent packets are immediately dropped by MAC. However, datapath setup then fails to supply a dedicated ring/group for the new client, and the device is reduced to pure software classification. This hard-disables any ring polling threads, and so all packet processing occurs in the interrupt context. This limits throughput and increases OPTE's blast radius on control plane/crucible traffic between sleds. This PR places a hold onto the underlay NICs via `dls`, and makes use of `dls_open`/`dls_close` to acquire a valid transmit pathway onto the original (primary) MAC client, to which we can also attach a promiscuous callback. As desired, we are back in hardware classification. This work is orthogonal to #62 (and related efforts) which will get us out of promiscuous mode -- both are necessary parts of making optimal use of the illumos networking stack. Closes #489 . --- Cargo.lock | 13 +- Cargo.toml | 2 +- xde-tests/tests/loopback.rs | 2 +- xde/src/dls.rs | 27 --- xde/src/dls/mod.rs | 258 +++++++++++++++++++++++++++++ xde/src/dls/sys.rs | 90 ++++++++++ xde/src/lib.rs | 17 +- xde/src/{mac.rs => mac/mod.rs} | 122 +++++++++----- xde/src/{mac_sys.rs => mac/sys.rs} | 21 ++- xde/src/xde.rs | 153 +++++++---------- 10 files changed, 532 insertions(+), 173 deletions(-) delete mode 100644 xde/src/dls.rs create mode 100644 xde/src/dls/mod.rs create mode 100644 xde/src/dls/sys.rs rename xde/src/{mac.rs => mac/mod.rs} (82%) rename xde/src/{mac_sys.rs => mac/sys.rs} (95%) diff --git a/Cargo.lock b/Cargo.lock index bb319cce..0a537742 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -923,6 +923,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" @@ -1208,7 +1217,7 @@ dependencies = [ "dyn-clone", "heapless", "illumos-sys-hdrs", - "itertools 0.12.1", + "itertools 0.13.0", "kstat-macro", "opte", "opte-api", @@ -1240,7 +1249,7 @@ dependencies = [ "clap", "criterion", "ctor", - "itertools 0.12.1", + "itertools 0.13.0", "nix", "opte", "opte-test-utils", diff --git a/Cargo.toml b/Cargo.toml index 67f739d4..eecd6795 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,7 +51,7 @@ darling = "0.20" dyn-clone = "1.0" heapless = "0.8" ipnetwork = { version = "0.20", default-features = false } -itertools = { version = "0.12", default-features = false } +itertools = { version = "0.13", default-features = false } libc = "0.2" libnet = { git = "https://github.com/oxidecomputer/netadm-sys" } nix = { version = "0.29", features = ["signal", "user"] } diff --git a/xde-tests/tests/loopback.rs b/xde-tests/tests/loopback.rs index fa01230a..a0284807 100644 --- a/xde-tests/tests/loopback.rs +++ b/xde-tests/tests/loopback.rs @@ -11,7 +11,7 @@ fn test_xde_loopback() -> Result<()> { let topol = xde_tests::two_node_topology()?; // Now we should be able to ping b from a on the overlay. - &topol.nodes[0] + _ = &topol.nodes[0] .zone .zone .zexec(&format!("ping {}", &topol.nodes[1].port.ip()))?; diff --git a/xde/src/dls.rs b/xde/src/dls.rs deleted file mode 100644 index 7f9c3a5a..00000000 --- a/xde/src/dls.rs +++ /dev/null @@ -1,27 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -// Copyright 2022 Oxide Computer Company - -// stuff we need from dls - -use crate::mac; -use illumos_sys_hdrs::boolean_t; -use illumos_sys_hdrs::c_int; -use illumos_sys_hdrs::datalink_id_t; -use illumos_sys_hdrs::zoneid_t; - -extern "C" { - pub fn dls_devnet_create( - mh: *mut mac::mac_handle, - linkid: datalink_id_t, - zoneid: zoneid_t, - ) -> c_int; - - pub fn dls_devnet_destroy( - mh: *mut mac::mac_handle, - linkid: *mut datalink_id_t, - wait: boolean_t, - ) -> c_int; -} diff --git a/xde/src/dls/mod.rs b/xde/src/dls/mod.rs new file mode 100644 index 00000000..6cab4783 --- /dev/null +++ b/xde/src/dls/mod.rs @@ -0,0 +1,258 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2024 Oxide Computer Company + +//! Safe abstractions around DLS public and private functions. + +pub mod sys; + +use crate::mac::mac_client_handle; +use crate::mac::MacClient; +use crate::mac::MacPerimeterHandle; +use crate::mac::MacTxFlags; +use crate::mac::MAC_DROP_ON_NO_DESC; +use core::ffi::CStr; +use core::fmt::Display; +use core::ptr; +use core::ptr::NonNull; +use illumos_sys_hdrs::c_int; +use illumos_sys_hdrs::datalink_id_t; +use illumos_sys_hdrs::uintptr_t; +use illumos_sys_hdrs::ENOENT; +use opte::engine::packet::Packet; +use opte::engine::packet::PacketState; +pub use sys::*; + +/// An integer ID used by DLS to refer to a given link. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct LinkId(datalink_id_t); + +impl LinkId { + /// Request the link ID for a device using its name. + pub fn from_name(name: impl AsRef) -> Result { + let mut link_id = 0; + + unsafe { + match dls_mgmt_get_linkid(name.as_ref().as_ptr(), &mut link_id) { + 0 => Ok(LinkId(link_id)), + ENOENT => Err(LinkError::NotFound), + err => Err(LinkError::Other(err)), + } + } + } +} + +impl From for datalink_id_t { + fn from(val: LinkId) -> Self { + val.0 + } +} + +/// Errors encountered while querying DLS for a `LinkId`. +pub enum LinkError { + NotFound, + Other(i32), +} + +impl Display for LinkError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + LinkError::NotFound => write!(f, "link not found"), + LinkError::Other(e) => write!(f, "unknown error ({e})"), + } + } +} + +/// A hold on an existing link managed by DLS. +#[derive(Debug)] +struct DlsLink { + inner: Option, + link: LinkId, +} + +#[derive(Debug)] +struct DlsLinkInner { + dlp: *mut dls_link, + dlh: dls_dl_handle, +} + +impl DlsLink { + /// Place a hold on an existing link. + fn hold(mph: &MacPerimeterHandle) -> Result { + let mut dlp = ptr::null_mut(); + let mut dlh = ptr::null_mut(); + let link = mph.link_id(); + + let res = unsafe { dls_devnet_hold(link.into(), &mut dlh) }; + if res != 0 { + return Err(res); + } + + let res = unsafe { dls_link_hold(dls_devnet_mac(dlh), &mut dlp) }; + if res == 0 { + Ok(Self { inner: Some(DlsLinkInner { dlp, dlh }), link }) + } else { + unsafe { dls_devnet_rele(dlh) }; + Err(res) + } + } + + /// Release a hold on a given link. + /// + /// This operation requires that you acquire the MAC perimeter + /// for the target device. + fn release(mut self, mph: &MacPerimeterHandle) { + if let Some(inner) = self.inner.take() { + if mph.link_id() != self.link { + panic!("Tried to free link hold with the wrong MAC perimeter: saw {:?}, wanted {:?}", + mph.link_id(), self.link); + } + unsafe { + dls_link_rele(inner.dlp); + dls_devnet_rele(inner.dlh); + } + } + } + + /// Convert a hold into a `DlsStream` for packet Rx/Tx. + fn open_stream( + mut self, + mph: &MacPerimeterHandle, + ) -> Result { + let Some(inner) = self.inner.as_ref() else { + panic!("attempted to open a DlsStream on freed link") + }; + + if mph.link_id() != self.link { + panic!("Tried to open stream with the wrong MAC perimeter: saw {:?}, wanted {:?}", + mph.link_id(), self.link); + } + + // NOTE: this is a stlouis-only way to create a dld_str_t. It + // is virtually identical to the clean-slate state from the kmemcache, + // with no rq/wq set. + let dld_str = NonNull::new(unsafe { dld_str_create_detached() }); + let Some(dld_str) = dld_str else { + self.release(mph); + return Err(-1); + }; + + let res = unsafe { dls_open(inner.dlp, inner.dlh, dld_str.as_ptr()) }; + if res == 0 { + // DLP is held/consumed by dls_open. + _ = self.inner.take(); + Ok(DlsStream { + inner: Some(DlsStreamInner { dld_str }), + link: mph.link_id(), + } + .into()) + } else { + self.release(mph); + Err(res) + } + } +} + +impl Drop for DlsLink { + fn drop(&mut self) { + if self.inner.take().is_some() { + opte::engine::err!( + "dropped hold on link {:?} without releasing!!", + self.link + ); + } + } +} + +/// A DLS message stream on a target link, allowing packet +/// Rx and Tx. +#[derive(Debug)] +pub struct DlsStream { + inner: Option, + link: LinkId, +} + +#[derive(Debug)] +struct DlsStreamInner { + dld_str: NonNull, +} + +impl DlsStream { + pub fn open(link_id: LinkId) -> Result { + let perim = MacPerimeterHandle::from_linkid(link_id)?; + let link_handle = DlsLink::hold(&perim)?; + link_handle.open_stream(&perim) + } + + /// Returns the ID of the link this stream belongs to. + pub fn link_id(&self) -> LinkId { + self.link + } + + /// Send the [`Packet`] on this client, dropping if there is no + /// descriptor available + /// + /// This function always consumes the [`Packet`]. + /// + /// XXX The underlying mac_tx() function accepts a packet chain, + /// but for now we pass only a single packet at a time. + pub fn tx_drop_on_no_desc( + &self, + pkt: Packet, + hint: uintptr_t, + flags: MacTxFlags, + ) { + let Some(inner) = self.inner.as_ref() else { + // XXX: probably handle or signal an error here. + return; + }; + // We must unwrap the raw `mblk_t` out of the `pkt` here, + // otherwise the mblk_t would be dropped at the end of this + // function along with `pkt`. + let mut raw_flags = flags.bits(); + raw_flags |= MAC_DROP_ON_NO_DESC; + unsafe { + str_mdata_fastpath_put( + inner.dld_str.as_ptr(), + pkt.unwrap_mblk(), + hint, + raw_flags, + ) + }; + } +} + +impl MacClient for DlsStream { + fn mac_client_handle(&self) -> Result<*mut mac_client_handle, c_int> { + let Some(inner) = self.inner.as_ref() else { + return Err(-1); + }; + + Ok(unsafe { dld_str_mac_client_handle(inner.dld_str.as_ptr()) }) + } +} + +impl Drop for DlsStream { + fn drop(&mut self) { + if let Some(inner) = self.inner.take() { + match MacPerimeterHandle::from_linkid(self.link) { + Ok(_perim) => unsafe { + // NOTE: this is a stlouis-only way to free this + // dld_str_t. It will handle the remainder of the + // cleanup for which dld_str_detach would have been + // responsible. + dls_close(inner.dld_str.as_ptr()); + dld_str_destroy_detached(inner.dld_str.as_ptr()); + }, + Err(e) => opte::engine::err!( + "couldn't acquire MAC perimeter (err {}): \ + dropped stream on link {:?} without releasing", + e, + self.link, + ), + } + } + } +} diff --git a/xde/src/dls/sys.rs b/xde/src/dls/sys.rs new file mode 100644 index 00000000..3c1d85d5 --- /dev/null +++ b/xde/src/dls/sys.rs @@ -0,0 +1,90 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2024 Oxide Computer Company + +// stuff we need from dls + +use crate::mac; +use crate::mac::mac_client_handle; +use crate::mac::mac_tx_cookie_t; +use illumos_sys_hdrs::boolean_t; +use illumos_sys_hdrs::c_char; +use illumos_sys_hdrs::c_int; +use illumos_sys_hdrs::datalink_id_t; +use illumos_sys_hdrs::mblk_t; +use illumos_sys_hdrs::uintptr_t; +use illumos_sys_hdrs::zoneid_t; + +extern "C" { + pub fn dls_devnet_create( + mh: *mut mac::mac_handle, + linkid: datalink_id_t, + zoneid: zoneid_t, + ) -> c_int; + + pub fn dls_devnet_destroy( + mh: *mut mac::mac_handle, + linkid: *mut datalink_id_t, + wait: boolean_t, + ) -> c_int; + + pub fn dls_mgmt_get_linkid( + name: *const c_char, + linkid: *mut datalink_id_t, + ) -> c_int; +} + +// Private DLS functions needed to have a Tx path on top of +// an existing link while circumventing `ip`. +extern "C" { + pub type dls_devnet_s; + pub type dld_str_s; + pub type dls_link; + + /// Transmit a packet chain on a given link. + /// This is effectively one layer above mac_tx. + pub fn str_mdata_fastpath_put( + dsp: *mut dld_str_s, + mp: *mut mblk_t, + f_hint: uintptr_t, + flag: u16, + ) -> mac_tx_cookie_t; + + // NOTE: ALL BELOW FUNCTIONS REQUIRE THE MAC PERIMETER TO BE HELD. + pub fn dls_devnet_hold( + link: datalink_id_t, + ddhp: *mut dls_dl_handle, + ) -> c_int; + + pub fn dls_devnet_rele(dlh: dls_dl_handle); + + pub fn dls_link_hold( + name: *const c_char, + dlpp: *mut *mut dls_link, + ) -> c_int; + + pub fn dls_link_rele(dlp: *mut dls_link); + + pub fn dls_devnet_mac(dlh: dls_dl_handle) -> *const c_char; + + pub fn dls_open( + dlp: *mut dls_link, + ddh: dls_dl_handle, + dsp: *mut dld_str_s, + ) -> c_int; + + pub fn dls_close(dsp: *mut dld_str_s); + + // These are stlouis-only methods used to enable the + // approach we're using here to get a Tx pathway via the + // existing primary MAC client on the underlay devices. + pub fn dld_str_create_detached() -> *mut dld_str_s; + pub fn dld_str_destroy_detached(val: *mut dld_str_s); + pub fn dld_str_mac_client_handle( + val: *mut dld_str_s, + ) -> *mut mac_client_handle; +} + +pub type dls_dl_handle = *mut dls_devnet_s; diff --git a/xde/src/lib.rs b/xde/src/lib.rs index 726f1ef4..1684813d 100644 --- a/xde/src/lib.rs +++ b/xde/src/lib.rs @@ -43,7 +43,6 @@ use illumos_sys_hdrs::KM_SLEEP; pub mod dls; pub mod ip; pub mod mac; -mod mac_sys; pub mod route; pub mod secpolicy; pub mod sys; @@ -117,25 +116,25 @@ fn _Unwind_Resume() -> ! { #[macro_export] macro_rules! warn { ($format:expr) => { - let msg = CString::new(format!($format)).unwrap(); + let msg = ::alloc::ffi::CString::new(format!($format)).unwrap(); #[allow(unused_unsafe)] - unsafe { cmn_err(CE_WARN, msg.as_ptr()) }; + unsafe { ::illumos_sys_hdrs::cmn_err(::illumos_sys_hdrs::CE_WARN, msg.as_ptr()) }; }; ($format:expr, $($args:expr),*) => { - let msg = CString::new(format!($format, $($args),*)).unwrap(); + let msg = ::alloc::ffi::CString::new(format!($format, $($args),*)).unwrap(); #[allow(unused_unsafe)] - unsafe { cmn_err(CE_WARN, msg.as_ptr()) }; + unsafe { ::illumos_sys_hdrs::cmn_err(::illumos_sys_hdrs::CE_WARN, msg.as_ptr()) }; }; } #[macro_export] macro_rules! note { ($format:expr) => { - let msg = CString::new(format!($format)); - cmn_err(CE_NOTE, msg.as_ptr()); + let msg = ::alloc::ffi::CString::new(format!($format)); + ::illumos_sys_hdrs::cmn_err(::illumos_sys_hdrs::CE_NOTE, msg.as_ptr()); }; ($format:expr, $($args:expr),*) => { - let msg = CString::new(format!($format, $($args),*)); - cmn_err(CE_NOTE, msg.as_ptr()); + let msg = ::alloc::ffi::CString::new(format!($format, $($args),*)); + ::illumos_sys_hdrs::cmn_err(::illumos_sys_hdrs::CE_NOTE, msg.as_ptr()); }; } diff --git a/xde/src/mac.rs b/xde/src/mac/mod.rs similarity index 82% rename from xde/src/mac.rs rename to xde/src/mac/mod.rs index 22dbb7f8..70cf0aa9 100644 --- a/xde/src/mac.rs +++ b/xde/src/mac/mod.rs @@ -2,13 +2,15 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2022 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! Safe abstractions for the mac client API. //! //! NOTE: This module is re-exporting all of the sys definitions at //! the moment out of laziness. -pub use super::mac_sys::*; +pub mod sys; + +use crate::dls::LinkId; use alloc::ffi::CString; use alloc::string::String; use alloc::string::ToString; @@ -22,6 +24,7 @@ use opte::engine::ether::EtherAddr; use opte::engine::packet::Initialized; use opte::engine::packet::Packet; use opte::engine::packet::PacketState; +pub use sys::*; /// Errors while opening a MAC handle. #[derive(Debug)] @@ -196,37 +199,6 @@ impl MacClientHandle { } } - /// Register promiscuous callback to receive packets on the underlying MAC. - pub fn add_promisc( - self: &Arc, - ptype: mac_client_promisc_type_t, - promisc_fn: mac_rx_fn, - flags: u16, - ) -> Result { - let mut mph = ptr::null_mut(); - - // `MacPromiscHandle` keeps a reference to this `MacClientHandle` - // until it is removed and so we can safely access it from the - // callback via the `arg` pointer. - let mch = Arc::into_raw(self.clone()); - let ret = unsafe { - mac_promisc_add( - self.mch, - ptype, - promisc_fn, - mch as *mut c_void, - &mut mph, - flags, - ) - }; - - if ret == 0 { - Ok(MacPromiscHandle { mph, mch }) - } else { - Err(ret) - } - } - /// Send the [`Packet`] on this client. /// /// If the packet cannot be sent, return it. If you want to drop @@ -298,24 +270,66 @@ impl Drop for MacClientHandle { } } +/// Structs which are (or contain) a usable MAC client. +/// +/// Currently, this is only used to enable promiscuous handler +/// registration. +pub trait MacClient { + fn mac_client_handle(&self) -> Result<*mut mac_client_handle, c_int>; +} + +impl MacClient for MacClientHandle { + fn mac_client_handle(&self) -> Result<*mut mac_client_handle, c_int> { + Ok(self.mch) + } +} + /// Safe wrapper around a `mac_promisc_handle_t`. #[derive(Debug)] -pub struct MacPromiscHandle { +pub struct MacPromiscHandle

{ /// The underlying `mac_promisc_handle_t`. mph: *mut mac_promisc_handle, - /// The `MacClientHandle` used to create this promiscuous callback. - mch: *const MacClientHandle, + /// The parent used to create this promiscuous callback. + parent: *const P, +} + +impl MacPromiscHandle

{ + /// Register a promiscuous callback to receive packets on the underlying MAC. + pub fn new( + parent: Arc

, + ptype: mac_client_promisc_type_t, + promisc_fn: mac_rx_fn, + flags: u16, + ) -> Result, c_int> { + let mut mph = ptr::null_mut(); + let mch = parent.mac_client_handle()?; + let parent = Arc::into_raw(parent); + let arg = parent as *mut c_void; + + // SAFETY: `MacPromiscHandle` keeps a reference to this `P` + // until it is removed and so we can safely access it from the + // callback via the `arg` pointer. + let ret = unsafe { + mac_promisc_add(mch, ptype, promisc_fn, arg, &mut mph, flags) + }; + + if ret == 0 { + Ok(Self { mph, parent }) + } else { + Err(ret) + } + } } -impl Drop for MacPromiscHandle { +impl

Drop for MacPromiscHandle

{ fn drop(&mut self) { // Safety: We know that a `MacPromiscHandle` can only exist if a // mac promisc handle was successfully obtained, and thus `mph` // is valid. unsafe { mac_promisc_remove(self.mph); - Arc::from_raw(self.mch); // dropped immediately + Arc::from_raw(self.parent); // dropped immediately }; } } @@ -338,3 +352,35 @@ impl Drop for MacUnicastHandle { unsafe { mac_unicast_remove(self.mch.mch, self.muh) }; } } + +/// Safe wrapper around a `mac_perim_handle_t`. +pub struct MacPerimeterHandle { + mph: mac_perim_handle, + link: LinkId, +} + +impl MacPerimeterHandle { + /// Attempt to acquire the MAC perimeter for a given link. + pub fn from_linkid(link: LinkId) -> Result { + let mut mph = 0; + let res = unsafe { mac_perim_enter_by_linkid(link.into(), &mut mph) }; + if res == 0 { + Ok(Self { mph, link }) + } else { + Err(res) + } + } + + /// Returns the ID of the link whose MAC perimeter is held. + pub fn link_id(&self) -> LinkId { + self.link + } +} + +impl Drop for MacPerimeterHandle { + fn drop(&mut self) { + unsafe { + mac_perim_exit(self.mph); + } + } +} diff --git a/xde/src/mac_sys.rs b/xde/src/mac/sys.rs similarity index 95% rename from xde/src/mac_sys.rs rename to xde/src/mac/sys.rs index 352193e7..e4fb9ee8 100644 --- a/xde/src/mac_sys.rs +++ b/xde/src/mac/sys.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2022 Oxide Computer Company +// Copyright 2024 Oxide Computer Company // stuff we need from mac @@ -11,6 +11,7 @@ use illumos_sys_hdrs::c_char; use illumos_sys_hdrs::c_int; use illumos_sys_hdrs::c_uint; use illumos_sys_hdrs::c_void; +use illumos_sys_hdrs::datalink_id_t; use illumos_sys_hdrs::ddi_info_cmd_t; use illumos_sys_hdrs::dev_info; use illumos_sys_hdrs::dev_ops; @@ -160,6 +161,24 @@ extern "C" { pub fn mac_private_minor() -> minor_t; } +// Private MAC functions needed to get us a Tx path. + +// Not quite a `void*` -- this includes extra flags etc. +pub type mac_perim_handle = uintptr_t; +extern "C" { + pub fn mac_perim_enter_by_mh(mh: mac_handle, mph: *mut mac_perim_handle); + pub fn mac_perim_enter_by_macname( + name: *const c_char, + mph: *mut mac_perim_handle, + ) -> c_int; + pub fn mac_perim_enter_by_linkid( + link: datalink_id_t, + mph: *mut mac_perim_handle, + ) -> c_int; + pub fn mac_perim_exit(mph: mac_perim_handle); + pub fn mac_perim_held(mh: mac_handle) -> boolean_t; +} + #[repr(C)] #[derive(Debug)] pub enum mac_diag { diff --git a/xde/src/xde.rs b/xde/src/xde.rs index e7859b22..c842c0eb 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -13,16 +13,15 @@ //#![allow(clippy::arc_with_non_send_sync)] use crate::dls; +use crate::dls::DlsStream; +use crate::dls::LinkId; use crate::ioctl::IoctlEnvelope; use crate::mac; use crate::mac::mac_getinfo; use crate::mac::mac_private_minor; -use crate::mac::MacClientHandle; use crate::mac::MacHandle; -use crate::mac::MacOpenFlags; use crate::mac::MacPromiscHandle; use crate::mac::MacTxFlags; -use crate::mac::MacUnicastHandle; use crate::route::Route; use crate::route::RouteCache; use crate::route::RouteKey; @@ -58,7 +57,6 @@ use opte::ddi::sync::KRwLock; use opte::ddi::sync::KRwLockType; use opte::ddi::time::Interval; use opte::ddi::time::Periodic; -use opte::engine::ether::EtherAddr; use opte::engine::geneve::Vni; use opte::engine::headers::EncapMeta; use opte::engine::headers::IpAddr; @@ -212,17 +210,12 @@ pub struct xde_underlay_port { /// The MAC address associated with this underlay port. pub mac: [u8; 6], - /// MAC handle to the underlay link. - mh: Arc, - - /// MAC client handle for tx/rx on the underlay link. - mch: Arc, - - /// MAC client handle for tx/rx on the underlay link. - muh: MacUnicastHandle, - /// MAC promiscuous handle for receiving packets on the underlay link. - mph: MacPromiscHandle, + mph: MacPromiscHandle, + + /// DLS-level handle on a device for promiscuous registration and + /// packet Tx. + stream: Arc, } struct XdeState { @@ -877,6 +870,7 @@ fn delete_xde(req: &DeleteXdeReq) -> Result { #[no_mangle] fn set_xde_underlay(req: &SetXdeUnderlayReq) -> Result { let state = get_xde_state(); + let mut underlay = state.underlay.lock(); if underlay.is_some() { return Err(OpteError::System { @@ -925,46 +919,26 @@ fn clear_xde_underlay() -> Result { }; for u in [u1, u2] { - // Clear all Rx paths - u.mch.clear_rx(); - - // We have a chain of refs here: - // 1. `MacPromiscHandle` holds a ref to `MacClientHandle`, and - // 2. `MacUnicastHandle` holds a ref to `MacClientHandle`, and - // 3. `MacClientHandle` holds a ref to `MacHandle`. - // We explicitly drop them in order here to ensure there are no - // outstanding refs. + // We have a chain of refs here: `MacPromiscHandle` holds a ref to + // `DldStream`. We explicitly drop them in order here to ensure + // there are no outstanding refs. - // 1. Remove promisc and unicast callbacks + // 1. Remove promisc callback. drop(u.mph); - drop(u.muh); // Although `xde_rx` can be called into without any running ports - // via the promisc and unicast handles, illumos guarantees that - // neither callback will be running here. `mac_promisc_remove` will - // either remove the callback immediately (if there are no walkers) - // or will mark the callback as condemned and await all active - // walkers finishing. Accordingly, no one else will have or try to - // clone the MAC client handle. - - // 2. Remove MAC client handle - if Arc::into_inner(u.mch).is_none() { - warn!( - "underlay {} has outstanding mac client handle refs", - u.name - ); - return Err(OpteError::System { - errno: EBUSY, - msg: format!("underlay {} has outstanding refs", u.name), - }); - } - - // Finally, we can cleanup the MAC handle for this underlay - if Arc::into_inner(u.mh).is_none() { + // via the promisc handle, illumos guarantees that this callback won't + // be running here. `mac_promisc_remove` will either remove the callback + // immediately (if there are no walkers) or will mark the callback as + // condemned and await all active walkers finishing. Accordingly, no one + // else will have or try to clone the Stream handle. + + // 2. Close the open stream handle. + if Arc::into_inner(u.stream).is_none() { return Err(OpteError::System { errno: EBUSY, msg: format!( - "underlay {} has outstanding mac handle refs", + "underlay {} has outstanding dls_stream refs", u.name ), }); @@ -1068,60 +1042,51 @@ unsafe extern "C" fn xde_attach( /// Setup underlay port atop the given link. fn create_underlay_port( link_name: String, - mc_name: &str, + // This parameter is likely to be used as part of the flows work. + _mc_name: &str, ) -> Result { - // Grab mac handle for underlying link - let mh = MacHandle::open_by_link_name(&link_name).map(Arc::new).map_err( - |e| OpteError::System { - errno: EFAULT, - msg: format!("failed to open link {link_name} for underlay: {e}"), - }, - )?; + let link_cstr = CString::new(link_name.as_str()).unwrap(); - // Get a mac client handle as well. - // - let oflags = MacOpenFlags::NONE; - let mch = MacClientHandle::open(&mh, Some(mc_name), oflags, 0) - .map(Arc::new) - .map_err(|e| OpteError::System { + let link_id = + LinkId::from_name(link_cstr).map_err(|err| OpteError::System { errno: EFAULT, - msg: format!("mac_client_open failed for {link_name}: {e}"), + msg: format!("failed to get linkid for {link_name}: {err}"), })?; + let stream = + Arc::new(DlsStream::open(link_id).map_err(|e| OpteError::System { + errno: EFAULT, + msg: format!("failed to grab open stream for {link_name}: {e}"), + })?); + // Setup promiscuous callback to receive all packets on this link. // // We specify `MAC_PROMISC_FLAGS_NO_TX_LOOP` here to skip receiving copies // of outgoing packets we sent ourselves. - let mph = mch - .add_promisc( - mac::mac_client_promisc_type_t::MAC_CLIENT_PROMISC_ALL, - xde_rx, - mac::MAC_PROMISC_FLAGS_NO_TX_LOOP, - ) - .map_err(|e| OpteError::System { - errno: EFAULT, - msg: format!("mac_promisc_add failed for {link_name}: {e}"), - })?; - - // Set up a unicast callback. The MAC address here is a sentinel value with - // nothing real behind it. This is why we picked the zero value in the Oxide - // OUI space for virtual MACs. The reason this is being done is that illumos - // requires that if there is a single mac client on a link, that client must - // have an L2 address. This was not caught until recently, because this is - // only enforced as a debug assert in the kernel. - let mac = EtherAddr::from([0xa8, 0x40, 0x25, 0xff, 0x00, 0x00]); - let muh = mch.add_unicast(mac).map_err(|e| OpteError::System { + let mph = MacPromiscHandle::new( + stream.clone(), + mac::mac_client_promisc_type_t::MAC_CLIENT_PROMISC_ALL, + xde_rx, + mac::MAC_PROMISC_FLAGS_NO_TX_LOOP, + ) + .map_err(|e| OpteError::System { errno: EFAULT, - msg: format!("mac_unicast_add failed for {link_name}: {e}"), + msg: format!("mac_promisc_add failed for {link_name}: {e}"), })?; + // Grab mac handle for underlying link, to retrieve its MAC address. + let mh = MacHandle::open_by_link_name(&link_name).map(Arc::new).map_err( + |e| OpteError::System { + errno: EFAULT, + msg: format!("failed to open link {link_name} for underlay: {e}"), + }, + )?; + Ok(xde_underlay_port { name: link_name, mac: mh.get_mac_addr(), - mh, - mch, mph, - muh, + stream, }) } @@ -1579,7 +1544,7 @@ unsafe fn xde_mc_tx_one( // Choose u1 as a starting point. This may be changed in the next_hop // function when we are actually able to determine what interface should be // used. - let mch = &src_dev.u1.mch; + let stream = &src_dev.u1.stream; let hint = 0; // Send straight to underlay in passthrough mode. @@ -1590,7 +1555,7 @@ unsafe fn xde_mc_tx_one( // refresh my memory on all of this. // // TODO Is there way to set mac_tx to must use result? - mch.tx_drop_on_no_desc(pkt, hint, MacTxFlags::empty()); + stream.tx_drop_on_no_desc(pkt, hint, MacTxFlags::empty()); return ptr::null_mut(); } @@ -1664,7 +1629,7 @@ unsafe fn xde_mc_tx_one( // Unwrap: We know the packet is good because we just // unwrapped it above. let new_pkt = Packet::::wrap_mblk(mblk).unwrap(); - underlay_dev.mch.tx_drop_on_no_desc( + underlay_dev.stream.tx_drop_on_no_desc( new_pkt, hint, MacTxFlags::empty(), @@ -1680,7 +1645,7 @@ unsafe fn xde_mc_tx_one( } Ok(ProcessResult::Bypass) => { - mch.tx_drop_on_no_desc(pkt, hint, MacTxFlags::empty()); + stream.tx_drop_on_no_desc(pkt, hint, MacTxFlags::empty()); } Err(_) => {} @@ -1827,9 +1792,9 @@ unsafe extern "C" fn xde_rx( // corresponding to the underlay port we're receiving on. Being // here in the callback means the `MacPromiscHandle` hasn't been // dropped yet and thus our `MacClientHandle` is also still valid. - let mch_ptr = arg as *const MacClientHandle; + let mch_ptr = arg as *const DlsStream; Arc::increment_strong_count(mch_ptr); - let mch: Arc = Arc::from_raw(mch_ptr); + let stream: Arc = Arc::from_raw(mch_ptr); let Ok(mut chain) = PacketChain::new(mp_chain) else { bad_packet_probe( @@ -1846,13 +1811,13 @@ unsafe extern "C" fn xde_rx( // of chains (port0, port1, ...), or hold tx until another // packet breaks the run targeting the same dest. while let Some(pkt) = chain.pop_front() { - xde_rx_one(&mch, mrh, pkt); + xde_rx_one(&stream, mrh, pkt); } } #[inline] unsafe fn xde_rx_one( - mch: &MacClientHandle, + stream: &DlsStream, mrh: *mut mac::mac_resource_handle, pkt: Packet, ) { @@ -1921,7 +1886,7 @@ unsafe fn xde_rx_one( mac::mac_rx(dev.mh, mrh, pkt.unwrap_mblk()); } Ok(ProcessResult::Hairpin(hppkt)) => { - mch.tx_drop_on_no_desc(hppkt, 0, MacTxFlags::empty()); + stream.tx_drop_on_no_desc(hppkt, 0, MacTxFlags::empty()); } _ => {} }