From d25ad9c024b1f8d11a3d934819ace44073ca4fc8 Mon Sep 17 00:00:00 2001 From: Taekyung Kim Date: Thu, 15 Aug 2024 21:40:14 +0900 Subject: [PATCH] Add support for userspace device drivers with HW offload mode (#322) * Add support for userspace drivers with HW offload mode Add support for SR-IOV VFs using both hardware offload (switchdev) and userspace device driver such as vfio-pci. Signed-off-by: Taekyung Kim * Update MAC address if provided in HW offload mode If the MAC address is provided from args, update the MAC address of the VF to the provided MAC address via netlink. Signed-off-by: Taekyung Kim --------- Signed-off-by: Taekyung Kim --- pkg/plugin/plugin.go | 63 ++++++++++++----- pkg/sriov/sriov.go | 164 +++++++++++++++++++++++++++++++++++-------- pkg/types/types.go | 10 +-- 3 files changed, 185 insertions(+), 52 deletions(-) diff --git a/pkg/plugin/plugin.go b/pkg/plugin/plugin.go index 5aefe7a2..15d562ee 100644 --- a/pkg/plugin/plugin.go +++ b/pkg/plugin/plugin.go @@ -291,6 +291,15 @@ func CmdAdd(args *skel.CmdArgs) error { return err } + // check if the device driver is the type of userspace driver + userspaceMode := false + if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) { + userspaceMode, err = sriov.HasUserspaceDriver(netconf.DeviceID) + if err != nil { + return err + } + } + // removes all ports whose interfaces have an error if err := cleanPorts(ovsBridgeDriver); err != nil { return err @@ -302,8 +311,9 @@ func CmdAdd(args *skel.CmdArgs) error { } defer contNetns.Close() + // userspace driver does not create a network interface for the VF on the host var origIfName string - if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) { + if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) && !userspaceMode { origIfName, err = sriov.GetVFLinkName(netconf.DeviceID) if err != nil { return err @@ -312,13 +322,13 @@ func CmdAdd(args *skel.CmdArgs) error { // Cache NetConf for CmdDel if err = utils.SaveCache(config.GetCRef(args.ContainerID, args.IfName), 
- &types.CachedNetConf{Netconf: netconf, OrigIfName: origIfName}); err != nil { + &types.CachedNetConf{Netconf: netconf, OrigIfName: origIfName, UserspaceMode: userspaceMode}); err != nil { return fmt.Errorf("error saving NetConf %q", err) } var hostIface, contIface *current.Interface if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) { - hostIface, contIface, err = sriov.SetupSriovInterface(contNetns, args.ContainerID, args.IfName, netconf.MTU, netconf.DeviceID) + hostIface, contIface, err = sriov.SetupSriovInterface(contNetns, args.ContainerID, args.IfName, mac, netconf.MTU, netconf.DeviceID, userspaceMode) if err != nil { return err } @@ -353,7 +363,9 @@ func CmdAdd(args *skel.CmdArgs) error { } // run the IPAM plugin - if netconf.IPAM.Type != "" { + // userspace driver does not support IPAM plugin, + // because there is no network interface for the VF on the host + if netconf.IPAM.Type != "" && !userspaceMode { var r cnitypes.Result r, err = ipam.ExecAdd(netconf.IPAM.Type, args.StdinData) defer func() { @@ -562,8 +574,11 @@ func CmdDel(args *skel.CmdArgs) error { // port is already deleted in a previous invocation. log.Printf("Error: %v\n", err) } - if err = sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil { - return err + // there is no network interface in case of userspace driver, so OrigIfName is empty + if !cache.UserspaceMode { + if err = sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil { + return err + } } } else { // In accordance with the spec we clean up as many resources as possible. 
@@ -591,11 +606,14 @@ func CmdDel(args *skel.CmdArgs) error { } if sriov.IsOvsHardwareOffloadEnabled(cache.Netconf.DeviceID) { - err = sriov.ReleaseVF(args, cache.OrigIfName) - if err != nil { - // try to reset vf into original state as much as possible in case of error - if err := sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil { - log.Printf("Failed best-effort cleanup of VF %s: %v", cache.OrigIfName, err) + // there is no network interface in case of userspace driver, so OrigIfName is empty + if !cache.UserspaceMode { + err = sriov.ReleaseVF(args, cache.OrigIfName) + if err != nil { + // try to reset vf into original state as much as possible in case of error + if err := sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil { + log.Printf("Failed best-effort cleanup of VF %s: %v", cache.OrigIfName, err) + } } } } else { @@ -633,14 +651,6 @@ func CmdCheck(args *skel.CmdArgs) error { } ovsHWOffloadEnable := sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) - // run the IPAM plugin - if netconf.NetConf.IPAM.Type != "" { - err = ipam.ExecCheck(netconf.NetConf.IPAM.Type, args.StdinData) - if err != nil { - return fmt.Errorf("failed to check with IPAM plugin type %q: %v", netconf.NetConf.IPAM.Type, err) - } - } - envArgs, err := getEnvArgs(args.Args) if err != nil { return err @@ -672,6 +682,21 @@ func CmdCheck(args *skel.CmdArgs) error { return err } + // TODO: CmdCheck for userspace driver + if cache.UserspaceMode { + return nil + } + + // run the IPAM plugin + // userspace driver does not support IPAM plugin, + // because there is no network interface for the VF on the host + if netconf.NetConf.IPAM.Type != "" && !cache.UserspaceMode { + err = ipam.ExecCheck(netconf.NetConf.IPAM.Type, args.StdinData) + if err != nil { + return fmt.Errorf("failed to check with IPAM plugin type %q: %v", netconf.NetConf.IPAM.Type, err) + } + } + // Parse previous result. 
if netconf.NetConf.RawPrevResult == nil { return fmt.Errorf("Required prevResult missing") diff --git a/pkg/sriov/sriov.go b/pkg/sriov/sriov.go index b3d598a8..0c5c6a03 100644 --- a/pkg/sriov/sriov.go +++ b/pkg/sriov/sriov.go @@ -19,6 +19,7 @@ package sriov import ( "fmt" + "net" "os" "path/filepath" @@ -32,7 +33,8 @@ import ( var ( // SysBusPci is sysfs pci device directory - SysBusPci = "/sys/bus/pci/devices" + SysBusPci = "/sys/bus/pci/devices" + UserspaceDrivers = []string{"vfio-pci", "uio_pci_generic", "igb_uio"} ) // GetVFLinkName retrives interface name for given pci address @@ -66,6 +68,27 @@ func IsOvsHardwareOffloadEnabled(deviceID string) bool { return deviceID != "" } +// HasUserspaceDriver checks if a device is attached to userspace driver +// This method is copied from https://github.com/k8snetworkplumbingwg/sriov-cni/blob/8af83a33b2cac8e2df0bd6276b76658eb7c790ab/pkg/utils/utils.go#L222 +func HasUserspaceDriver(pciAddr string) (bool, error) { + driverLink := filepath.Join(SysBusPci, pciAddr, "driver") + driverPath, err := filepath.EvalSymlinks(driverLink) + if err != nil { + return false, err + } + driverStat, err := os.Stat(driverPath) + if err != nil { + return false, err + } + driverName := driverStat.Name() + for _, drv := range UserspaceDrivers { + if driverName == drv { + return true, nil + } + } + return false, nil +} + // GetBridgeUplinkNameByDeviceID tries to automatically resolve uplink interface name // for provided VF deviceID by following the sequence: // VF pci address > PF pci address > Bond (optional, if PF is part of a bond) @@ -159,48 +182,33 @@ func GetNetRepresentor(deviceID string) (string, error) { return rep, nil } -// SetupSriovInterface moves smartVF into container namespace, rename it with ifName and also returns host interface with VF's representor device -func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName string, mtu int, deviceID string) (*current.Interface, *current.Interface, error) { - hostIface := 
&current.Interface{} - contIface := &current.Interface{} - +// setupKernelSriovContIface moves smartVF into container namespace, +// configures the smartVF and also fills in the contIface fields +func setupKernelSriovContIface(contNetns ns.NetNS, contIface *current.Interface, deviceID string, pfLink netlink.Link, vfIdx int, ifName string, hwaddr net.HardwareAddr, mtu int) error { // get smart VF netdevice from PCI vfNetdevices, err := sriovnet.GetNetDevicesFromPci(deviceID) if err != nil { - return nil, nil, err + return err } // Make sure we have 1 netdevice per pci address if len(vfNetdevices) != 1 { - return nil, nil, fmt.Errorf("failed to get one netdevice interface per %s", deviceID) + return fmt.Errorf("failed to get one netdevice interface per %s", deviceID) } vfNetdevice := vfNetdevices[0] - // network representor device for smartvf - rep, err := GetNetRepresentor(deviceID) - if err != nil { - return nil, nil, err - } - - hostIface.Name = rep - - link, err := netlink.LinkByName(hostIface.Name) - if err != nil { - return nil, nil, err - } - hostIface.Mac = link.Attrs().HardwareAddr.String() - - // set MTU on smart VF representor - if mtu != 0 { - if err = netlink.LinkSetMTU(link, mtu); err != nil { - return nil, nil, fmt.Errorf("failed to set MTU on %s: %v", hostIface.Name, err) + // if MAC address is provided, set it to the VF by using PF netlink + // which is accessible in the host namespace, not in the container namespace + if hwaddr != nil { + if err := netlink.LinkSetVfHardwareAddr(pfLink, vfIdx, hwaddr); err != nil { + return err } } // Move smart VF to Container namespace err = moveIfToNetns(vfNetdevice, contNetns) if err != nil { - return nil, nil, err + return err } err = contNetns.Do(func(hostNS ns.NetNS) error { @@ -209,10 +217,20 @@ func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName string, mtu int if err != nil { return err } - link, err = netlink.LinkByName(contIface.Name) + link, err := netlink.LinkByName(contIface.Name) if err != nil { 
return err } + // if MAC address is provided, set it to the kernel VF netdevice + // otherwise, read the MAC address from the kernel VF netdevice + if hwaddr != nil { + if err = netlink.LinkSetHardwareAddr(link, hwaddr); err != nil { + return err + } + contIface.Mac = hwaddr.String() + } else { + contIface.Mac = link.Attrs().HardwareAddr.String() + } if mtu != 0 { if err = netlink.LinkSetMTU(link, mtu); err != nil { return err @@ -223,13 +241,101 @@ func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName string, mtu int return err } contIface.Sandbox = contNetns.Path() - contIface.Mac = link.Attrs().HardwareAddr.String() return nil }) + if err != nil { + return err + } + + return nil +} + +// setupUserspaceSriovContIface configures smartVF via PF netlink and fills in the contIface fields +func setupUserspaceSriovContIface(contNetns ns.NetNS, contIface *current.Interface, pfLink netlink.Link, vfIdx int, ifName string, hwaddr net.HardwareAddr) error { + contIface.Name = ifName + contIface.Sandbox = contNetns.Path() + + // if MAC address is provided, set it to the VF by using PF netlink + if hwaddr != nil { + if err := netlink.LinkSetVfHardwareAddr(pfLink, vfIdx, hwaddr); err != nil { + return err + } + contIface.Mac = hwaddr.String() + } else { + vfInfo := pfLink.Attrs().Vfs[vfIdx] + contIface.Mac = vfInfo.Mac.String() + } + + return nil +} + +// SetupSriovInterface configures smartVF and returns VF's representor device as host interface and VF's netdevice as container interface +func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName, mac string, mtu int, deviceID string, userspaceMode bool) (*current.Interface, *current.Interface, error) { + hostIface := &current.Interface{} + contIface := &current.Interface{} + + // network representor device for smartvf + rep, err := GetNetRepresentor(deviceID) + if err != nil { + return nil, nil, err + } + + hostIface.Name = rep + + link, err := netlink.LinkByName(hostIface.Name) + if err != nil { + return nil, nil, err + 
} + hostIface.Mac = link.Attrs().HardwareAddr.String() + + // get PF netlink and VF index from PCI address + pfIface, err := sriovnet.GetUplinkRepresentor(deviceID) if err != nil { return nil, nil, err } + pfLink, err := netlink.LinkByName(pfIface) + if err != nil { + return nil, nil, err + } + vfIdx, err := sriovnet.GetVfIndexByPciAddress(deviceID) + if err != nil { + return nil, nil, err + } + + // make sure PF netlink and VF index are valid + if len(pfLink.Attrs().Vfs) < vfIdx || pfLink.Attrs().Vfs[vfIdx].ID != vfIdx { + return nil, nil, fmt.Errorf("failed to get vf info from %s at index %d with Vfs %v", pfIface, vfIdx, pfLink.Attrs().Vfs) + } + + // parse MAC address if provided from args as described + // in the CNI spec (https://github.com/containernetworking/cni/blob/main/CONVENTIONS.md) + var hwaddr net.HardwareAddr + if mac != "" { + hwaddr, err = net.ParseMAC(mac) + if err != nil { + return nil, nil, fmt.Errorf("failed to parse MAC address %q: %v", mac, err) + } + } + + // set MTU on smart VF representor + if mtu != 0 { + if err = netlink.LinkSetMTU(link, mtu); err != nil { + return nil, nil, fmt.Errorf("failed to set MTU on %s: %v", hostIface.Name, err) + } + } + + if !userspaceMode { + // configure the smart VF netdevice directly in the container namespace + if err = setupKernelSriovContIface(contNetns, contIface, deviceID, pfLink, vfIdx, ifName, hwaddr, mtu); err != nil { + return nil, nil, err + } + } else { + // configure the smart VF netdevice via PF netlink + if err = setupUserspaceSriovContIface(contNetns, contIface, pfLink, vfIdx, ifName, hwaddr); err != nil { + return nil, nil, err + } + } return hostIface, contIface, nil } diff --git a/pkg/types/types.go b/pkg/types/types.go index 6e168115..d4a47d7e 100644 --- a/pkg/types/types.go +++ b/pkg/types/types.go @@ -71,13 +71,15 @@ type Trunk struct { ID *uint `json:"id,omitempty"` } -// CachedNetConf containing NetConfig and original smartnic vf interface -// name (set only in case of ovs hareware 
offload scenario). +// CachedNetConf containing NetConfig, original smartnic vf interface name +// and kernel/userspace device driver mode of the smartnic vf interface +// (the last two are set only in case of ovs hardware offload scenario). // this is intended to be used only for storing and retrieving config // to/from a data store (example file cache). type CachedNetConf struct { - Netconf *NetConf - OrigIfName string + Netconf *NetConf + OrigIfName string + UserspaceMode bool } // CachedPrevResultNetConf containing PrevResult.