diff --git a/pkg/plugin/plugin.go b/pkg/plugin/plugin.go
index 5aefe7a2..15d562ee 100644
--- a/pkg/plugin/plugin.go
+++ b/pkg/plugin/plugin.go
@@ -291,6 +291,15 @@ func CmdAdd(args *skel.CmdArgs) error {
 		return err
 	}
 
+	// check if the device driver is the type of userspace driver
+	userspaceMode := false
+	if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) {
+		userspaceMode, err = sriov.HasUserspaceDriver(netconf.DeviceID)
+		if err != nil {
+			return err
+		}
+	}
+
 	// removes all ports whose interfaces have an error
 	if err := cleanPorts(ovsBridgeDriver); err != nil {
 		return err
@@ -302,8 +311,9 @@ func CmdAdd(args *skel.CmdArgs) error {
 	}
 	defer contNetns.Close()
 
+	// userspace driver does not create a network interface for the VF on the host
 	var origIfName string
-	if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) {
+	if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) && !userspaceMode {
 		origIfName, err = sriov.GetVFLinkName(netconf.DeviceID)
 		if err != nil {
 			return err
@@ -312,13 +322,13 @@ func CmdAdd(args *skel.CmdArgs) error {
 
 	// Cache NetConf for CmdDel
 	if err = utils.SaveCache(config.GetCRef(args.ContainerID, args.IfName),
-		&types.CachedNetConf{Netconf: netconf, OrigIfName: origIfName}); err != nil {
+		&types.CachedNetConf{Netconf: netconf, OrigIfName: origIfName, UserspaceMode: userspaceMode}); err != nil {
 		return fmt.Errorf("error saving NetConf %q", err)
 	}
 
 	var hostIface, contIface *current.Interface
 	if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) {
-		hostIface, contIface, err = sriov.SetupSriovInterface(contNetns, args.ContainerID, args.IfName, netconf.MTU, netconf.DeviceID)
+		hostIface, contIface, err = sriov.SetupSriovInterface(contNetns, args.ContainerID, args.IfName, mac, netconf.MTU, netconf.DeviceID, userspaceMode)
 		if err != nil {
 			return err
 		}
@@ -353,7 +363,9 @@ func CmdAdd(args *skel.CmdArgs) error {
 	}
 
 	// run the IPAM plugin
-	if netconf.IPAM.Type != "" {
+	// userspace driver does not support IPAM plugin,
+	// because there is no network interface for the VF on the host
+	if netconf.IPAM.Type != "" && !userspaceMode {
 		var r cnitypes.Result
 		r, err = ipam.ExecAdd(netconf.IPAM.Type, args.StdinData)
 		defer func() {
@@ -562,8 +574,11 @@ func CmdDel(args *skel.CmdArgs) error {
 				// port is already deleted in a previous invocation.
 				log.Printf("Error: %v\n", err)
 			}
-			if err = sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil {
-				return err
+			// there is no network interface in case of userspace driver, so OrigIfName is empty
+			if !cache.UserspaceMode {
+				if err = sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil {
+					return err
+				}
 			}
 		} else {
 			// In accordance with the spec we clean up as many resources as possible.
@@ -591,11 +606,14 @@ func CmdDel(args *skel.CmdArgs) error {
 	}
 
 	if sriov.IsOvsHardwareOffloadEnabled(cache.Netconf.DeviceID) {
-		err = sriov.ReleaseVF(args, cache.OrigIfName)
-		if err != nil {
-			// try to reset vf into original state as much as possible in case of error
-			if err := sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil {
-				log.Printf("Failed best-effort cleanup of VF %s: %v", cache.OrigIfName, err)
+		// there is no network interface in case of userspace driver, so OrigIfName is empty
+		if !cache.UserspaceMode {
+			err = sriov.ReleaseVF(args, cache.OrigIfName)
+			if err != nil {
+				// try to reset vf into original state as much as possible in case of error
+				if err := sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil {
+					log.Printf("Failed best-effort cleanup of VF %s: %v", cache.OrigIfName, err)
+				}
 			}
 		}
 	} else {
@@ -633,14 +651,6 @@ func CmdCheck(args *skel.CmdArgs) error {
 	}
 	ovsHWOffloadEnable := sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID)
 
-	// run the IPAM plugin
-	if netconf.NetConf.IPAM.Type != "" {
-		err = ipam.ExecCheck(netconf.NetConf.IPAM.Type, args.StdinData)
-		if err != nil {
-			return fmt.Errorf("failed to check with IPAM plugin type %q: %v", netconf.NetConf.IPAM.Type, err)
-		}
-	}
-
 	envArgs, err := getEnvArgs(args.Args)
 	if err != nil {
 		return err
@@ -672,6 +682,21 @@ func CmdCheck(args *skel.CmdArgs) error {
 		return err
 	}
 
+	// TODO: CmdCheck for userspace driver
+	if cache.UserspaceMode {
+		return nil
+	}
+
+	// run the IPAM plugin
+	// userspace driver does not support IPAM plugin,
+	// because there is no network interface for the VF on the host
+	if netconf.NetConf.IPAM.Type != "" && !cache.UserspaceMode {
+		err = ipam.ExecCheck(netconf.NetConf.IPAM.Type, args.StdinData)
+		if err != nil {
+			return fmt.Errorf("failed to check with IPAM plugin type %q: %v", netconf.NetConf.IPAM.Type, err)
+		}
+	}
+
 	// Parse previous result.
 	if netconf.NetConf.RawPrevResult == nil {
 		return fmt.Errorf("Required prevResult missing")
diff --git a/pkg/sriov/sriov.go b/pkg/sriov/sriov.go
index b3d598a8..0c5c6a03 100644
--- a/pkg/sriov/sriov.go
+++ b/pkg/sriov/sriov.go
@@ -19,6 +19,7 @@ package sriov
 
 import (
 	"fmt"
+	"net"
 	"os"
 	"path/filepath"
 
@@ -32,7 +33,8 @@ import (
 
 var (
 	// SysBusPci is sysfs pci device directory
-	SysBusPci = "/sys/bus/pci/devices"
+	SysBusPci        = "/sys/bus/pci/devices"
+	UserspaceDrivers = []string{"vfio-pci", "uio_pci_generic", "igb_uio"}
 )
 
 // GetVFLinkName retrives interface name for given pci address
@@ -66,6 +68,27 @@ func IsOvsHardwareOffloadEnabled(deviceID string) bool {
 	return deviceID != ""
 }
 
+// HasUserspaceDriver checks if a device is attached to userspace driver
+// This method is copied from https://github.com/k8snetworkplumbingwg/sriov-cni/blob/8af83a33b2cac8e2df0bd6276b76658eb7c790ab/pkg/utils/utils.go#L222
+func HasUserspaceDriver(pciAddr string) (bool, error) {
+	driverLink := filepath.Join(SysBusPci, pciAddr, "driver")
+	driverPath, err := filepath.EvalSymlinks(driverLink)
+	if err != nil {
+		return false, err
+	}
+	driverStat, err := os.Stat(driverPath)
+	if err != nil {
+		return false, err
+	}
+	driverName := driverStat.Name()
+	for _, drv := range UserspaceDrivers {
+		if driverName == drv {
+			return true, nil
+		}
+	}
+	return false, nil
+}
+
 // GetBridgeUplinkNameByDeviceID tries to automatically resolve uplink interface name
 // for provided VF deviceID by following the sequence:
 // VF pci address > PF pci address > Bond (optional, if PF is part of a bond)
@@ -159,48 +182,33 @@ func GetNetRepresentor(deviceID string) (string, error) {
 	return rep, nil
 }
 
-// SetupSriovInterface moves smartVF into container namespace, rename it with ifName and also returns host interface with VF's representor device
-func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName string, mtu int, deviceID string) (*current.Interface, *current.Interface, error) {
-	hostIface := &current.Interface{}
-	contIface := &current.Interface{}
-
+// setupKernelSriovContIface moves smartVF into container namespace,
+// configures the smartVF and also fills in the contIface fields
+func setupKernelSriovContIface(contNetns ns.NetNS, contIface *current.Interface, deviceID string, pfLink netlink.Link, vfIdx int, ifName string, hwaddr net.HardwareAddr, mtu int) error {
 	// get smart VF netdevice from PCI
 	vfNetdevices, err := sriovnet.GetNetDevicesFromPci(deviceID)
 	if err != nil {
-		return nil, nil, err
+		return err
 	}
 
 	// Make sure we have 1 netdevice per pci address
 	if len(vfNetdevices) != 1 {
-		return nil, nil, fmt.Errorf("failed to get one netdevice interface per %s", deviceID)
+		return fmt.Errorf("failed to get one netdevice interface per %s", deviceID)
 	}
 	vfNetdevice := vfNetdevices[0]
 
-	// network representor device for smartvf
-	rep, err := GetNetRepresentor(deviceID)
-	if err != nil {
-		return nil, nil, err
-	}
-
-	hostIface.Name = rep
-
-	link, err := netlink.LinkByName(hostIface.Name)
-	if err != nil {
-		return nil, nil, err
-	}
-	hostIface.Mac = link.Attrs().HardwareAddr.String()
-
-	// set MTU on smart VF representor
-	if mtu != 0 {
-		if err = netlink.LinkSetMTU(link, mtu); err != nil {
-			return nil, nil, fmt.Errorf("failed to set MTU on %s: %v", hostIface.Name, err)
+	// if MAC address is provided, set it to the VF by using PF netlink
+	// which is accessible in the host namespace, not in the container namespace
+	if hwaddr != nil {
+		if err := netlink.LinkSetVfHardwareAddr(pfLink, vfIdx, hwaddr); err != nil {
+			return err
 		}
 	}
 
 	// Move smart VF to Container namespace
 	err = moveIfToNetns(vfNetdevice, contNetns)
 	if err != nil {
-		return nil, nil, err
+		return err
 	}
 
 	err = contNetns.Do(func(hostNS ns.NetNS) error {
@@ -209,10 +217,20 @@ func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName string, mtu int
 		if err != nil {
 			return err
 		}
-		link, err = netlink.LinkByName(contIface.Name)
+		link, err := netlink.LinkByName(contIface.Name)
 		if err != nil {
 			return err
 		}
+		// if MAC address is provided, set it to the kernel VF netdevice
+		// otherwise, read the MAC address from the kernel VF netdevice
+		if hwaddr != nil {
+			if err = netlink.LinkSetHardwareAddr(link, hwaddr); err != nil {
+				return err
+			}
+			contIface.Mac = hwaddr.String()
+		} else {
+			contIface.Mac = link.Attrs().HardwareAddr.String()
+		}
 		if mtu != 0 {
 			if err = netlink.LinkSetMTU(link, mtu); err != nil {
 				return err
@@ -223,13 +241,101 @@ func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName string, mtu int
 			return err
 		}
 		contIface.Sandbox = contNetns.Path()
-		contIface.Mac = link.Attrs().HardwareAddr.String()
 
 		return nil
 	})
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// setupUserspaceSriovContIface configures smartVF via PF netlink and fills in the contIface fields
+func setupUserspaceSriovContIface(contNetns ns.NetNS, contIface *current.Interface, pfLink netlink.Link, vfIdx int, ifName string, hwaddr net.HardwareAddr) error {
+	contIface.Name = ifName
+	contIface.Sandbox = contNetns.Path()
+
+	// if MAC address is provided, set it to the VF by using PF netlink
+	if hwaddr != nil {
+		if err := netlink.LinkSetVfHardwareAddr(pfLink, vfIdx, hwaddr); err != nil {
+			return err
+		}
+		contIface.Mac = hwaddr.String()
+	} else {
+		vfInfo := pfLink.Attrs().Vfs[vfIdx]
+		contIface.Mac = vfInfo.Mac.String()
+	}
+
+	return nil
+}
+
+// SetupSriovInterface configures smartVF and returns VF's representor device as host interface and VF's netdevice as container interface
+func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName, mac string, mtu int, deviceID string, userspaceMode bool) (*current.Interface, *current.Interface, error) {
+	hostIface := &current.Interface{}
+	contIface := &current.Interface{}
+
+	// network representor device for smartvf
+	rep, err := GetNetRepresentor(deviceID)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	hostIface.Name = rep
+
+	link, err := netlink.LinkByName(hostIface.Name)
+	if err != nil {
+		return nil, nil, err
+	}
+	hostIface.Mac = link.Attrs().HardwareAddr.String()
+
+	// get PF netlink and VF index from PCI address
+	pfIface, err := sriovnet.GetUplinkRepresentor(deviceID)
 	if err != nil {
 		return nil, nil, err
 	}
+	pfLink, err := netlink.LinkByName(pfIface)
+	if err != nil {
+		return nil, nil, err
+	}
+	vfIdx, err := sriovnet.GetVfIndexByPciAddress(deviceID)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	// make sure vfIdx is inside the PF's VF list before indexing it (an index equal to len would panic)
+	if vfIdx < 0 || vfIdx >= len(pfLink.Attrs().Vfs) || pfLink.Attrs().Vfs[vfIdx].ID != vfIdx {
+		return nil, nil, fmt.Errorf("failed to get vf info from %s at index %d with Vfs %v", pfIface, vfIdx, pfLink.Attrs().Vfs)
+	}
+
+	// parse MAC address if provided from args as described
+	// in the CNI spec (https://github.com/containernetworking/cni/blob/main/CONVENTIONS.md)
+	var hwaddr net.HardwareAddr
+	if mac != "" {
+		hwaddr, err = net.ParseMAC(mac)
+		if err != nil {
+			return nil, nil, fmt.Errorf("failed to parse MAC address %q: %v", mac, err)
+		}
+	}
+
+	// set MTU on smart VF representor
+	if mtu != 0 {
+		if err = netlink.LinkSetMTU(link, mtu); err != nil {
+			return nil, nil, fmt.Errorf("failed to set MTU on %s: %v", hostIface.Name, err)
+		}
+	}
+
+	if !userspaceMode {
+		// configure the smart VF netdevice directly in the container namespace
+		if err = setupKernelSriovContIface(contNetns, contIface, deviceID, pfLink, vfIdx, ifName, hwaddr, mtu); err != nil {
+			return nil, nil, err
+		}
+	} else {
+		// configure the smart VF netdevice via PF netlink
+		if err = setupUserspaceSriovContIface(contNetns, contIface, pfLink, vfIdx, ifName, hwaddr); err != nil {
+			return nil, nil, err
+		}
+	}
 
 	return hostIface, contIface, nil
 }
diff --git a/pkg/types/types.go b/pkg/types/types.go
index 6e168115..d4a47d7e 100644
--- a/pkg/types/types.go
+++ b/pkg/types/types.go
@@ -71,13 +71,15 @@ type Trunk struct {
 	ID    *uint `json:"id,omitempty"`
 }
 
-// CachedNetConf containing NetConfig and original smartnic vf interface
-// name (set only in case of ovs hareware offload scenario).
+// CachedNetConf containing NetConfig, original smartnic vf interface name
+// and kernel/userspace device driver mode of the smartnic vf interface
+// (the last two are set only in case of ovs hardware offload scenario).
 // this is intended to be used only for storing and retrieving config
 // to/from a data store (example file cache).
 type CachedNetConf struct {
-	Netconf    *NetConf
-	OrigIfName string
+	Netconf       *NetConf
+	OrigIfName    string
+	UserspaceMode bool
 }
 
 // CachedPrevResultNetConf containing PrevResult.