Skip to content

Commit

Permalink
Add support for userspace device drivers with HW offload mode (#322)
Browse files Browse the repository at this point in the history
* Add support for userspace drivers with HW offload mode

Add support for SR-IOV VFs using both hardware offload (switchdev)
and userspace device driver such as vfio-pci.

Signed-off-by: Taekyung Kim <[email protected]>

* Update MAC address if provided in HW offload mode

If the MAC address is provided from args, update the MAC address of the
VF to the provided MAC address via netlink.

Signed-off-by: Taekyung Kim <[email protected]>

---------

Signed-off-by: Taekyung Kim <[email protected]>
  • Loading branch information
kim-tae-kyung authored Aug 15, 2024
1 parent 44e6a7b commit d25ad9c
Show file tree
Hide file tree
Showing 3 changed files with 185 additions and 52 deletions.
63 changes: 44 additions & 19 deletions pkg/plugin/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,15 @@ func CmdAdd(args *skel.CmdArgs) error {
return err
}

// check if the device driver is the type of userspace driver
userspaceMode := false
if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) {
userspaceMode, err = sriov.HasUserspaceDriver(netconf.DeviceID)
if err != nil {
return err
}
}

// removes all ports whose interfaces have an error
if err := cleanPorts(ovsBridgeDriver); err != nil {
return err
Expand All @@ -302,8 +311,9 @@ func CmdAdd(args *skel.CmdArgs) error {
}
defer contNetns.Close()

// userspace driver does not create a network interface for the VF on the host
var origIfName string
if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) {
if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) && !userspaceMode {
origIfName, err = sriov.GetVFLinkName(netconf.DeviceID)
if err != nil {
return err
Expand All @@ -312,13 +322,13 @@ func CmdAdd(args *skel.CmdArgs) error {

// Cache NetConf for CmdDel
if err = utils.SaveCache(config.GetCRef(args.ContainerID, args.IfName),
&types.CachedNetConf{Netconf: netconf, OrigIfName: origIfName}); err != nil {
&types.CachedNetConf{Netconf: netconf, OrigIfName: origIfName, UserspaceMode: userspaceMode}); err != nil {
return fmt.Errorf("error saving NetConf %q", err)
}

var hostIface, contIface *current.Interface
if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) {
hostIface, contIface, err = sriov.SetupSriovInterface(contNetns, args.ContainerID, args.IfName, netconf.MTU, netconf.DeviceID)
hostIface, contIface, err = sriov.SetupSriovInterface(contNetns, args.ContainerID, args.IfName, mac, netconf.MTU, netconf.DeviceID, userspaceMode)
if err != nil {
return err
}
Expand Down Expand Up @@ -353,7 +363,9 @@ func CmdAdd(args *skel.CmdArgs) error {
}

// run the IPAM plugin
if netconf.IPAM.Type != "" {
// userspace driver does not support IPAM plugin,
// because there is no network interface for the VF on the host
if netconf.IPAM.Type != "" && !userspaceMode {
var r cnitypes.Result
r, err = ipam.ExecAdd(netconf.IPAM.Type, args.StdinData)
defer func() {
Expand Down Expand Up @@ -562,8 +574,11 @@ func CmdDel(args *skel.CmdArgs) error {
// port is already deleted in a previous invocation.
log.Printf("Error: %v\n", err)
}
if err = sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil {
return err
// there is no network interface in case of userspace driver, so OrigIfName is empty
if !cache.UserspaceMode {
if err = sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil {
return err
}
}
} else {
// In accordance with the spec we clean up as many resources as possible.
Expand Down Expand Up @@ -591,11 +606,14 @@ func CmdDel(args *skel.CmdArgs) error {
}

if sriov.IsOvsHardwareOffloadEnabled(cache.Netconf.DeviceID) {
err = sriov.ReleaseVF(args, cache.OrigIfName)
if err != nil {
// try to reset vf into original state as much as possible in case of error
if err := sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil {
log.Printf("Failed best-effort cleanup of VF %s: %v", cache.OrigIfName, err)
// there is no network interface in case of userspace driver, so OrigIfName is empty
if !cache.UserspaceMode {
err = sriov.ReleaseVF(args, cache.OrigIfName)
if err != nil {
// try to reset vf into original state as much as possible in case of error
if err := sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil {
log.Printf("Failed best-effort cleanup of VF %s: %v", cache.OrigIfName, err)
}
}
}
} else {
Expand Down Expand Up @@ -633,14 +651,6 @@ func CmdCheck(args *skel.CmdArgs) error {
}
ovsHWOffloadEnable := sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID)

// run the IPAM plugin
if netconf.NetConf.IPAM.Type != "" {
err = ipam.ExecCheck(netconf.NetConf.IPAM.Type, args.StdinData)
if err != nil {
return fmt.Errorf("failed to check with IPAM plugin type %q: %v", netconf.NetConf.IPAM.Type, err)
}
}

envArgs, err := getEnvArgs(args.Args)
if err != nil {
return err
Expand Down Expand Up @@ -672,6 +682,21 @@ func CmdCheck(args *skel.CmdArgs) error {
return err
}

// TODO: CmdCheck for userspace driver
if cache.UserspaceMode {
return nil
}

// run the IPAM plugin
// userspace driver does not support IPAM plugin,
// because there is no network interface for the VF on the host
if netconf.NetConf.IPAM.Type != "" && !cache.UserspaceMode {
err = ipam.ExecCheck(netconf.NetConf.IPAM.Type, args.StdinData)
if err != nil {
return fmt.Errorf("failed to check with IPAM plugin type %q: %v", netconf.NetConf.IPAM.Type, err)
}
}

// Parse previous result.
if netconf.NetConf.RawPrevResult == nil {
return fmt.Errorf("Required prevResult missing")
Expand Down
164 changes: 135 additions & 29 deletions pkg/sriov/sriov.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package sriov

import (
"fmt"
"net"
"os"
"path/filepath"

Expand All @@ -32,7 +33,8 @@ import (

var (
// SysBusPci is sysfs pci device directory
SysBusPci = "/sys/bus/pci/devices"
SysBusPci = "/sys/bus/pci/devices"
UserspaceDrivers = []string{"vfio-pci", "uio_pci_generic", "igb_uio"}
)

// GetVFLinkName retrieves interface name for given pci address
Expand Down Expand Up @@ -66,6 +68,27 @@ func IsOvsHardwareOffloadEnabled(deviceID string) bool {
return deviceID != ""
}

// HasUserspaceDriver reports whether the PCI device at pciAddr is currently
// bound to one of the drivers listed in UserspaceDrivers.
//
// The bound driver is discovered by resolving the "driver" symlink under
// SysBusPci/<pciAddr> and taking the name of the directory it points to.
// An error is returned when the symlink cannot be resolved or the resolved
// path cannot be stat'ed.
//
// This method is copied from https://github.com/k8snetworkplumbingwg/sriov-cni/blob/8af83a33b2cac8e2df0bd6276b76658eb7c790ab/pkg/utils/utils.go#L222
func HasUserspaceDriver(pciAddr string) (bool, error) {
	resolved, err := filepath.EvalSymlinks(filepath.Join(SysBusPci, pciAddr, "driver"))
	if err != nil {
		return false, err
	}

	info, err := os.Stat(resolved)
	if err != nil {
		return false, err
	}

	// The final path element of the resolved link is the driver name.
	bound := info.Name()
	for i := range UserspaceDrivers {
		if UserspaceDrivers[i] == bound {
			return true, nil
		}
	}
	return false, nil
}

// GetBridgeUplinkNameByDeviceID tries to automatically resolve uplink interface name
// for provided VF deviceID by following the sequence:
// VF pci address > PF pci address > Bond (optional, if PF is part of a bond)
Expand Down Expand Up @@ -159,48 +182,33 @@ func GetNetRepresentor(deviceID string) (string, error) {
return rep, nil
}

// SetupSriovInterface moves smartVF into container namespace, rename it with ifName and also returns host interface with VF's representor device
func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName string, mtu int, deviceID string) (*current.Interface, *current.Interface, error) {
hostIface := &current.Interface{}
contIface := &current.Interface{}

// setupKernelSriovContIface moves smartVF into container namespace,
// configures the smartVF and also fills in the contIface fields
func setupKernelSriovContIface(contNetns ns.NetNS, contIface *current.Interface, deviceID string, pfLink netlink.Link, vfIdx int, ifName string, hwaddr net.HardwareAddr, mtu int) error {
// get smart VF netdevice from PCI
vfNetdevices, err := sriovnet.GetNetDevicesFromPci(deviceID)
if err != nil {
return nil, nil, err
return err
}

// Make sure we have 1 netdevice per pci address
if len(vfNetdevices) != 1 {
return nil, nil, fmt.Errorf("failed to get one netdevice interface per %s", deviceID)
return fmt.Errorf("failed to get one netdevice interface per %s", deviceID)
}
vfNetdevice := vfNetdevices[0]

// network representor device for smartvf
rep, err := GetNetRepresentor(deviceID)
if err != nil {
return nil, nil, err
}

hostIface.Name = rep

link, err := netlink.LinkByName(hostIface.Name)
if err != nil {
return nil, nil, err
}
hostIface.Mac = link.Attrs().HardwareAddr.String()

// set MTU on smart VF representor
if mtu != 0 {
if err = netlink.LinkSetMTU(link, mtu); err != nil {
return nil, nil, fmt.Errorf("failed to set MTU on %s: %v", hostIface.Name, err)
// if MAC address is provided, set it to the VF by using PF netlink
// which is accessible in the host namespace, not in the container namespace
if hwaddr != nil {
if err := netlink.LinkSetVfHardwareAddr(pfLink, vfIdx, hwaddr); err != nil {
return err
}
}

// Move smart VF to Container namespace
err = moveIfToNetns(vfNetdevice, contNetns)
if err != nil {
return nil, nil, err
return err
}

err = contNetns.Do(func(hostNS ns.NetNS) error {
Expand All @@ -209,10 +217,20 @@ func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName string, mtu int
if err != nil {
return err
}
link, err = netlink.LinkByName(contIface.Name)
link, err := netlink.LinkByName(contIface.Name)
if err != nil {
return err
}
// if MAC address is provided, set it to the kernel VF netdevice
// otherwise, read the MAC address from the kernel VF netdevice
if hwaddr != nil {
if err = netlink.LinkSetHardwareAddr(link, hwaddr); err != nil {
return err
}
contIface.Mac = hwaddr.String()
} else {
contIface.Mac = link.Attrs().HardwareAddr.String()
}
if mtu != 0 {
if err = netlink.LinkSetMTU(link, mtu); err != nil {
return err
Expand All @@ -223,13 +241,101 @@ func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName string, mtu int
return err
}
contIface.Sandbox = contNetns.Path()
contIface.Mac = link.Attrs().HardwareAddr.String()

return nil
})
if err != nil {
return err
}

return nil
}

// setupUserspaceSriovContIface fills in contIface for a smart VF that is
// configured through the PF netlink device rather than through a VF netdevice.
//
// contIface.Name is set to ifName and contIface.Sandbox to the path of
// contNetns. When hwaddr is non-nil it is programmed on VF vfIdx via pfLink
// and reported as the container MAC; otherwise the MAC recorded for that VF
// in pfLink's attributes is reported. vfIdx is used to index pfLink's VF list
// without a bounds check here.
func setupUserspaceSriovContIface(contNetns ns.NetNS, contIface *current.Interface, pfLink netlink.Link, vfIdx int, ifName string, hwaddr net.HardwareAddr) error {
	contIface.Name = ifName
	contIface.Sandbox = contNetns.Path()

	// no MAC requested: report the one the PF currently holds for this VF
	if hwaddr == nil {
		contIface.Mac = pfLink.Attrs().Vfs[vfIdx].Mac.String()
		return nil
	}

	// MAC requested: program it on the VF through the PF
	if err := netlink.LinkSetVfHardwareAddr(pfLink, vfIdx, hwaddr); err != nil {
		return err
	}
	contIface.Mac = hwaddr.String()
	return nil
}

// SetupSriovInterface configures the smart VF identified by deviceID and
// returns the VF's representor device as the host interface and the VF itself
// as the container interface.
//
// Parameters:
//   - contNetns: network namespace of the container.
//   - containerID: not used by this function.
//   - ifName: interface name reported (and, in kernel mode, passed on for
//     configuration) for the container interface.
//   - mac: optional MAC address for the VF; an empty string means no MAC is
//     set. A non-empty value that fails net.ParseMAC yields an error.
//   - mtu: when non-zero, applied to the representor and passed on to the
//     kernel-mode setup.
//   - deviceID: PCI address of the VF.
//   - userspaceMode: selects configuration via PF netlink only (true) or via
//     the kernel VF netdevice (false).
//
// On any failure both returned interfaces are nil.
func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName, mac string, mtu int, deviceID string, userspaceMode bool) (*current.Interface, *current.Interface, error) {
	hostIface := &current.Interface{}
	contIface := &current.Interface{}

	// network representor device for smartvf
	rep, err := GetNetRepresentor(deviceID)
	if err != nil {
		return nil, nil, err
	}

	hostIface.Name = rep

	link, err := netlink.LinkByName(hostIface.Name)
	if err != nil {
		return nil, nil, err
	}
	hostIface.Mac = link.Attrs().HardwareAddr.String()

	// get PF netlink and VF index from PCI address
	pfIface, err := sriovnet.GetUplinkRepresentor(deviceID)
	if err != nil {
		return nil, nil, err
	}
	pfLink, err := netlink.LinkByName(pfIface)
	if err != nil {
		return nil, nil, err
	}
	vfIdx, err := sriovnet.GetVfIndexByPciAddress(deviceID)
	if err != nil {
		return nil, nil, err
	}

	// make sure PF netlink and VF index are valid; the index must be strictly
	// inside the VF list before it is dereferenced, otherwise an index equal
	// to len(vfs) (or a negative one) would panic instead of returning an error
	vfs := pfLink.Attrs().Vfs
	if vfIdx < 0 || vfIdx >= len(vfs) || vfs[vfIdx].ID != vfIdx {
		return nil, nil, fmt.Errorf("failed to get vf info from %s at index %d with Vfs %v", pfIface, vfIdx, vfs)
	}

	// parse MAC address if provided from args as described
	// in the CNI spec (https://github.com/containernetworking/cni/blob/main/CONVENTIONS.md)
	var hwaddr net.HardwareAddr
	if mac != "" {
		hwaddr, err = net.ParseMAC(mac)
		if err != nil {
			return nil, nil, fmt.Errorf("failed to parse MAC address %q: %v", mac, err)
		}
	}

	// set MTU on smart VF representor
	if mtu != 0 {
		if err = netlink.LinkSetMTU(link, mtu); err != nil {
			return nil, nil, fmt.Errorf("failed to set MTU on %s: %v", hostIface.Name, err)
		}
	}

	if userspaceMode {
		// configure the smart VF netdevice via PF netlink
		err = setupUserspaceSriovContIface(contNetns, contIface, pfLink, vfIdx, ifName, hwaddr)
	} else {
		// configure the smart VF netdevice directly in the container namespace
		err = setupKernelSriovContIface(contNetns, contIface, deviceID, pfLink, vfIdx, ifName, hwaddr, mtu)
	}
	if err != nil {
		return nil, nil, err
	}

	return hostIface, contIface, nil
}
Expand Down
10 changes: 6 additions & 4 deletions pkg/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,15 @@ type Trunk struct {
ID *uint `json:"id,omitempty"`
}

// CachedNetConf containing NetConfig and original smartnic vf interface
// name (set only in case of ovs hardware offload scenario).
// CachedNetConf containing NetConfig, original smartnic vf interface name
// and kernel/userspace device driver mode of the smartnic vf interface
// (the last two are set only in case of ovs hardware offload scenario).
// this is intended to be used only for storing and retrieving config
// to/from a data store (example file cache).
type CachedNetConf struct {
Netconf *NetConf
OrigIfName string
Netconf *NetConf
OrigIfName string
UserspaceMode bool
}

// CachedPrevResultNetConf containing PrevResult.
Expand Down

0 comments on commit d25ad9c

Please sign in to comment.