// Mirror of https://github.com/intel/intel-device-plugins-for-kubernetes.git
// (synced 2025-06-03 03:59:37 +00:00). File size: 604 lines, 16 KiB, Go.
// Copyright 2017-2022 Intel Corporation. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// Package dpdkdrv implements QAT device plugin for DPDK driver.
|
|
package dpdkdrv
|
|
|
|
import (
|
|
"bytes"
|
|
"flag"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/go-ini/ini"
|
|
"github.com/pkg/errors"
|
|
|
|
"k8s.io/klog/v2"
|
|
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
|
|
|
|
dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin"
|
|
)
|
|
|
|
// Host paths under which the plugin looks for device nodes, uio class
// entries, PCI devices and PCI drivers.
const (
	uioDevicePath      = "/dev"
	vfioDevicePath     = "/dev/vfio"
	vfioCtrlDevicePath = vfioDevicePath + "/vfio"
	uioMountPath       = "/sys/class/uio"
	pciDeviceDirectory = "/sys/bus/pci/devices"
	pciDriverDirectory = "/sys/bus/pci/drivers"
)

// Name fragments: uioSuffix and iommuGroupSuffix are entries looked up below
// a VF's PCI device directory, vendorPrefix precedes the device ID written to
// a driver's new_id file, and envVarPrefix starts every environment variable
// name handed to containers.
const (
	uioSuffix        = "uio"
	iommuGroupSuffix = "iommu_group"
	vendorPrefix     = "8086 "
	envVarPrefix     = "QAT"
)

// Names of the DPDK drivers the plugin accepts.
const (
	igbUio  = "igb_uio"
	vfioPci = "vfio-pci"
)

const (
	// Period of device scans.
	scanPeriod = 5 * time.Second

	// Resource name to use when device capabilities are not available.
	defaultCapabilities = "generic"
)
|
|
|
|
// qatDeviceDriver maps the PCI device ID of a QAT VF to the name of the
// kernel QAT VF driver for that device. Entries are listed in device ID
// order.
var qatDeviceDriver = map[string]string{
	"0442": "dh895xccvf",
	"0443": "dh895xccvf",
	"18a1": "c4xxxvf",
	"19e3": "c3xxxvf",
	"37c9": "c6xxvf",
	"4941": "4xxxvf",
	"4943": "4xxxvf",
	"6f55": "d15xxvf",
}
|
|
|
|
// swapBDF returns a new slice in which the colon-separated fields of every
// entry appear in reverse order: ["A1:B1:C1", "A2:B2:C2"] becomes
// ["C1:B1:A1", "C2:B2:A2"]. The input slice is left untouched.
//
// Entries normally have three fields, but any number of fields is accepted:
// an entry with fewer fields no longer causes an index-out-of-range panic,
// and an entry with more fields keeps all of them. Because a reversal undoes
// itself, calling swapBDF twice always restores the original strings.
func swapBDF(devstrings []string) []string {
	result := make([]string, len(devstrings))

	for n, dev := range devstrings {
		fields := strings.Split(dev, ":")

		// Reverse the fields in place; fields is private to this iteration.
		for i, j := 0, len(fields)-1; i < j; i, j = i+1, j-1 {
			fields[i], fields[j] = fields[j], fields[i]
		}

		result[n] = strings.Join(fields, ":")
	}

	return result
}
|
|
|
|
// preferredAllocationPolicyFunc is the signature shared by the allocation
// policies: given one container's preferred-allocation request, it returns
// the device IDs to prefer.
type preferredAllocationPolicyFunc func(*pluginapi.ContainerPreferredAllocationRequest) []string
|
|
|
|
// nonePolicy is used when no policy is specified.
|
|
func nonePolicy(req *pluginapi.ContainerPreferredAllocationRequest) []string {
|
|
deviceIds := req.AvailableDeviceIDs
|
|
|
|
return deviceIds[:req.AllocationSize]
|
|
}
|
|
|
|
// balancedPolicy is used for allocating QAT devices in balance.
|
|
func balancedPolicy(req *pluginapi.ContainerPreferredAllocationRequest) []string {
|
|
// make it "FDB" and string sort and change back to "BDF"
|
|
deviceIds := swapBDF(req.AvailableDeviceIDs)
|
|
sort.Strings(deviceIds)
|
|
deviceIds = swapBDF(deviceIds)
|
|
|
|
return deviceIds[:req.AllocationSize]
|
|
}
|
|
|
|
// packedPolicy is used for allocating QAT PF devices one by one.
|
|
func packedPolicy(req *pluginapi.ContainerPreferredAllocationRequest) []string {
|
|
deviceIds := req.AvailableDeviceIDs
|
|
sort.Strings(deviceIds)
|
|
deviceIds = deviceIds[:req.AllocationSize]
|
|
|
|
return deviceIds
|
|
}
|
|
|
|
// DevicePlugin represents vfio based QAT plugin.
|
|
type DevicePlugin struct {
|
|
scanTicker *time.Ticker
|
|
scanDone chan bool
|
|
|
|
// Note: If restarting the plugin with a new policy, the allocations for existing pods remain with old policy.
|
|
policy preferredAllocationPolicyFunc
|
|
|
|
pciDriverDir string
|
|
pciDeviceDir string
|
|
dpdkDriver string
|
|
kernelVfDrivers []string
|
|
maxDevices int
|
|
}
|
|
|
|
// NewDevicePlugin returns new instance of vfio based QAT plugin.
|
|
func NewDevicePlugin(maxDevices int, kernelVfDrivers string, dpdkDriver string, preferredAllocationPolicy string) (*DevicePlugin, error) {
|
|
if !isValidDpdkDeviceDriver(dpdkDriver) {
|
|
return nil, errors.Errorf("wrong DPDK device driver: %s", dpdkDriver)
|
|
}
|
|
|
|
kernelDrivers := strings.Split(kernelVfDrivers, ",")
|
|
for _, driver := range kernelDrivers {
|
|
if !isValidKernelDriver(driver) {
|
|
return nil, errors.Errorf("wrong kernel VF driver: %s", driver)
|
|
}
|
|
}
|
|
|
|
allocationPolicyFunc := getAllocationPolicy(preferredAllocationPolicy)
|
|
if allocationPolicyFunc == nil {
|
|
return nil, errors.Errorf("wrong allocation policy: %s", preferredAllocationPolicy)
|
|
}
|
|
|
|
return newDevicePlugin(pciDriverDirectory, pciDeviceDirectory, maxDevices, kernelDrivers, dpdkDriver, allocationPolicyFunc), nil
|
|
}
|
|
|
|
// getAllocationPolicy returns a func that fits the policy given as a parameter. It returns nonePolicy when the flag is not set, and it returns nil when the policy is not valid value.
|
|
func getAllocationPolicy(preferredAllocationPolicy string) preferredAllocationPolicyFunc {
|
|
switch {
|
|
case !isFlagSet("allocation-policy"):
|
|
return nonePolicy
|
|
case preferredAllocationPolicy == "packed":
|
|
return packedPolicy
|
|
case preferredAllocationPolicy == "balanced":
|
|
return balancedPolicy
|
|
default:
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// isFlagSet reports whether a command-line flag called name has been set.
// flag.Visit only walks flags that were actually set, so a flag that is
// merely defined and left at its default does not count.
func isFlagSet(name string) bool {
	var found bool

	flag.Visit(func(visited *flag.Flag) {
		found = found || visited.Name == name
	})

	return found
}
|
|
|
|
func newDevicePlugin(pciDriverDir, pciDeviceDir string, maxDevices int, kernelVfDrivers []string, dpdkDriver string, preferredAllocationPolicyFunc preferredAllocationPolicyFunc) *DevicePlugin {
|
|
return &DevicePlugin{
|
|
maxDevices: maxDevices,
|
|
pciDriverDir: pciDriverDir,
|
|
pciDeviceDir: pciDeviceDir,
|
|
kernelVfDrivers: kernelVfDrivers,
|
|
dpdkDriver: dpdkDriver,
|
|
scanTicker: time.NewTicker(scanPeriod),
|
|
scanDone: make(chan bool, 1),
|
|
policy: preferredAllocationPolicyFunc,
|
|
}
|
|
}
|
|
|
|
func (dp *DevicePlugin) setupDeviceIDs() error {
|
|
for devID, driver := range qatDeviceDriver {
|
|
for _, enabledDriver := range dp.kernelVfDrivers {
|
|
if driver != enabledDriver {
|
|
continue
|
|
}
|
|
|
|
err := writeToDriver(filepath.Join(dp.pciDriverDir, dp.dpdkDriver, "new_id"), vendorPrefix+devID)
|
|
if err != nil && !errors.Is(err, os.ErrExist) {
|
|
return errors.WithMessagef(err, "failed to set device ID %s for %s. Driver module not loaded?", devID, dp.dpdkDriver)
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Scan implements Scanner interface for vfio based QAT plugin.
|
|
func (dp *DevicePlugin) Scan(notifier dpapi.Notifier) error {
|
|
defer dp.scanTicker.Stop()
|
|
|
|
if err := dp.setupDeviceIDs(); err != nil {
|
|
return err
|
|
}
|
|
|
|
for {
|
|
devTree, err := dp.scan()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
notifier.Notify(devTree)
|
|
|
|
select {
|
|
case <-dp.scanDone:
|
|
return nil
|
|
case <-dp.scanTicker.C:
|
|
}
|
|
}
|
|
}
|
|
|
|
// Implement the PreferredAllocator interface.
|
|
func (dp *DevicePlugin) GetPreferredAllocation(rqt *pluginapi.PreferredAllocationRequest) (*pluginapi.PreferredAllocationResponse, error) {
|
|
response := &pluginapi.PreferredAllocationResponse{}
|
|
|
|
for _, req := range rqt.ContainerRequests {
|
|
// Add a security check here. This should never happen unless there occurs error in kubelet device plugin manager.
|
|
if req.AllocationSize > int32(len(req.AvailableDeviceIDs)) {
|
|
var err = errors.Errorf("AllocationSize (%d) is greater than the number of available device IDs (%d)", req.AllocationSize, len(req.AvailableDeviceIDs))
|
|
return nil, err
|
|
}
|
|
|
|
IDs := dp.policy(req)
|
|
klog.V(3).Infof("AvailableDeviceIDs: %q", req.AvailableDeviceIDs)
|
|
klog.V(3).Infof("AllocatedDeviceIDs: %q", IDs)
|
|
|
|
resp := &pluginapi.ContainerPreferredAllocationResponse{
|
|
DeviceIDs: IDs,
|
|
}
|
|
|
|
response.ContainerResponses = append(response.ContainerResponses, resp)
|
|
}
|
|
|
|
return response, nil
|
|
}
|
|
|
|
func (dp *DevicePlugin) getDpdkDevice(vfBdf string) (string, error) {
|
|
switch dp.dpdkDriver {
|
|
case igbUio:
|
|
uioDirPath := filepath.Join(dp.pciDeviceDir, vfBdf, uioSuffix)
|
|
|
|
files, err := os.ReadDir(uioDirPath)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
if len(files) == 0 {
|
|
return "", errors.New("No devices found")
|
|
}
|
|
|
|
return files[0].Name(), nil
|
|
|
|
case vfioPci:
|
|
vfioDirPath := filepath.Join(dp.pciDeviceDir, vfBdf, iommuGroupSuffix)
|
|
group, err := filepath.EvalSymlinks(vfioDirPath)
|
|
|
|
if err != nil {
|
|
return "", errors.WithStack(err)
|
|
}
|
|
|
|
s := filepath.Base(group)
|
|
|
|
// If the kernel has CONFIG_VFIO_NOIOMMU enabled and the node admin
|
|
// has explicitly set enable_unsafe_noiommu_mode VFIO parameter,
|
|
// VFIO taints the kernel and writes "vfio-noiommu" to the IOMMU
|
|
// group name. If these conditions are true, the /dev/vfio/ devices
|
|
// are prefixed with "noiommu-".
|
|
if isVfioNoIOMMU(vfioDirPath) {
|
|
s = fmt.Sprintf("noiommu-%s", s)
|
|
}
|
|
|
|
return s, nil
|
|
|
|
default:
|
|
return "", errors.New("Unknown DPDK driver")
|
|
}
|
|
}
|
|
|
|
// isVfioNoIOMMU reports whether the file "name" inside iommuGroupPath
// contains, apart from surrounding whitespace, exactly "vfio-noiommu". A file
// that cannot be read counts as false.
func isVfioNoIOMMU(iommuGroupPath string) bool {
	raw, err := os.ReadFile(filepath.Join(iommuGroupPath, "name"))
	if err != nil {
		return false
	}

	return strings.TrimSpace(string(raw)) == "vfio-noiommu"
}
|
|
|
|
func (dp *DevicePlugin) getDpdkDeviceSpecs(dpdkDeviceName string) []pluginapi.DeviceSpec {
|
|
switch dp.dpdkDriver {
|
|
case igbUio:
|
|
//Setting up with uio
|
|
uioDev := filepath.Join(uioDevicePath, dpdkDeviceName)
|
|
|
|
return []pluginapi.DeviceSpec{
|
|
{
|
|
HostPath: uioDev,
|
|
ContainerPath: uioDev,
|
|
Permissions: "rw",
|
|
},
|
|
}
|
|
case vfioPci:
|
|
//Setting up with vfio
|
|
vfioDev := filepath.Join(vfioDevicePath, dpdkDeviceName)
|
|
|
|
return []pluginapi.DeviceSpec{
|
|
{
|
|
HostPath: vfioDev,
|
|
ContainerPath: vfioDev,
|
|
Permissions: "rw",
|
|
},
|
|
{
|
|
HostPath: vfioCtrlDevicePath,
|
|
ContainerPath: vfioCtrlDevicePath,
|
|
Permissions: "rw",
|
|
},
|
|
}
|
|
default:
|
|
return nil
|
|
}
|
|
}
|
|
|
|
func (dp *DevicePlugin) getDpdkMounts(dpdkDeviceName string) []pluginapi.Mount {
|
|
switch dp.dpdkDriver {
|
|
case igbUio:
|
|
//Setting up with uio mountpoints
|
|
uioMountPoint := filepath.Join(uioMountPath, dpdkDeviceName, "/device")
|
|
|
|
return []pluginapi.Mount{
|
|
{
|
|
HostPath: uioMountPoint,
|
|
ContainerPath: uioMountPoint,
|
|
},
|
|
}
|
|
case vfioPci:
|
|
//No mountpoint for vfio needs to be populated
|
|
return nil
|
|
default:
|
|
return nil
|
|
}
|
|
}
|
|
|
|
func getDeviceCapabilities(device string) (string, error) {
|
|
devID, err := getDeviceID(device)
|
|
if err != nil {
|
|
return "", errors.Wrapf(err, "cannot determine device capabilities")
|
|
}
|
|
|
|
devicesWithCapabilities := map[string]struct{}{
|
|
"4941": {}, // Check QAT Gen4 (4xxx) VF PCI ID only
|
|
}
|
|
|
|
if _, ok := devicesWithCapabilities[devID]; !ok {
|
|
return defaultCapabilities, nil
|
|
}
|
|
|
|
pfDev, err := filepath.EvalSymlinks(filepath.Join(device, "physfn"))
|
|
if err != nil {
|
|
klog.Warningf("failed to get PF device ID for %s: %q", filepath.Base(device), err)
|
|
return defaultCapabilities, nil
|
|
}
|
|
|
|
// TODO: check the sysfs state entry first when it lands.
|
|
|
|
lOpts := ini.LoadOptions{
|
|
IgnoreInlineComment: true,
|
|
}
|
|
|
|
devCfgPath := filepath.Join(filepath.Dir(filepath.Join(pfDev, "../../")), "kernel/debug",
|
|
fmt.Sprintf("qat_4xxx_%s/dev_cfg", filepath.Base(pfDev)))
|
|
|
|
devCfg, err := ini.LoadSources(lOpts, devCfgPath)
|
|
if err != nil {
|
|
klog.Warningf("failed to read dev_cfg for %s: %q", filepath.Base(pfDev), err)
|
|
return defaultCapabilities, nil
|
|
}
|
|
|
|
switch devCfg.Section("GENERAL").Key("ServicesEnabled").String() {
|
|
case "sym;asym":
|
|
return "cy", nil
|
|
case "asym;sym":
|
|
return "cy", nil
|
|
case "dc":
|
|
return "dc", nil
|
|
case "sym":
|
|
return "sym", nil
|
|
case "asym":
|
|
return "asym", nil
|
|
default:
|
|
return defaultCapabilities, nil
|
|
}
|
|
}
|
|
|
|
func getDeviceID(device string) (string, error) {
|
|
devID, err := os.ReadFile(filepath.Join(device, "device"))
|
|
if err != nil {
|
|
return "", errors.Wrapf(err, "failed to read device ID")
|
|
}
|
|
|
|
return strings.TrimPrefix(string(bytes.TrimSpace(devID)), "0x"), nil
|
|
}
|
|
|
|
func writeToDriver(path, value string) error {
|
|
if err := os.WriteFile(path, []byte(value), 0600); err != nil {
|
|
return errors.Wrapf(err, "write to driver failed: %s", value)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func isValidKernelDriver(kernelvfDriver string) bool {
|
|
for _, driver := range qatDeviceDriver {
|
|
if driver == kernelvfDriver {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
func isValidDpdkDeviceDriver(dpdkDriver string) bool {
|
|
switch dpdkDriver {
|
|
case igbUio, vfioPci:
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
func (dp *DevicePlugin) isValidVfDeviceID(vfDevID string) bool {
|
|
if driver, ok := qatDeviceDriver[vfDevID]; ok {
|
|
for _, enabledDriver := range dp.kernelVfDrivers {
|
|
if driver == enabledDriver {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
|
|
klog.Warningf("device ID %s is not a QAT device or not enabled by kernelVfDrivers.", vfDevID)
|
|
|
|
return false
|
|
}
|
|
|
|
// PostAllocate implements PostAllocator interface for vfio based QAT plugin.
|
|
func (dp *DevicePlugin) PostAllocate(response *pluginapi.AllocateResponse) error {
|
|
tempMap := make(map[string]string)
|
|
|
|
for _, cresp := range response.ContainerResponses {
|
|
counter := 0
|
|
|
|
for k := range cresp.Envs {
|
|
tempMap[strings.Join([]string{envVarPrefix, strconv.Itoa(counter)}, "")] = cresp.Envs[k]
|
|
counter++
|
|
}
|
|
|
|
cresp.Envs = tempMap
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func getPciDevicesWithPattern(pattern string) (pciDevices []string) {
|
|
pciDevices = make([]string, 0)
|
|
|
|
devs, err := filepath.Glob(pattern)
|
|
if err != nil {
|
|
klog.Warningf("bad pattern: %s", pattern)
|
|
return
|
|
}
|
|
|
|
for _, devBdf := range devs {
|
|
targetDev, err := filepath.EvalSymlinks(devBdf)
|
|
if err != nil {
|
|
klog.Warningf("unable to evaluate symlink: %s", devBdf)
|
|
continue
|
|
}
|
|
|
|
pciDevices = append(pciDevices, targetDev)
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
func (dp *DevicePlugin) getVfDevices() []string {
|
|
qatPfDevices := make([]string, 0)
|
|
qatVfDevices := make([]string, 0)
|
|
|
|
// Get PF BDFs bound to a known QAT PF driver
|
|
for _, vfDriver := range dp.kernelVfDrivers {
|
|
pfDriver := strings.TrimSuffix(vfDriver, "vf")
|
|
pattern := filepath.Join(dp.pciDriverDir, pfDriver, "????:??:??.?")
|
|
qatPfDevices = append(qatPfDevices, getPciDevicesWithPattern(pattern)...)
|
|
}
|
|
|
|
// Get VF devices belonging to a valid QAT PF device
|
|
for _, qatPfDevice := range qatPfDevices {
|
|
pattern := filepath.Join(qatPfDevice, "virtfn*")
|
|
qatVfDevices = append(qatVfDevices, getPciDevicesWithPattern(pattern)...)
|
|
}
|
|
|
|
if len(qatPfDevices) > 0 {
|
|
if len(qatVfDevices) >= dp.maxDevices {
|
|
return qatVfDevices[:dp.maxDevices]
|
|
}
|
|
|
|
return qatVfDevices
|
|
}
|
|
|
|
// No PF devices with a QAT driver found, running in a VM?
|
|
pattern := filepath.Join(dp.pciDeviceDir, "????:??:??.?")
|
|
for _, pciDev := range getPciDevicesWithPattern(pattern) {
|
|
devID, err := getDeviceID(pciDev)
|
|
if err != nil {
|
|
klog.Warningf("unable to read device id for device %s: %q", filepath.Base(pciDev), err)
|
|
continue
|
|
}
|
|
|
|
if dp.isValidVfDeviceID(devID) {
|
|
qatVfDevices = append(qatVfDevices, pciDev)
|
|
}
|
|
}
|
|
|
|
if len(qatVfDevices) >= dp.maxDevices {
|
|
return qatVfDevices[:dp.maxDevices]
|
|
}
|
|
|
|
return qatVfDevices
|
|
}
|
|
|
|
func getCurrentDriver(device string) string {
|
|
symlink := filepath.Join(device, "driver")
|
|
|
|
driver, err := filepath.EvalSymlinks(symlink)
|
|
if err != nil {
|
|
klog.Infof("no driver bound to device %q", filepath.Base(device))
|
|
return ""
|
|
}
|
|
|
|
return filepath.Base(driver)
|
|
}
|
|
|
|
func (dp *DevicePlugin) scan() (dpapi.DeviceTree, error) {
|
|
devTree := dpapi.NewDeviceTree()
|
|
n := 0
|
|
|
|
for _, vfDevice := range dp.getVfDevices() {
|
|
vfBdf := filepath.Base(vfDevice)
|
|
|
|
if drv := getCurrentDriver(vfDevice); drv != dp.dpdkDriver {
|
|
if drv != "" {
|
|
err := writeToDriver(filepath.Join(dp.pciDriverDir, drv, "unbind"), vfBdf)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
err := writeToDriver(filepath.Join(dp.pciDriverDir, dp.dpdkDriver, "bind"), vfBdf)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
dpdkDeviceName, err := dp.getDpdkDevice(vfBdf)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
cap, err := getDeviceCapabilities(vfDevice)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
klog.V(1).Infof("Device %s with %s capabilities found", vfBdf, cap)
|
|
|
|
n = n + 1
|
|
envs := map[string]string{
|
|
fmt.Sprintf("%s%d", envVarPrefix, n): vfBdf,
|
|
}
|
|
|
|
devinfo := dpapi.NewDeviceInfo(pluginapi.Healthy, dp.getDpdkDeviceSpecs(dpdkDeviceName), dp.getDpdkMounts(dpdkDeviceName), envs, nil)
|
|
|
|
devTree.AddDevice(cap, vfBdf, devinfo)
|
|
}
|
|
|
|
return devTree, nil
|
|
}
|