mirror of
https://github.com/intel/intel-device-plugins-for-kubernetes.git
synced 2025-06-03 03:59:37 +00:00

The sIOV device type string contains characters outside the [[:alnum:]] character class (for example the hyphen in "vqat-adi"), as in the following output: # adf_ctl status Checking status of all devices. There is 2 QAT acceleration device(s) in the system: qat_dev0 - type: vqat-adi, inst_id: 0, node_id: 0, bsf: 0000:00:08.0, #accel: 1 #engines: 1 state: up qat_dev1 - type: vqat-adi, inst_id: 1, node_id: 0, bsf: 0000:00:09.0, #accel: 1 #engines: 1 state: up
387 lines
10 KiB
Go
387 lines
10 KiB
Go
// Copyright 2018 Intel Corporation. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
//go:build kerneldrv
|
|
// +build kerneldrv
|
|
|
|
// Package kerneldrv populates a device tree with QAT devices using the kernel QAT driver.
|
|
package kerneldrv
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/go-ini/ini"
|
|
"github.com/pkg/errors"
|
|
|
|
"k8s.io/klog/v2"
|
|
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
|
|
utilsexec "k8s.io/utils/exec"
|
|
|
|
dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin"
|
|
)
|
|
|
|
// adfCtlRegex picks the interesting fields out of a single device line of
// `adf_ctl status` output. Submatch layout (index 0 is the whole match):
//
//	1 devtype - device type, any run of printable characters
//	2 instid  - decimal instance id
//	3 (unnamed, optional) - four hex digit PCI domain including the trailing colon
//	4 bsf     - bus:slot.function in hex
//	5 state   - alphabetic device state; must be the last thing on the line
//
// Every field label after the first must be preceded by a comma, a space and
// at least one more space, which is what the ", .* " parts of the pattern demand.
var adfCtlRegex = regexp.MustCompile(`type: (?P<devtype>[[:print:]]+), .* inst_id: (?P<instid>[0-9]+), .* bsf: ([0-9a-f]{4}:)?(?P<bsf>[0-9a-f]{2}:[0-9a-f]{2}\.[0-9a-f]), .* state: (?P<state>[[:alpha:]]+)$`)
|
|
|
|
// endpoint records one device's contribution to a config section.
type endpoint struct {
	// id is the identifier of the device the section was read for.
	id string
	// processes is the NumProcesses value configured for that device.
	processes int
}
|
|
|
|
type section struct {
|
|
endpoints []endpoint
|
|
cryptoEngines int
|
|
compressionEngines int
|
|
pinned bool
|
|
}
|
|
|
|
// device describes one QAT device reported by `adf_ctl status`:
// id is "dev" followed by the instance id, devtype is the reported device
// type and bsf is the PCI address (optional domain plus bus:slot.function).
type device struct {
	id, devtype, bsf string
}
|
|
|
|
// driverConfig maps a config section name to the data collected for that
// section from the per-device config files.
type driverConfig map[string]section
|
|
|
|
func newDeviceSpec(devPath string) pluginapi.DeviceSpec {
|
|
return pluginapi.DeviceSpec{
|
|
HostPath: devPath,
|
|
ContainerPath: devPath,
|
|
Permissions: "rw",
|
|
}
|
|
}
|
|
|
|
func getDevTree(sysfs string, qatDevs []device, config map[string]section) (dpapi.DeviceTree, error) {
|
|
devTree := dpapi.NewDeviceTree()
|
|
|
|
devs := []pluginapi.DeviceSpec{
|
|
newDeviceSpec("/dev/qat_adf_ctl"),
|
|
newDeviceSpec("/dev/qat_dev_processes"),
|
|
newDeviceSpec("/dev/usdm_drv"),
|
|
}
|
|
|
|
for _, qatDev := range qatDevs {
|
|
uiodevs, err := getUIODevices(sysfs, qatDev.devtype, qatDev.bsf)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
for _, uiodev := range uiodevs {
|
|
devs = append(devs, newDeviceSpec(filepath.Join("/dev/", uiodev)))
|
|
}
|
|
}
|
|
|
|
uniqID := 0
|
|
|
|
for sname, svalue := range config {
|
|
devType := fmt.Sprintf("cy%d_dc%d", svalue.cryptoEngines, svalue.compressionEngines)
|
|
|
|
for _, ep := range svalue.endpoints {
|
|
for i := 0; i < ep.processes; i++ {
|
|
envs := map[string]string{
|
|
fmt.Sprintf("QAT_SECTION_NAME_%s_%d", devType, uniqID): sname,
|
|
// This env variable may get overridden if a container requests more than one QAT process.
|
|
// But we keep this code since the majority of pod workloads run only one QAT process.
|
|
// The rest should use QAT_SECTION_NAME_XXX variables.
|
|
"QAT_SECTION_NAME": sname,
|
|
}
|
|
deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, devs, nil, envs, nil)
|
|
devTree.AddDevice(devType, fmt.Sprintf("%s_%s_%d", sname, ep.id, i), deviceInfo)
|
|
uniqID++
|
|
}
|
|
|
|
if !svalue.pinned {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
return devTree, nil
|
|
}
|
|
|
|
// DevicePlugin represents QAT plugin exploiting kernel driver.
|
|
type DevicePlugin struct {
|
|
execer utilsexec.Interface
|
|
configDir string
|
|
}
|
|
|
|
// NewDevicePlugin returns new instance of kernel based QAT plugin.
|
|
func NewDevicePlugin() *DevicePlugin {
|
|
return newDevicePlugin("/etc", utilsexec.New())
|
|
}
|
|
|
|
func newDevicePlugin(configDir string, execer utilsexec.Interface) *DevicePlugin {
|
|
return &DevicePlugin{
|
|
execer: execer,
|
|
configDir: configDir,
|
|
}
|
|
}
|
|
|
|
func (dp *DevicePlugin) getOnlineDevices(iommuOn bool) ([]device, error) {
|
|
outputBytes, err := dp.execer.Command("adf_ctl", "status").CombinedOutput()
|
|
if err != nil {
|
|
return nil, errors.Wrapf(err, "Can't get driver status")
|
|
}
|
|
|
|
devices := []device{}
|
|
|
|
// QAT Gen4 devices should not be used with "-mode kernel"
|
|
devicesDenyList := map[string]struct{}{
|
|
"4xxx": {},
|
|
"4xxxvf": {},
|
|
}
|
|
|
|
for _, line := range strings.Split(string(outputBytes[:]), "\n") {
|
|
matches := adfCtlRegex.FindStringSubmatch(line)
|
|
if len(matches) != 6 {
|
|
continue
|
|
}
|
|
|
|
// Ignore devices which are down.
|
|
if matches[5] != "up" {
|
|
continue
|
|
}
|
|
|
|
// Ignore devices which are on the denylist.
|
|
if _, ok := devicesDenyList[matches[1]]; ok {
|
|
klog.Warning("skip denylisted device ", matches[1])
|
|
continue
|
|
}
|
|
|
|
// "Cannot use PF with IOMMU enabled"
|
|
if iommuOn && !strings.HasSuffix(matches[1], "vf") {
|
|
continue
|
|
}
|
|
|
|
devices = append(devices, device{
|
|
id: fmt.Sprintf("dev%s", matches[2]),
|
|
devtype: matches[1],
|
|
bsf: fmt.Sprintf("%s%s", matches[3], matches[4]),
|
|
})
|
|
klog.V(4).Info("New online device", devices[len(devices)-1])
|
|
}
|
|
|
|
return devices, nil
|
|
}
|
|
|
|
// getUIODeviceListPath returns the directory under sysfs that lists the uio
// devices of the PCI device bsf bound to the driver named devtype:
// <sysfs>/bus/pci/drivers/<devtype>/<bsf>/uio.
func getUIODeviceListPath(sysfs, devtype, bsf string) string {
	elems := []string{sysfs, "bus", "pci", "drivers", devtype, bsf, "uio"}

	return filepath.Join(elems...)
}
|
|
|
|
func getUIODevices(sysfs, devtype, bsf string) ([]string, error) {
|
|
sysfsDir := getUIODeviceListPath(sysfs, devtype, bsf)
|
|
klog.V(4).Info("Path to uio devices:", sysfsDir)
|
|
|
|
devFiles, err := os.ReadDir(sysfsDir)
|
|
if err != nil {
|
|
return nil, errors.Wrapf(err, "Can't read %s", sysfsDir)
|
|
}
|
|
|
|
if len(devFiles) == 0 {
|
|
klog.Warning("no uio devices listed in", sysfsDir)
|
|
}
|
|
|
|
devices := []string{}
|
|
for _, devFile := range devFiles {
|
|
devices = append(devices, devFile.Name())
|
|
}
|
|
|
|
return devices, nil
|
|
}
|
|
|
|
func (dp *DevicePlugin) parseConfigs(devices []device) (map[string]section, error) {
|
|
devNum := 0
|
|
drvConfig := make(driverConfig)
|
|
|
|
for _, dev := range devices {
|
|
// Parse the configuration.
|
|
config, err := ini.Load(filepath.Join(dp.configDir, fmt.Sprintf("%s_%s.conf", dev.devtype, dev.id)))
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to parse device config")
|
|
}
|
|
devNum++
|
|
|
|
for _, section := range config.Sections() {
|
|
if section.Name() == "GENERAL" || section.Name() == "KERNEL" || section.Name() == "KERNEL_QAT" || section.Name() == ini.DefaultSection {
|
|
continue
|
|
}
|
|
|
|
klog.V(4).Info(section.Name())
|
|
|
|
if err := drvConfig.update(dev.id, section); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
}
|
|
|
|
// check if the number of sections with LimitDevAccess=1 is equal to the number of endpoints
|
|
for sname, svalue := range drvConfig {
|
|
if svalue.pinned && len(svalue.endpoints) != devNum {
|
|
return nil, errors.Errorf("Section [%s] must be defined for all QAT devices since it contains LimitDevAccess=1", sname)
|
|
}
|
|
}
|
|
|
|
return drvConfig, nil
|
|
}
|
|
|
|
func (drvConfig driverConfig) update(devID string, iniSection *ini.Section) error {
|
|
numProcesses, err := iniSection.Key("NumProcesses").Int()
|
|
if err != nil {
|
|
return errors.Wrapf(err, "Can't parse NumProcesses in %s", iniSection.Name())
|
|
}
|
|
|
|
cryptoEngines, err := iniSection.Key("NumberCyInstances").Int()
|
|
if err != nil {
|
|
return errors.Wrapf(err, "Can't parse NumberCyInstances in %s", iniSection.Name())
|
|
}
|
|
|
|
compressionEngines, err := iniSection.Key("NumberDcInstances").Int()
|
|
if err != nil {
|
|
return errors.Wrapf(err, "Can't parse NumberDcInstances in %s", iniSection.Name())
|
|
}
|
|
|
|
pinned := false
|
|
|
|
if limitDevAccessKey, err := iniSection.GetKey("LimitDevAccess"); err == nil {
|
|
limitDevAccess, err := limitDevAccessKey.Bool()
|
|
if err != nil {
|
|
return errors.Wrapf(err, "Can't parse LimitDevAccess in %s", iniSection.Name())
|
|
}
|
|
|
|
if limitDevAccess {
|
|
pinned = true
|
|
}
|
|
}
|
|
|
|
if old, ok := drvConfig[iniSection.Name()]; ok {
|
|
// first check the sections are consistent across endpoints
|
|
if old.pinned != pinned {
|
|
return errors.Errorf("Value of LimitDevAccess must be consistent across all devices in %s", iniSection.Name())
|
|
}
|
|
|
|
if !pinned && old.endpoints[0].processes != numProcesses {
|
|
return errors.Errorf("For not pinned section \"%s\" NumProcesses must be equal for all devices", iniSection.Name())
|
|
}
|
|
|
|
if old.cryptoEngines != cryptoEngines || old.compressionEngines != compressionEngines {
|
|
return errors.Errorf("NumberCyInstances and NumberDcInstances must be consistent across all devices in %s", iniSection.Name())
|
|
}
|
|
|
|
// then add a new endpoint to the section
|
|
old.endpoints = append(old.endpoints, endpoint{
|
|
id: devID,
|
|
processes: numProcesses,
|
|
})
|
|
drvConfig[iniSection.Name()] = old
|
|
} else {
|
|
drvConfig[iniSection.Name()] = section{
|
|
endpoints: []endpoint{
|
|
{
|
|
id: devID,
|
|
processes: numProcesses,
|
|
},
|
|
},
|
|
cryptoEngines: cryptoEngines,
|
|
compressionEngines: compressionEngines,
|
|
pinned: pinned,
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func getIOMMUStatus() (bool, error) {
|
|
iommus, err := os.ReadDir("/sys/class/iommu/")
|
|
if err != nil {
|
|
return false, errors.Wrapf(err, "Unable to read IOMMU status")
|
|
}
|
|
|
|
if len(iommus) > 0 {
|
|
return true, nil
|
|
}
|
|
|
|
return false, nil
|
|
}
|
|
|
|
// Scan implements Scanner interface for kernel based QAT plugin.
|
|
func (dp *DevicePlugin) Scan(notifier dpapi.Notifier) error {
|
|
for {
|
|
iommuOn, err := getIOMMUStatus()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
devices, err := dp.getOnlineDevices(iommuOn)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
driverConfig, err := dp.parseConfigs(devices)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
devTree, err := getDevTree("/sys", devices, driverConfig)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
notifier.Notify(devTree)
|
|
|
|
time.Sleep(5 * time.Second)
|
|
}
|
|
}
|
|
|
|
// PostAllocate implements PostAllocator interface for kernel based QAT plugin.
|
|
func (dp *DevicePlugin) PostAllocate(response *pluginapi.AllocateResponse) error {
|
|
for _, containerResponse := range response.GetContainerResponses() {
|
|
envsToDelete := []string{}
|
|
envsToAdd := make(map[string]string)
|
|
counter := 0
|
|
|
|
for key, value := range containerResponse.Envs {
|
|
if !strings.HasPrefix(key, "QAT_SECTION_NAME_") {
|
|
continue
|
|
}
|
|
|
|
parts := strings.Split(key, "_")
|
|
if len(parts) != 6 {
|
|
return errors.Errorf("Wrong format of env variable name %s", key)
|
|
}
|
|
|
|
prefix := strings.Join(parts[0:5], "_")
|
|
|
|
envsToDelete = append(envsToDelete, key)
|
|
envsToAdd[fmt.Sprintf("%s_%d", prefix, counter)] = value
|
|
counter++
|
|
}
|
|
|
|
for _, key := range envsToDelete {
|
|
delete(containerResponse.Envs, key)
|
|
}
|
|
|
|
for key, value := range envsToAdd {
|
|
containerResponse.Envs[key] = value
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|