mirror of
https://github.com/intel/intel-device-plugins-for-kubernetes.git
synced 2025-06-03 03:59:37 +00:00
415 lines
10 KiB
Go
415 lines
10 KiB
Go
// Copyright 2017 Intel Corporation. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package main
|
|
|
|
import (
|
|
"flag"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"os"
|
|
"path"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/pkg/errors"
|
|
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/client-go/kubernetes"
|
|
"k8s.io/client-go/rest"
|
|
"k8s.io/client-go/tools/clientcmd"
|
|
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
|
|
utilnode "k8s.io/kubernetes/pkg/util/node"
|
|
|
|
"github.com/intel/intel-device-plugins-for-kubernetes/pkg/debug"
|
|
dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin"
|
|
)
|
|
|
|
const (
|
|
sysfsDirectory = "/sys/class/fpga"
|
|
devfsDirectory = "/dev"
|
|
|
|
// Device plugin settings.
|
|
namespace = "fpga.intel.com"
|
|
annotationName = "com.intel.fpga.mode"
|
|
|
|
// Scanner's mode of operation
|
|
afMode = "af"
|
|
regionMode = "region"
|
|
regionDevelMode = "regiondevel"
|
|
|
|
deviceRE = `^intel-fpga-dev.[0-9]+$`
|
|
portRE = `^intel-fpga-port.[0-9]+$`
|
|
fmeRE = `^intel-fpga-fme.[0-9]+$`
|
|
|
|
// When the device's firmware crashes the driver reports these values
|
|
unhealthyAfuID = "ffffffffffffffffffffffffffffffff"
|
|
unhealthyInterfaceID = "ffffffffffffffffffffffffffffffff"
|
|
)
|
|
|
|
type getDevTreeFunc func(devices []device) dpapi.DeviceTree
|
|
|
|
// getRegionDevelTree returns mapping of region interface IDs to AF ports and FME devices
|
|
func getRegionDevelTree(devices []device) dpapi.DeviceTree {
|
|
regionTree := dpapi.NewDeviceTree()
|
|
|
|
for _, dev := range devices {
|
|
for _, region := range dev.regions {
|
|
health := pluginapi.Healthy
|
|
if region.interfaceID == unhealthyInterfaceID {
|
|
health = pluginapi.Unhealthy
|
|
}
|
|
devType := fmt.Sprintf("%s-%s", regionMode, region.interfaceID)
|
|
devNodes := make([]pluginapi.DeviceSpec, len(region.afus)+1)
|
|
for num, afu := range region.afus {
|
|
devNodes[num] = pluginapi.DeviceSpec{
|
|
HostPath: afu.devNode,
|
|
ContainerPath: afu.devNode,
|
|
Permissions: "rw",
|
|
}
|
|
}
|
|
devNodes[len(region.afus)] = pluginapi.DeviceSpec{
|
|
HostPath: region.devNode,
|
|
ContainerPath: region.devNode,
|
|
Permissions: "rw",
|
|
}
|
|
regionTree.AddDevice(devType, region.id, dpapi.DeviceInfo{
|
|
State: health,
|
|
Nodes: devNodes,
|
|
})
|
|
}
|
|
}
|
|
|
|
return regionTree
|
|
}
|
|
|
|
// getRegionTree returns mapping of region interface IDs to AF ports only
|
|
func getRegionTree(devices []device) dpapi.DeviceTree {
|
|
regionTree := dpapi.NewDeviceTree()
|
|
|
|
for _, dev := range devices {
|
|
for _, region := range dev.regions {
|
|
health := pluginapi.Healthy
|
|
if region.interfaceID == unhealthyInterfaceID {
|
|
health = pluginapi.Unhealthy
|
|
}
|
|
devType := fmt.Sprintf("%s-%s", regionMode, region.interfaceID)
|
|
devNodes := make([]pluginapi.DeviceSpec, len(region.afus))
|
|
for num, afu := range region.afus {
|
|
devNodes[num] = pluginapi.DeviceSpec{
|
|
HostPath: afu.devNode,
|
|
ContainerPath: afu.devNode,
|
|
Permissions: "rw",
|
|
}
|
|
}
|
|
regionTree.AddDevice(devType, region.id, dpapi.DeviceInfo{
|
|
State: health,
|
|
Nodes: devNodes,
|
|
})
|
|
}
|
|
}
|
|
|
|
return regionTree
|
|
}
|
|
|
|
// getAfuTree returns mapping of AFU IDs to AF ports
|
|
func getAfuTree(devices []device) dpapi.DeviceTree {
|
|
afuTree := dpapi.NewDeviceTree()
|
|
|
|
for _, dev := range devices {
|
|
for _, region := range dev.regions {
|
|
for _, afu := range region.afus {
|
|
health := pluginapi.Healthy
|
|
if afu.afuID == unhealthyAfuID {
|
|
health = pluginapi.Unhealthy
|
|
}
|
|
devType := fmt.Sprintf("%s-%s", afMode, afu.afuID)
|
|
afuTree.AddDevice(devType, afu.id, dpapi.DeviceInfo{
|
|
State: health,
|
|
Nodes: []pluginapi.DeviceSpec{
|
|
{
|
|
HostPath: afu.devNode,
|
|
ContainerPath: afu.devNode,
|
|
Permissions: "rw",
|
|
},
|
|
},
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
return afuTree
|
|
}
|
|
|
|
type afu struct {
|
|
id string
|
|
afuID string
|
|
devNode string
|
|
}
|
|
|
|
type region struct {
|
|
id string
|
|
interfaceID string
|
|
devNode string
|
|
afus []afu
|
|
}
|
|
|
|
type device struct {
|
|
name string
|
|
regions []region
|
|
}
|
|
|
|
type devicePlugin struct {
|
|
sysfsDir string
|
|
devfsDir string
|
|
|
|
deviceReg *regexp.Regexp
|
|
portReg *regexp.Regexp
|
|
fmeReg *regexp.Regexp
|
|
|
|
getDevTree getDevTreeFunc
|
|
ignoreAfuIDs bool
|
|
annotationValue string
|
|
}
|
|
|
|
// newDevicePlugin returns new instance of devicePlugin
|
|
func newDevicePlugin(sysfsDir string, devfsDir string, mode string) (*devicePlugin, error) {
|
|
var getDevTree getDevTreeFunc
|
|
|
|
ignoreAfuIDs := false
|
|
annotationValue := ""
|
|
switch mode {
|
|
case afMode:
|
|
getDevTree = getAfuTree
|
|
case regionMode:
|
|
getDevTree = getRegionTree
|
|
ignoreAfuIDs = true
|
|
annotationValue = fmt.Sprintf("%s/%s", namespace, regionMode)
|
|
case regionDevelMode:
|
|
getDevTree = getRegionDevelTree
|
|
ignoreAfuIDs = true
|
|
default:
|
|
return nil, errors.Errorf("Wrong mode: '%s'", mode)
|
|
}
|
|
|
|
return &devicePlugin{
|
|
sysfsDir: sysfsDir,
|
|
devfsDir: devfsDir,
|
|
deviceReg: regexp.MustCompile(deviceRE),
|
|
portReg: regexp.MustCompile(portRE),
|
|
fmeReg: regexp.MustCompile(fmeRE),
|
|
getDevTree: getDevTree,
|
|
ignoreAfuIDs: ignoreAfuIDs,
|
|
annotationValue: annotationValue,
|
|
}, nil
|
|
}
|
|
|
|
func (dp *devicePlugin) PostAllocate(response *pluginapi.AllocateResponse) error {
|
|
// Set container annotations when programming is allowed
|
|
if len(dp.annotationValue) > 0 {
|
|
for _, containerResponse := range response.GetContainerResponses() {
|
|
containerResponse.Annotations = map[string]string{
|
|
annotationName: dp.annotationValue,
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Scan starts scanning FPGA devices on the host
|
|
func (dp *devicePlugin) Scan(notifier dpapi.Notifier) error {
|
|
for {
|
|
devTree, err := dp.scanFPGAs()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
notifier.Notify(devTree)
|
|
|
|
time.Sleep(5 * time.Second)
|
|
}
|
|
}
|
|
|
|
func (dp *devicePlugin) getDevNode(devName string) (string, error) {
|
|
devNode := path.Join(dp.devfsDir, devName)
|
|
if _, err := os.Stat(devNode); err != nil {
|
|
return "", errors.Wrapf(err, "Device %s doesn't exist", devNode)
|
|
}
|
|
|
|
return devNode, nil
|
|
}
|
|
|
|
func (dp *devicePlugin) scanFPGAs() (dpapi.DeviceTree, error) {
|
|
var devices []device
|
|
|
|
debug.Print("Start new FPGA scan")
|
|
|
|
fpgaFiles, err := ioutil.ReadDir(dp.sysfsDir)
|
|
if err != nil {
|
|
fmt.Printf("WARNING: Can't read folder %s. Kernel driver not loaded?\n", dp.sysfsDir)
|
|
return dp.getDevTree([]device{}), nil
|
|
}
|
|
|
|
for _, fpgaFile := range fpgaFiles {
|
|
fname := fpgaFile.Name()
|
|
|
|
if !dp.deviceReg.MatchString(fname) {
|
|
continue
|
|
}
|
|
|
|
deviceFolder := path.Join(dp.sysfsDir, fname)
|
|
deviceFiles, err := ioutil.ReadDir(deviceFolder)
|
|
if err != nil {
|
|
return nil, errors.WithStack(err)
|
|
}
|
|
|
|
regions, afus, err := dp.getSysFsInfo(deviceFolder, deviceFiles, fname)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if len(regions) == 0 {
|
|
return nil, errors.Errorf("No regions found for device %s", fname)
|
|
}
|
|
|
|
// Currently only one region per device is supported.
|
|
regions[0].afus = afus
|
|
devices = append(devices, device{
|
|
name: fname,
|
|
regions: regions,
|
|
})
|
|
}
|
|
|
|
return dp.getDevTree(devices), nil
|
|
}
|
|
|
|
func (dp *devicePlugin) getSysFsInfo(deviceFolder string, deviceFiles []os.FileInfo, fname string) ([]region, []afu, error) {
|
|
var regions []region
|
|
var afus []afu
|
|
for _, deviceFile := range deviceFiles {
|
|
name := deviceFile.Name()
|
|
|
|
if dp.fmeReg.MatchString(name) {
|
|
if len(regions) > 0 {
|
|
return nil, nil, errors.Errorf("Detected more than one FPGA region for device %s. Only one region per FPGA device is supported", fname)
|
|
}
|
|
interfaceIDFile := path.Join(deviceFolder, name, "pr", "interface_id")
|
|
data, err := ioutil.ReadFile(interfaceIDFile)
|
|
if err != nil {
|
|
return nil, nil, errors.WithStack(err)
|
|
}
|
|
devNode, err := dp.getDevNode(name)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
regions = append(regions, region{
|
|
id: name,
|
|
interfaceID: strings.TrimSpace(string(data)),
|
|
devNode: devNode,
|
|
})
|
|
} else if dp.portReg.MatchString(name) {
|
|
var afuID string
|
|
|
|
if dp.ignoreAfuIDs {
|
|
afuID = "unused_afu_id"
|
|
} else {
|
|
afuFile := path.Join(deviceFolder, name, "afu_id")
|
|
data, err := ioutil.ReadFile(afuFile)
|
|
if err != nil {
|
|
return nil, nil, errors.WithStack(err)
|
|
}
|
|
afuID = strings.TrimSpace(string(data))
|
|
}
|
|
devNode, err := dp.getDevNode(name)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
afus = append(afus, afu{
|
|
id: name,
|
|
afuID: afuID,
|
|
devNode: devNode,
|
|
})
|
|
}
|
|
}
|
|
|
|
return regions, afus, nil
|
|
}
|
|
|
|
func fatal(err error) {
|
|
fmt.Printf("ERROR: %+v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
func main() {
|
|
var mode string
|
|
var kubeconfig string
|
|
var master string
|
|
var config *rest.Config
|
|
var err error
|
|
var debugEnabled bool
|
|
|
|
flag.StringVar(&kubeconfig, "kubeconfig", "", "absolute path to the kubeconfig file")
|
|
flag.StringVar(&master, "master", "", "master url")
|
|
flag.StringVar(&mode, "mode", string(afMode),
|
|
fmt.Sprintf("device plugin mode: '%s' (default), '%s' or '%s'", afMode, regionMode, regionDevelMode))
|
|
flag.BoolVar(&debugEnabled, "debug", false, "enable debug output")
|
|
flag.Parse()
|
|
|
|
if debugEnabled {
|
|
debug.Activate()
|
|
}
|
|
|
|
if kubeconfig == "" {
|
|
config, err = rest.InClusterConfig()
|
|
} else {
|
|
config, err = clientcmd.BuildConfigFromFlags(master, kubeconfig)
|
|
}
|
|
if err != nil {
|
|
fatal(err)
|
|
}
|
|
|
|
// if NODE_NAME is not set then try to use hostname
|
|
nodeName, err := utilnode.GetHostname(os.Getenv("NODE_NAME"))
|
|
if err != nil {
|
|
fatal(err)
|
|
}
|
|
|
|
clientset, err := kubernetes.NewForConfig(config)
|
|
if err != nil {
|
|
fatal(err)
|
|
}
|
|
|
|
node, err := clientset.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
|
|
if err != nil {
|
|
fatal(err)
|
|
}
|
|
|
|
if nodeMode, ok := node.ObjectMeta.Annotations["fpga.intel.com/device-plugin-mode"]; ok {
|
|
fmt.Println("Overriding mode to ", nodeMode)
|
|
mode = nodeMode
|
|
}
|
|
|
|
plugin, err := newDevicePlugin(sysfsDirectory, devfsDirectory, mode)
|
|
if err != nil {
|
|
fatal(err)
|
|
}
|
|
|
|
fmt.Println("FPGA device plugin started in ", mode, " mode")
|
|
|
|
manager := dpapi.NewManager(namespace, plugin)
|
|
manager.Run()
|
|
}
|