mirror of
https://github.com/intel/intel-device-plugins-for-kubernetes.git
synced 2025-06-03 03:59:37 +00:00

The latest refactoring of FPGA scanning accidentally removed mode names from resource name prefixes. Restore them back.
316 lines
7.7 KiB
Go
316 lines
7.7 KiB
Go
// Copyright 2018 Intel Corporation. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package devicecache
|
|
|
|
import (
|
|
"fmt"
|
|
"io/ioutil"
|
|
"os"
|
|
"path"
|
|
"reflect"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/golang/glog"
|
|
|
|
pluginapi "k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1"
|
|
|
|
"github.com/intel/intel-device-plugins-for-kubernetes/internal/deviceplugin"
|
|
)
|
|
|
|
// Device Cache's mode of operation
|
|
const (
|
|
AfMode = "af"
|
|
RegionMode = "region"
|
|
RegionDevelMode = "regiondevel"
|
|
)
|
|
|
|
const (
|
|
sysfsDirectory = "/sys/class/fpga"
|
|
devfsDirectory = "/dev"
|
|
deviceRE = `^intel-fpga-dev.[0-9]+$`
|
|
portRE = `^intel-fpga-port.[0-9]+$`
|
|
fmeRE = `^intel-fpga-fme.[0-9]+$`
|
|
)
|
|
|
|
// UpdateInfo contains info added, updated and deleted FPGA devices
|
|
type UpdateInfo struct {
|
|
Added map[string]map[string]deviceplugin.DeviceInfo
|
|
Updated map[string]map[string]deviceplugin.DeviceInfo
|
|
Removed map[string]map[string]deviceplugin.DeviceInfo
|
|
}
|
|
|
|
type getDevMapFunc func(devices []device) map[string]map[string]deviceplugin.DeviceInfo
|
|
|
|
// getRegionMap returns mapping of region interface IDs to AF ports and FME devices
|
|
func getRegionDevelMap(devices []device) map[string]map[string]deviceplugin.DeviceInfo {
|
|
regionMap := make(map[string]map[string]deviceplugin.DeviceInfo)
|
|
|
|
for _, dev := range devices {
|
|
for _, region := range dev.regions {
|
|
fpgaID := fmt.Sprintf("%s-%s", RegionMode, region.interfaceID)
|
|
if _, present := regionMap[fpgaID]; !present {
|
|
regionMap[fpgaID] = make(map[string]deviceplugin.DeviceInfo)
|
|
}
|
|
devNodes := make([]string, len(region.afus)+1)
|
|
for num, afu := range region.afus {
|
|
devNodes[num] = afu.devNode
|
|
}
|
|
devNodes[len(region.afus)] = region.devNode
|
|
regionMap[fpgaID][region.id] = deviceplugin.DeviceInfo{
|
|
State: pluginapi.Healthy,
|
|
Nodes: devNodes,
|
|
}
|
|
}
|
|
}
|
|
|
|
return regionMap
|
|
}
|
|
|
|
// getRegionMap returns mapping of region interface IDs to AF ports only
|
|
func getRegionMap(devices []device) map[string]map[string]deviceplugin.DeviceInfo {
|
|
regionMap := make(map[string]map[string]deviceplugin.DeviceInfo)
|
|
|
|
for _, dev := range devices {
|
|
for _, region := range dev.regions {
|
|
fpgaID := fmt.Sprintf("%s-%s", RegionMode, region.interfaceID)
|
|
if _, present := regionMap[fpgaID]; !present {
|
|
regionMap[fpgaID] = make(map[string]deviceplugin.DeviceInfo)
|
|
}
|
|
devNodes := make([]string, len(region.afus))
|
|
for num, afu := range region.afus {
|
|
devNodes[num] = afu.devNode
|
|
}
|
|
regionMap[fpgaID][region.id] = deviceplugin.DeviceInfo{
|
|
State: pluginapi.Healthy,
|
|
Nodes: devNodes,
|
|
}
|
|
}
|
|
}
|
|
|
|
return regionMap
|
|
}
|
|
|
|
// getAfuMap returns mapping of AFU IDs to AF ports
|
|
func getAfuMap(devices []device) map[string]map[string]deviceplugin.DeviceInfo {
|
|
afuMap := make(map[string]map[string]deviceplugin.DeviceInfo)
|
|
|
|
for _, dev := range devices {
|
|
for _, region := range dev.regions {
|
|
for _, afu := range region.afus {
|
|
fpgaID := fmt.Sprintf("%s-%s", AfMode, afu.afuID)
|
|
if _, present := afuMap[fpgaID]; !present {
|
|
afuMap[fpgaID] = make(map[string]deviceplugin.DeviceInfo)
|
|
}
|
|
afuMap[fpgaID][afu.id] = deviceplugin.DeviceInfo{
|
|
State: pluginapi.Healthy,
|
|
Nodes: []string{afu.devNode},
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return afuMap
|
|
}
|
|
|
|
type afu struct {
|
|
id string
|
|
afuID string
|
|
devNode string
|
|
}
|
|
|
|
type region struct {
|
|
id string
|
|
interfaceID string
|
|
devNode string
|
|
afus []afu
|
|
}
|
|
|
|
type device struct {
|
|
name string
|
|
regions []region
|
|
}
|
|
|
|
// Cache represents FPGA devices found on the host
|
|
type Cache struct {
|
|
sysfsDir string
|
|
devfsDir string
|
|
|
|
deviceReg *regexp.Regexp
|
|
portReg *regexp.Regexp
|
|
fmeReg *regexp.Regexp
|
|
|
|
devices []device
|
|
ch chan<- UpdateInfo
|
|
getDevMap getDevMapFunc
|
|
}
|
|
|
|
// NewCache returns new instance of Cache
|
|
func NewCache(sysfsDir string, devfsDir string, mode string, ch chan<- UpdateInfo) (*Cache, error) {
|
|
var getDevMap getDevMapFunc
|
|
|
|
switch mode {
|
|
case AfMode:
|
|
getDevMap = getAfuMap
|
|
case RegionMode:
|
|
getDevMap = getRegionMap
|
|
case RegionDevelMode:
|
|
getDevMap = getRegionDevelMap
|
|
default:
|
|
return nil, fmt.Errorf("Wrong mode: '%s'", mode)
|
|
}
|
|
|
|
return &Cache{
|
|
sysfsDir: sysfsDir,
|
|
devfsDir: devfsDir,
|
|
deviceReg: regexp.MustCompile(deviceRE),
|
|
portReg: regexp.MustCompile(portRE),
|
|
fmeReg: regexp.MustCompile(fmeRE),
|
|
ch: ch,
|
|
getDevMap: getDevMap,
|
|
}, nil
|
|
}
|
|
|
|
func (c *Cache) getDevNode(devName string) (string, error) {
|
|
devNode := path.Join(c.devfsDir, devName)
|
|
if _, err := os.Stat(devNode); err != nil {
|
|
return "", fmt.Errorf("Device %s doesn't exist: %v", devNode, err)
|
|
}
|
|
|
|
return devNode, nil
|
|
}
|
|
|
|
func (c *Cache) detectUpdates(devices []device) {
|
|
added := make(map[string]map[string]deviceplugin.DeviceInfo)
|
|
updated := make(map[string]map[string]deviceplugin.DeviceInfo)
|
|
|
|
oldDevMap := c.getDevMap(c.devices)
|
|
|
|
for fpgaID, new := range c.getDevMap(devices) {
|
|
if old, ok := oldDevMap[fpgaID]; ok {
|
|
if !reflect.DeepEqual(old, new) {
|
|
updated[fpgaID] = new
|
|
}
|
|
delete(oldDevMap, fpgaID)
|
|
} else {
|
|
added[fpgaID] = new
|
|
}
|
|
}
|
|
|
|
if len(added) > 0 || len(updated) > 0 || len(oldDevMap) > 0 {
|
|
c.ch <- UpdateInfo{
|
|
Added: added,
|
|
Updated: updated,
|
|
Removed: oldDevMap,
|
|
}
|
|
}
|
|
}
|
|
|
|
func (c *Cache) scanFPGAs() error {
|
|
var devices []device
|
|
|
|
glog.V(2).Info("Start new FPGA scan")
|
|
|
|
fpgaFiles, err := ioutil.ReadDir(c.sysfsDir)
|
|
if err != nil {
|
|
return fmt.Errorf("Can't read sysfs folder: %v", err)
|
|
}
|
|
|
|
for _, fpgaFile := range fpgaFiles {
|
|
fname := fpgaFile.Name()
|
|
|
|
if !c.deviceReg.MatchString(fname) {
|
|
continue
|
|
}
|
|
|
|
deviceFolder := path.Join(c.sysfsDir, fname)
|
|
deviceFiles, err := ioutil.ReadDir(deviceFolder)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var regions []region
|
|
var afus []afu
|
|
for _, deviceFile := range deviceFiles {
|
|
name := deviceFile.Name()
|
|
|
|
if c.fmeReg.MatchString(name) {
|
|
if len(regions) > 0 {
|
|
return fmt.Errorf("Detected more than one FPGA region for device %s. Only one region per FPGA device is supported", fname)
|
|
}
|
|
interfaceIDFile := path.Join(deviceFolder, name, "pr", "interface_id")
|
|
data, err := ioutil.ReadFile(interfaceIDFile)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
devNode, err := c.getDevNode(name)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
regions = append(regions, region{
|
|
id: name,
|
|
interfaceID: strings.TrimSpace(string(data)),
|
|
devNode: devNode,
|
|
})
|
|
} else if c.portReg.MatchString(name) {
|
|
afuFile := path.Join(deviceFolder, name, "afu_id")
|
|
data, err := ioutil.ReadFile(afuFile)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
devNode, err := c.getDevNode(name)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
afus = append(afus, afu{
|
|
id: name,
|
|
afuID: strings.TrimSpace(string(data)),
|
|
devNode: devNode,
|
|
})
|
|
}
|
|
}
|
|
|
|
if len(regions) == 0 {
|
|
return fmt.Errorf("No regions found for device %s", fname)
|
|
}
|
|
|
|
// Currently only one region per device is supported.
|
|
regions[0].afus = afus
|
|
devices = append(devices, device{
|
|
name: fname,
|
|
regions: regions,
|
|
})
|
|
}
|
|
|
|
c.detectUpdates(devices)
|
|
c.devices = devices
|
|
|
|
return nil
|
|
}
|
|
|
|
// Run starts scanning of FPGA devices on the host
|
|
func (c *Cache) Run() error {
|
|
for {
|
|
err := c.scanFPGAs()
|
|
if err != nil {
|
|
glog.Error("Device scan failed: ", err)
|
|
return fmt.Errorf("Device scan failed: %v", err)
|
|
}
|
|
|
|
time.Sleep(5 * time.Second)
|
|
}
|
|
}
|