mirror of
https://github.com/intel/intel-device-plugins-for-kubernetes.git
synced 2025-06-03 03:59:37 +00:00
400 lines
11 KiB
Go
400 lines
11 KiB
Go
// Copyright 2017-2021 Intel Corporation. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package main
|
|
|
|
import (
|
|
"flag"
|
|
"fmt"
|
|
"os"
|
|
"path"
|
|
"regexp"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/pkg/errors"
|
|
|
|
"k8s.io/klog/v2"
|
|
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
|
|
|
|
"github.com/intel/intel-device-plugins-for-kubernetes/cmd/gpu_plugin/rm"
|
|
"github.com/intel/intel-device-plugins-for-kubernetes/cmd/internal/pluginutils"
|
|
dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin"
|
|
)
|
|
|
|
const (
	// Default filesystem locations handed to newDevicePlugin by main.
	sysfsDrmDirectory = "/sys/class/drm"
	devfsDriDirectory = "/dev/dri"

	// Name patterns for DRM entries: GPU card nodes and control nodes.
	gpuDeviceRE     = `^card[0-9]+$`
	controlDeviceRE = `^controlD[0-9]+$`

	// vendorString is the value a card's "device/vendor" sysfs file must
	// contain for the card to be treated as an Intel GPU.
	vendorString = "0x8086"

	// Device plugin settings.
	namespace  = "gpu.intel.com"
	deviceType = "i915"

	// Telemetry resource settings.
	monitorType = "i915_monitoring"
	monitorID   = "all"

	// scanPeriod is the period of device scans.
	scanPeriod = 5 * time.Second
)
|
|
|
|
// cliOptions collects the settings that main parses from the command line.
type cliOptions struct {
	// preferredAllocationPolicy is the value of -allocation-policy:
	// "balanced", "packed" or "none".
	preferredAllocationPolicy string
	// sharedDevNum is the value of -shared-dev-num: the number of containers
	// sharing the same GPU device.
	sharedDevNum int
	// enableMonitoring is the value of -enable-monitoring: whether the
	// 'i915_monitoring' (= all GPUs) resource is enabled.
	enableMonitoring bool
	// resourceManagement is the value of -resource-manager: fractional GPU
	// resource management.
	resourceManagement bool
}
|
|
|
|
// preferredAllocationPolicyFunc is the signature shared by the allocation
// policies: given one container's preferred-allocation request, it returns the
// device IDs that should be preferred for that container.
type preferredAllocationPolicyFunc func(*pluginapi.ContainerPreferredAllocationRequest) []string
|
|
|
|
// nonePolicy is used for allocating GPU devices randomly.
|
|
func nonePolicy(req *pluginapi.ContainerPreferredAllocationRequest) []string {
|
|
klog.V(2).Info("Select nonePolicy for GPU device allocation")
|
|
|
|
deviceIds := req.AvailableDeviceIDs[0:req.AllocationSize]
|
|
|
|
klog.V(2).Infof("Allocate deviceIds: %q", deviceIds)
|
|
|
|
return deviceIds
|
|
}
|
|
|
|
// balancedPolicy is used for allocating GPU devices in balance.
|
|
func balancedPolicy(req *pluginapi.ContainerPreferredAllocationRequest) []string {
|
|
klog.V(2).Info("Select balancedPolicy for GPU device allocation")
|
|
|
|
// Save the shared-devices list of each physical GPU.
|
|
Card := make(map[string][]string)
|
|
// Save the available shared-nums of each physical GPU.
|
|
Count := make(map[string]int)
|
|
|
|
for _, deviceID := range req.AvailableDeviceIDs {
|
|
device := strings.Split(deviceID, "-")
|
|
Card[device[0]] = append(Card[device[0]], deviceID)
|
|
Count[device[0]]++
|
|
}
|
|
|
|
// Save the physical GPUs in order.
|
|
Index := make([]string, 0)
|
|
for key := range Count {
|
|
Index = append(Index, key)
|
|
sort.Strings(Card[key])
|
|
}
|
|
|
|
sort.Strings(Index)
|
|
|
|
need := req.AllocationSize
|
|
|
|
var deviceIds []string
|
|
|
|
// We choose one device ID from the GPU card that has most shared gpu IDs each time.
|
|
for {
|
|
var (
|
|
allocateCard string
|
|
max int
|
|
)
|
|
|
|
for _, key := range Index {
|
|
if Count[key] > max {
|
|
max = Count[key]
|
|
allocateCard = key
|
|
}
|
|
}
|
|
|
|
deviceIds = append(deviceIds, Card[allocateCard][0])
|
|
need--
|
|
|
|
if need == 0 {
|
|
break
|
|
}
|
|
|
|
// Update Maps
|
|
Card[allocateCard] = Card[allocateCard][1:]
|
|
Count[allocateCard]--
|
|
}
|
|
|
|
klog.V(2).Infof("Allocate deviceIds: %q", deviceIds)
|
|
|
|
return deviceIds
|
|
}
|
|
|
|
// packedPolicy is used for allocating GPU devices one by one.
|
|
func packedPolicy(req *pluginapi.ContainerPreferredAllocationRequest) []string {
|
|
klog.V(2).Info("Select packedPolicy for GPU device allocation")
|
|
|
|
deviceIds := req.AvailableDeviceIDs
|
|
sort.Strings(deviceIds)
|
|
deviceIds = deviceIds[:req.AllocationSize]
|
|
|
|
klog.V(2).Infof("Allocate deviceIds: %q", deviceIds)
|
|
|
|
return deviceIds
|
|
}
|
|
|
|
type devicePlugin struct {
|
|
gpuDeviceReg *regexp.Regexp
|
|
controlDeviceReg *regexp.Regexp
|
|
|
|
scanTicker *time.Ticker
|
|
scanDone chan bool
|
|
|
|
resMan rm.ResourceManager
|
|
|
|
sysfsDir string
|
|
devfsDir string
|
|
|
|
// Note: If restarting the plugin with a new policy, the allocations for existing pods remain with old policy.
|
|
policy preferredAllocationPolicyFunc
|
|
options cliOptions
|
|
}
|
|
|
|
func newDevicePlugin(sysfsDir, devfsDir string, options cliOptions) *devicePlugin {
|
|
dp := &devicePlugin{
|
|
sysfsDir: sysfsDir,
|
|
devfsDir: devfsDir,
|
|
options: options,
|
|
gpuDeviceReg: regexp.MustCompile(gpuDeviceRE),
|
|
controlDeviceReg: regexp.MustCompile(controlDeviceRE),
|
|
scanTicker: time.NewTicker(scanPeriod),
|
|
scanDone: make(chan bool, 1), // buffered as we may send to it before Scan starts receiving from it
|
|
}
|
|
|
|
if options.resourceManagement {
|
|
var err error
|
|
|
|
dp.resMan, err = rm.NewResourceManager(monitorID, namespace+"/"+deviceType)
|
|
if err != nil {
|
|
klog.Errorf("Failed to create resource manager: %+v", err)
|
|
return nil
|
|
}
|
|
}
|
|
|
|
switch options.preferredAllocationPolicy {
|
|
case "balanced":
|
|
dp.policy = balancedPolicy
|
|
case "packed":
|
|
dp.policy = packedPolicy
|
|
default:
|
|
dp.policy = nonePolicy
|
|
}
|
|
|
|
return dp
|
|
}
|
|
|
|
// Implement the PreferredAllocator interface.
|
|
func (dp *devicePlugin) GetPreferredAllocation(rqt *pluginapi.PreferredAllocationRequest) (*pluginapi.PreferredAllocationResponse, error) {
|
|
if dp.resMan != nil {
|
|
return dp.resMan.GetPreferredFractionalAllocation(rqt)
|
|
}
|
|
|
|
response := &pluginapi.PreferredAllocationResponse{}
|
|
|
|
for _, req := range rqt.ContainerRequests {
|
|
klog.V(3).Infof("AvailableDeviceIDs: %q", req.AvailableDeviceIDs)
|
|
klog.V(3).Infof("MustIncludeDeviceIDs: %q", req.MustIncludeDeviceIDs)
|
|
klog.V(3).Infof("AllocationSize: %d", req.AllocationSize)
|
|
|
|
// Add a security check here. This should never happen unless there occurs error in kubelet device plugin manager.
|
|
if req.AllocationSize > int32(len(req.AvailableDeviceIDs)) {
|
|
klog.V(3).Info("req.AllocationSize must be not greater than len(req.AvailableDeviceIDs).")
|
|
|
|
var err = errors.Errorf("AllocationSize (%d) is greater than the number of available device IDs (%d)", req.AllocationSize, len(req.AvailableDeviceIDs))
|
|
|
|
return nil, err
|
|
}
|
|
|
|
IDs := dp.policy(req)
|
|
|
|
resp := &pluginapi.ContainerPreferredAllocationResponse{
|
|
DeviceIDs: IDs,
|
|
}
|
|
|
|
response.ContainerResponses = append(response.ContainerResponses, resp)
|
|
}
|
|
|
|
return response, nil
|
|
}
|
|
|
|
func (dp *devicePlugin) Scan(notifier dpapi.Notifier) error {
|
|
defer dp.scanTicker.Stop()
|
|
|
|
var previouslyFound = -1
|
|
|
|
for {
|
|
devTree, err := dp.scan()
|
|
if err != nil {
|
|
klog.Warning("Failed to scan: ", err)
|
|
}
|
|
|
|
found := len(devTree)
|
|
if found != previouslyFound {
|
|
klog.V(1).Info("GPU scan update: devices found: ", found)
|
|
previouslyFound = found
|
|
}
|
|
|
|
notifier.Notify(devTree)
|
|
|
|
select {
|
|
case <-dp.scanDone:
|
|
return nil
|
|
case <-dp.scanTicker.C:
|
|
}
|
|
}
|
|
}
|
|
|
|
func (dp *devicePlugin) isCompatibleDevice(name string) bool {
|
|
if !dp.gpuDeviceReg.MatchString(name) {
|
|
klog.V(4).Info("Not compatible device: ", name)
|
|
return false
|
|
}
|
|
|
|
dat, err := os.ReadFile(path.Join(dp.sysfsDir, name, "device/vendor"))
|
|
if err != nil {
|
|
klog.Warning("Skipping. Can't read vendor file: ", err)
|
|
return false
|
|
}
|
|
|
|
if strings.TrimSpace(string(dat)) != vendorString {
|
|
klog.V(4).Info("Non-Intel GPU: ", name)
|
|
return false
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
|
|
files, err := os.ReadDir(dp.sysfsDir)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "Can't read sysfs folder")
|
|
}
|
|
|
|
var monitor []pluginapi.DeviceSpec
|
|
|
|
devTree := dpapi.NewDeviceTree()
|
|
rmDevInfos := rm.NewDeviceInfoMap()
|
|
|
|
for _, f := range files {
|
|
var nodes []pluginapi.DeviceSpec
|
|
|
|
if !dp.isCompatibleDevice(f.Name()) {
|
|
continue
|
|
}
|
|
|
|
drmFiles, err := os.ReadDir(path.Join(dp.sysfsDir, f.Name(), "device/drm"))
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "Can't read device folder")
|
|
}
|
|
|
|
isPFwithVFs := pluginutils.IsSriovPFwithVFs(path.Join(dp.sysfsDir, f.Name()))
|
|
|
|
for _, drmFile := range drmFiles {
|
|
if dp.controlDeviceReg.MatchString(drmFile.Name()) {
|
|
//Skipping possible drm control node
|
|
continue
|
|
}
|
|
|
|
devPath := path.Join(dp.devfsDir, drmFile.Name())
|
|
if _, err := os.Stat(devPath); err != nil {
|
|
continue
|
|
}
|
|
|
|
// even querying metrics requires device to be writable
|
|
devSpec := pluginapi.DeviceSpec{
|
|
HostPath: devPath,
|
|
ContainerPath: devPath,
|
|
Permissions: "rw",
|
|
}
|
|
|
|
if !isPFwithVFs {
|
|
klog.V(4).Infof("Adding %s to GPU %s", devPath, f.Name())
|
|
|
|
nodes = append(nodes, devSpec)
|
|
}
|
|
|
|
if dp.options.enableMonitoring {
|
|
klog.V(4).Infof("Adding %s to GPU %s/%s", devPath, monitorType, monitorID)
|
|
|
|
monitor = append(monitor, devSpec)
|
|
}
|
|
}
|
|
|
|
if len(nodes) > 0 {
|
|
deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil)
|
|
|
|
for i := 0; i < dp.options.sharedDevNum; i++ {
|
|
devID := fmt.Sprintf("%s-%d", f.Name(), i)
|
|
// Currently only one device type (i915) is supported.
|
|
// TODO: check model ID to differentiate device models.
|
|
devTree.AddDevice(deviceType, devID, deviceInfo)
|
|
|
|
rmDevInfos[devID] = rm.NewDeviceInfo(nodes, nil, nil)
|
|
}
|
|
}
|
|
}
|
|
// all Intel GPUs are under single monitoring resource
|
|
if len(monitor) > 0 {
|
|
deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, monitor, nil, nil, nil)
|
|
devTree.AddDevice(monitorType, monitorID, deviceInfo)
|
|
}
|
|
|
|
if dp.resMan != nil {
|
|
dp.resMan.SetDevInfos(rmDevInfos)
|
|
}
|
|
|
|
return devTree, nil
|
|
}
|
|
|
|
func (dp *devicePlugin) Allocate(request *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
|
|
if dp.resMan != nil {
|
|
return dp.resMan.CreateFractionalResourceResponse(request)
|
|
}
|
|
|
|
return nil, &dpapi.UseDefaultMethodError{}
|
|
}
|
|
|
|
func main() {
|
|
var opts cliOptions
|
|
|
|
flag.BoolVar(&opts.enableMonitoring, "enable-monitoring", false, "whether to enable 'i915_monitoring' (= all GPUs) resource")
|
|
flag.BoolVar(&opts.resourceManagement, "resource-manager", false, "fractional GPU resource management")
|
|
flag.IntVar(&opts.sharedDevNum, "shared-dev-num", 1, "number of containers sharing the same GPU device")
|
|
flag.StringVar(&opts.preferredAllocationPolicy, "allocation-policy", "none", "modes of allocating GPU devices: balanced, packed and none")
|
|
flag.Parse()
|
|
|
|
if opts.sharedDevNum < 1 {
|
|
klog.Error("The number of containers sharing the same GPU must greater than zero")
|
|
os.Exit(1)
|
|
}
|
|
|
|
if opts.sharedDevNum == 1 && opts.resourceManagement {
|
|
klog.Error("Trying to use fractional resources with shared-dev-num 1 is pointless")
|
|
os.Exit(1)
|
|
}
|
|
|
|
var str = opts.preferredAllocationPolicy
|
|
if !(str == "balanced" || str == "packed" || str == "none") {
|
|
klog.Error("invalid value for preferredAllocationPolicy, the valid values: balanced, packed, none")
|
|
os.Exit(1)
|
|
}
|
|
|
|
klog.V(1).Infof("GPU device plugin started with %s preferred allocation policy", opts.preferredAllocationPolicy)
|
|
|
|
plugin := newDevicePlugin(sysfsDrmDirectory, devfsDriDirectory, opts)
|
|
manager := dpapi.NewManager(namespace, plugin)
|
|
manager.Run()
|
|
}
|