Merge pull request #634 from chunfungintel/Intel_vpu_pci_device_plugin

Implement support for PCI-based VPU
This commit is contained in:
Mikko Ylinen 2021-05-19 20:08:00 +03:00 committed by GitHub
commit a207bfe4ed
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 326 additions and 37 deletions

View File

@ -34,6 +34,11 @@ This card has:
- 8 MyriadX VPUs
- PCIe interface to 6th+ Generation Core PC or Xeon E3/E5 server
[Gen 3 Intel® Movidius™ VPU HDDL VE3](https://www.intel.com/content/www/us/en/products/details/processors/movidius-vpu.html)
This card has:
- 3 Gen 3 Intel® Movidius™ VPU SoCs
> **Note:** This device plugin needs the HDDL daemon service to be running, either natively or in a container.
> To get a VCAC-A or Mustang card running HDDL, please refer to:
> https://github.com/OpenVisualCloud/Dockerfiles/blob/master/VCAC-A/script/setup_hddl.sh
@ -88,6 +93,12 @@ daemonset.apps/intel-vpu-plugin created
the nodes' DAC rules must be configured to allow device plugin socket creation and kubelet registration.
Furthermore, the deployment's `securityContext` must be configured with appropriate `runAsUser/runAsGroup`.
For xlink devices, deploy the DaemonSet as follows:
```bash
$ kubectl apply -k deployments/vpu_plugin/overlays/xlink
daemonset.apps/intel-vpu-plugin created
```
### Deploy by hand
For development purposes, it is sometimes convenient to deploy the plugin 'by hand' on a node.

View File

@ -17,8 +17,11 @@ package main
import (
"flag"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/google/gousb"
@ -43,33 +46,85 @@ const (
scanFrequency = 5 * time.Second
)
// Constants used by the PCI scanning mode.
const (
// vendorIDIntel is the vendor ID string that is compared against the
// trimmed contents of each PCI device's sysfs "vendor" file.
vendorIDIntel = "0x8086"
// xlinkDevNode is the device node that scanPci exposes to containers
// (same path on the host and in the container, "rw" permissions).
xlinkDevNode = "/dev/xlnk"
// hddlAlive, hddlReady and hddlStartExit are host files that scanPci
// mounts into containers at the same paths.
hddlAlive = "/var/tmp/hddlunite_service_alive.mutex"
hddlReady = "/var/tmp/hddlunite_service_ready.mutex"
hddlStartExit = "/var/tmp/hddlunite_service_start_exit.mutex"
// hddlSocketPci must exist for scanPci to report any device; it is also
// mounted into containers at the same path.
hddlSocketPci = "/var/tmp/hddlunite_service.sock"
// sysBusPCIDevice is the sysfs directory that main passes as the root to
// scan for PCI devices.
sysBusPCIDevice = "/sys/bus/pci/devices"
)
var (
// Movidius MyriadX Product IDs.
// scanUsb compares them against the product ID of each USB descriptor.
productIDs = []int{0x2485, 0xf63b}
// PCI Product IDs.
// Each entry is a PCIPidDeviceType given positionally: the list of
// product ID strings, the device type name and the ratio.
productIDsPCI = []PCIPidDeviceType{{[]string{"0x6240"}, "kmb", 1}}
)
// gousbContext is the subset of the USB context that the plugin relies on.
// main stores the context returned by gousb.NewContext() behind it, and the
// tests substitute their own type that implements OpenDevices.
type gousbContext interface {
// OpenDevices takes an opener callback that receives a device descriptor
// and reports a bool. scanUsb passes a callback that compares the
// descriptor's vendor and product IDs with the ones it is looking for.
OpenDevices(opener func(desc *gousb.DeviceDesc) bool) ([]*gousb.Device, error)
}
// PCIPidDeviceType describes one supported type of PCI VPU.
type PCIPidDeviceType struct {
	// pids lists the PCI product IDs of this device type, written the way
	// they are compared against a device's sysfs "device" file
	// (for example "0x6240").
	pids []string
	// deviceType is the name under which matching devices are registered.
	deviceType string
	// ratio is the divisor scanPci applies to the number of matching PCI
	// entries to obtain the number of devices to register.
	ratio int
}

// getPciDeviceCounts counts, for every entry of pidSearch, the PCI devices
// below sysfsPciDevicesPath whose "vendor" file equals vendorID and whose
// "device" file equals one of that entry's pids. File contents are compared
// after trimming surrounding whitespace.
//
// The returned slice has one counter per pidSearch entry, in the same order.
// An error is returned, together with a nil slice, when the directory itself
// cannot be listed. Individual entries whose "vendor" or "device" file cannot
// be read are skipped, because they cannot be identified as a supported VPU.
func getPciDeviceCounts(sysfsPciDevicesPath string, vendorID string, pidSearch []PCIPidDeviceType) ([]int, error) {
	entries, err := ioutil.ReadDir(sysfsPciDevicesPath)
	if err != nil {
		return nil, fmt.Errorf("listing PCI devices in %q: %w", sysfsPciDevicesPath, err)
	}

	found := make([]int, len(pidSearch))

	for _, entry := range entries {
		devDir := filepath.Join(sysfsPciDevicesPath, entry.Name())

		// The vendor check does not depend on the searched type, so do it
		// once per device instead of once per candidate product ID.
		vidRaw, err := ioutil.ReadFile(filepath.Join(devDir, "vendor"))
		if err != nil || strings.TrimSpace(string(vidRaw)) != vendorID {
			continue
		}

		pidRaw, err := ioutil.ReadFile(filepath.Join(devDir, "device"))
		if err != nil {
			continue
		}
		pid := strings.TrimSpace(string(pidRaw))

		for i, pciPid := range pidSearch {
			for _, pidVPU := range pciPid.pids {
				if pid == pidVPU {
					found[i]++
				}
			}
		}
	}

	return found, nil
}
type devicePlugin struct {
usbContext gousbContext
vendorID int
productIDs []int
deviceCtx interface{}
sharedDevNum int
scanTicker *time.Ticker
scanDone chan bool
}
func newDevicePlugin(usbContext gousbContext, vendorID int, productIDs []int, sharedDevNum int) *devicePlugin {
// devicePluginUsb is the scanning context for USB mode. main stores it in
// devicePlugin.deviceCtx and scanUsb type-asserts it back.
type devicePluginUsb struct {
// usbContext is used by scanUsb to open the USB devices.
usbContext gousbContext
// vendorID and productIDs are compared by scanUsb with the vendor and
// product ID of each USB device descriptor.
vendorID int
productIDs []int
}
// devicePluginPci is the scanning context for PCI mode. main stores it in
// devicePlugin.deviceCtx and scanPci type-asserts it back; its three fields
// are passed to getPciDeviceCounts as they are.
type devicePluginPci struct {
// sysfsPciDevicesPath is the directory whose entries are scanned
// (sysBusPCIDevice in main, a temporary directory in the tests).
sysfsPciDevicesPath string
// vendorIDPCI is the vendor ID string a device's "vendor" file must contain.
vendorIDPCI string
// productIDsPCI lists the supported device types and their product IDs.
productIDsPCI []PCIPidDeviceType
}
func newDevicePlugin(deviceCtx interface{}, sharedDevNum int) *devicePlugin {
if sharedDevNum < 1 {
klog.V(1).Info("The number of containers sharing the same VPU must greater than zero")
return nil
}
return &devicePlugin{
usbContext: usbContext,
vendorID: vendorID,
productIDs: productIDs,
deviceCtx: deviceCtx,
sharedDevNum: sharedDevNum,
scanTicker: time.NewTicker(scanFrequency),
scanDone: make(chan bool, 1),
@ -103,21 +158,25 @@ func fileExists(filename string) bool {
return false
}
func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
func (dp *devicePlugin) scanUsb(devTree *dpapi.DeviceTree) {
var nUsb int
devTree := dpapi.NewDeviceTree()
// first check if HDDL sock is there
if !fileExists(hddlSockPath) {
return devTree, nil
return
}
devs, err := dp.usbContext.OpenDevices(func(desc *gousb.DeviceDesc) bool {
deviceCtx, ok := dp.deviceCtx.(devicePluginUsb)
if !ok {
klog.V(4).Infof("wrong context %s", ok)
}
devs, err := deviceCtx.usbContext.OpenDevices(func(desc *gousb.DeviceDesc) bool {
thisVendor := desc.Vendor
thisProduct := desc.Product
for _, v := range dp.productIDs {
klog.V(4).Infof("checking %04x,%04x vs %s,%s", dp.vendorID, v, thisVendor.String(), thisProduct.String())
if (gousb.ID(dp.vendorID) == thisVendor) && (gousb.ID(v) == thisProduct) {
for _, v := range deviceCtx.productIDs {
klog.V(4).Infof("checking %04x,%04x vs %s,%s", deviceCtx.vendorID, v, thisVendor.String(), thisProduct.String())
if (gousb.ID(deviceCtx.vendorID) == thisVendor) && (gousb.ID(v) == thisProduct) {
nUsb++
}
}
@ -163,29 +222,110 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
devTree.AddDevice(deviceType, devID, dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, mounts, nil))
}
}
}
// scanPci registers the PCI VPUs found on the host in devTree.
//
// Nothing is registered when the HDDL service socket (hddlSocketPci) does not
// exist, when dp.deviceCtx is not a devicePluginPci, or when the PCI devices
// cannot be listed. For each supported device type the number of matching
// PCI entries is divided by the type's ratio; devices are registered only
// when the division leaves no remainder. Every registered device exposes the
// xlink device node and mounts the HDDL service files at the same paths
// inside the container.
func (dp *devicePlugin) scanPci(devTree *dpapi.DeviceTree) {
	// first check if HDDL sock is there
	if !fileExists(hddlSocketPci) {
		return
	}

	deviceCtx, ok := dp.deviceCtx.(devicePluginPci)
	if !ok {
		// Continuing with a zero-value context would scan an empty path.
		klog.V(4).Infof("wrong context %T", dp.deviceCtx)
		return
	}

	// Get all PCI devices
	pciFound, err := getPciDeviceCounts(deviceCtx.sysfsPciDevicesPath, deviceCtx.vendorIDPCI, deviceCtx.productIDsPCI)
	if err != nil {
		klog.V(4).Infof("list pci device %v", err)
		return
	}

	for typeIdx, found := range pciFound {
		pciType := deviceCtx.productIDsPCI[typeIdx]

		// A ratio below one would make the modulo/division below panic or
		// produce a meaningless count.
		if pciType.ratio < 1 {
			klog.V(4).Infof("invalid ratio %d for device type %s", pciType.ratio, pciType.deviceType)
			continue
		}
		// Only register devices when the matches form complete groups.
		if found%pciType.ratio != 0 {
			continue
		}
		count := found / pciType.ratio

		nodes := []pluginapi.DeviceSpec{
			{
				HostPath:      xlinkDevNode,
				ContainerPath: xlinkDevNode,
				Permissions:   "rw",
			},
		}
		mounts := []pluginapi.Mount{
			{
				HostPath:      hddlAlive,
				ContainerPath: hddlAlive,
			},
			{
				HostPath:      hddlReady,
				ContainerPath: hddlReady,
			},
			{
				HostPath:      hddlStartExit,
				ContainerPath: hddlStartExit,
			},
			{
				HostPath:      hddlSocketPci,
				ContainerPath: hddlSocketPci,
			},
		}

		// Mount all devices
		for devIdx := 0; devIdx < count; devIdx++ {
			devID := fmt.Sprintf("%s-device-%d", pciType.deviceType, devIdx)
			// VPU pci device found and added to node
			klog.V(1).Info(devID)
			devTree.AddDevice(pciType.deviceType, devID, dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, mounts, nil))
		}
	}
}
// scan builds a fresh device tree and fills it using the scanner that
// matches the concrete type stored in dp.deviceCtx: scanUsb for a
// devicePluginUsb, scanPci for a devicePluginPci. Any other context leaves
// the tree as created. The returned error is always nil.
func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
	tree := dpapi.NewDeviceTree()

	if _, isUsb := dp.deviceCtx.(devicePluginUsb); isUsb {
		dp.scanUsb(&tree)
	} else if _, isPci := dp.deviceCtx.(devicePluginPci); isPci {
		dp.scanPci(&tree)
	}

	return tree, nil
}
func main() {
var sharedDevNum int
var scanMode int
flag.IntVar(&sharedDevNum, "shared-dev-num", 1, "number of containers sharing the same VPU device")
flag.IntVar(&scanMode, "mode", 1, "USB=1 PCI=2")
flag.Parse()
klog.V(1).Info("VPU device plugin started")
// add lsusb here
ctx := gousb.NewContext()
defer ctx.Close()
var plugin *devicePlugin
if scanMode == 1 {
// add lsusb here
ctx := gousb.NewContext()
defer ctx.Close()
verbosityLevel, err := strconv.Atoi(flag.CommandLine.Lookup("v").Value.String())
if err == nil {
// gousb (libusb) Debug levels are a 1:1 match to klog levels, just pass through.
ctx.Debug(verbosityLevel)
verbosityLevel, err := strconv.Atoi(flag.CommandLine.Lookup("v").Value.String())
if err == nil {
// gousb (libusb) Debug levels are a 1:1 match to klog levels, just pass through.
ctx.Debug(verbosityLevel)
}
deviceCtxUsb := devicePluginUsb{usbContext: ctx, vendorID: vendorID, productIDs: productIDs}
plugin = newDevicePlugin(deviceCtxUsb, sharedDevNum)
} else if scanMode == 2 {
deviceCtxPci := devicePluginPci{sysfsPciDevicesPath: sysBusPCIDevice, vendorIDPCI: vendorIDIntel, productIDsPCI: productIDsPCI}
plugin = newDevicePlugin(deviceCtxPci, sharedDevNum)
}
plugin := newDevicePlugin(ctx, vendorID, productIDs, sharedDevNum)
if plugin == nil {
klog.Fatal("Cannot create device plugin, please check above error messages.")
}

View File

@ -16,7 +16,10 @@ package main
import (
"flag"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"testing"
"github.com/google/gousb"
@ -31,7 +34,6 @@ func init() {
type testCase struct {
vendorID int
productIDs []int
sharedNum int
}
//OpenDevices tries to inject gousb compatible fake device info.
@ -50,6 +52,45 @@ func (t *testCase) OpenDevices(opener func(desc *gousb.DeviceDesc) bool) ([]*gou
return ret, nil
}
// createDevice creates a fake PCI device directory <pciBusRootDir>/<bdf>
// containing a "vendor" file holding vid and a "device" file holding pid,
// each followed by a newline. The directory is created with mode 0755 and
// the files with mode 0444. The first error encountered is returned.
func createDevice(pciBusRootDir string, bdf string, vid string, pid string) error {
	devDir := filepath.Join(pciBusRootDir, bdf)
	if err := os.MkdirAll(devDir, 0755); err != nil {
		return err
	}

	attrs := []struct {
		name  string
		value string
	}{
		{"vendor", vid},
		{"device", pid},
	}
	for _, attr := range attrs {
		content := append([]byte(attr.value), '\n')
		if err := ioutil.WriteFile(filepath.Join(devDir, attr.name), content, 0444); err != nil {
			return err
		}
	}

	return nil
}
// createTestPCI populates folder with fake PCI devices for every device type
// in testPCI. For each product ID of a type it creates 3*ratio devices with
// the Intel vendor ID. Device directories are named with consecutive decimal
// numbers starting at 1, shared across all types and product IDs. The first
// createDevice error aborts the creation and is returned.
func createTestPCI(folder string, testPCI []PCIPidDeviceType) error {
	const devicesPerRatio = 3

	nextBus := 1
	for _, devType := range testPCI {
		for _, pid := range devType.pids {
			// Create enough entries to yield devicesPerRatio devices
			// once the count is divided by the ratio.
			for n := 0; n < devicesPerRatio*devType.ratio; n++ {
				err := createDevice(folder, strconv.Itoa(nextBus), vendorIDIntel, pid)
				if err != nil {
					return err
				}
				nextBus++
			}
		}
	}

	return nil
}
// fakeNotifier implements Notifier interface.
type fakeNotifier struct {
scanDone chan bool
@ -62,20 +103,26 @@ func (n *fakeNotifier) Notify(newDeviceTree dpapi.DeviceTree) {
n.scanDone <- true
}
func TestScan(t *testing.T) {
func TestScanPci(t *testing.T) {
var fN fakeNotifier
f, err := os.Create(hddlSockPath)
f, err := os.Create(hddlSocketPci)
if err != nil {
t.Error("create fake hddl file failed")
}
//inject our fake gousbContext, just borrow vendorID and productIDs from main
tc := &testCase{
vendorID: vendorID,
//create a temporary folder to create fake devices files for PCI scanning
tmpPciDir, err := ioutil.TempDir("/tmp", "fake-pci-devices")
if err != nil {
t.Fatal(err)
}
//inject some productIDs that not match our target too
tc.productIDs = append(productIDs, 0xdead, 0xbeef)
testPlugin := newDevicePlugin(tc, vendorID, productIDs, 10)
defer os.RemoveAll(tmpPciDir)
//create supported PCI devices file
if err = createTestPCI(tmpPciDir, productIDsPCI); err != nil {
t.Fatal(err)
}
testPlugin := newDevicePlugin(devicePluginPci{sysfsPciDevicesPath: tmpPciDir, vendorIDPCI: vendorIDIntel, productIDsPCI: productIDsPCI}, 10)
if testPlugin == nil {
t.Fatal("vpu plugin test failed with newDevicePlugin().")
@ -86,15 +133,18 @@ func TestScan(t *testing.T) {
if err != nil {
t.Error("vpu plugin test failed with testPlugin.Scan()")
}
if len(fN.tree[deviceType]) == 0 {
t.Error("vpu plugin test failed with testPlugin.Scan(): tree len is 0")
//Loop for all supported PCI device type
for _, pciPid := range productIDsPCI {
if len(fN.tree[pciPid.deviceType]) == 0 {
t.Error("vpu plugin test failed with testPlugin.Scan(): tree len is 0")
}
klog.V(4).Infof("tree len of pci %s is %d", pciPid.deviceType, len(fN.tree[pciPid.deviceType]))
}
klog.V(4).Infof("tree len is %d", len(fN.tree[deviceType]))
//remove the hddl_service.sock and test with no hddl socket case
_ = f.Close()
_ = os.Remove("/var/tmp/hddl_service.sock")
testPlugin = newDevicePlugin(tc, vendorID, productIDs, 10)
testPlugin = newDevicePlugin(devicePluginPci{sysfsPciDevicesPath: tmpPciDir, vendorIDPCI: vendorIDIntel, productIDsPCI: productIDsPCI}, 10)
if testPlugin == nil {
t.Fatal("vpu plugin test failed with newDevicePlugin() in no hddl_service.sock case.")
@ -110,7 +160,62 @@ func TestScan(t *testing.T) {
}
//test with sharedNum equals 0 case
testPlugin = newDevicePlugin(tc, vendorID, productIDs, tc.sharedNum)
testPlugin = newDevicePlugin(devicePluginPci{sysfsPciDevicesPath: tmpPciDir, vendorIDPCI: vendorIDIntel, productIDsPCI: productIDsPCI}, 0)
if testPlugin != nil {
t.Error("vpu plugin test fail: newDevicePlugin should fail with 0 sharedDevNum")
}
}
func TestScan(t *testing.T) {
var fN fakeNotifier
f, err := os.Create(hddlSockPath)
if err != nil {
t.Error("create fake hddl file failed")
}
//inject our fake gousbContext, just borrow vendorID and productIDs from main
tc := &testCase{
vendorID: vendorID,
}
//inject some productIDs that not match our target too
tc.productIDs = append(productIDs, 0xdead, 0xbeef)
testPlugin := newDevicePlugin(devicePluginUsb{usbContext: tc, vendorID: vendorID, productIDs: productIDs}, 10)
if testPlugin == nil {
t.Fatal("vpu plugin test failed with newDevicePlugin().")
}
fN.scanDone = testPlugin.scanDone
err = testPlugin.Scan(&fN)
if err != nil {
t.Error("vpu plugin test failed with testPlugin.Scan()")
}
if len(fN.tree[deviceType]) == 0 {
t.Error("vpu plugin test failed with testPlugin.Scan(): tree len is 0")
}
klog.V(4).Infof("tree len of usb is %d", len(fN.tree[deviceType]))
//remove the hddl_service.sock and test with no hddl socket case
_ = f.Close()
_ = os.Remove("/var/tmp/hddl_service.sock")
testPlugin = newDevicePlugin(devicePluginUsb{usbContext: tc, vendorID: vendorID, productIDs: productIDs}, 10)
if testPlugin == nil {
t.Fatal("vpu plugin test failed with newDevicePlugin() in no hddl_service.sock case.")
}
fN.scanDone = testPlugin.scanDone
err = testPlugin.Scan(&fN)
if err != nil {
t.Error("vpu plugin test failed with testPlugin.Scan() in no hddl_service.sock case.")
}
if len(fN.tree[deviceType]) != 0 {
t.Error("vpu plugin test failed with testPlugin.Scan(): tree len should be 0 in no hddl_service.sock case.")
}
//test with sharedNum equals 0 case
testPlugin = newDevicePlugin(devicePluginUsb{usbContext: tc, vendorID: vendorID, productIDs: productIDs}, 0)
if testPlugin != nil {
t.Error("vpu plugin test fail: newDevicePlugin should fail with 0 sharedDevNum")
}

View File

@ -0,0 +1,10 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: intel-vpu-plugin
spec:
template:
spec:
containers:
- name: intel-vpu-plugin
args: ["--mode=2"]

View File

@ -0,0 +1,11 @@
bases:
- ../../base/
patches:
- add_command_args.yaml
patchesJson6902:
- target:
group: apps
version: v1
kind: DaemonSet
name: intel-vpu-plugin
path: volumes_patch.yaml

View File

@ -0,0 +1,12 @@
- op: replace
path: /spec/template/spec/containers/0/volumeMounts/0/mountPath
value: /dev/xlnk
- op: replace
path : /spec/template/spec/containers/0/volumeMounts/0/name
value: devxlnk
- op: replace
path: /spec/template/spec/volumes/0/name
value: devxlnk
- op: replace
path: /spec/template/spec/volumes/0/hostPath/path
value: /dev/xlnk