Merge pull request #634 from chunfungintel/Intel_vpu_pci_device_plugin

Implement support for PCI-based VPU
This commit is contained in:
Mikko Ylinen 2021-05-19 20:08:00 +03:00 committed by GitHub
commit a207bfe4ed
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 326 additions and 37 deletions

View File

@ -34,6 +34,11 @@ This card has:
- 8 MyriadX VPUs - 8 MyriadX VPUs
- PCIe interface to 6th+ Generation Core PC or Xeon E3/E5 server - PCIe interface to 6th+ Generation Core PC or Xeon E3/E5 server
[Gen 3 Intel® Movidius™ VPU HDDL VE3](https://www.intel.com/content/www/us/en/products/details/processors/movidius-vpu.html)
This card has:
- 3 Gen 3 Intel® Movidius™ VPU SoCs
> **Note:** This device plugin need HDDL daemon service to be running either natively or from a container. > **Note:** This device plugin need HDDL daemon service to be running either natively or from a container.
> To get VCAC-A or Mustang card running hddl, please refer to: > To get VCAC-A or Mustang card running hddl, please refer to:
> https://github.com/OpenVisualCloud/Dockerfiles/blob/master/VCAC-A/script/setup_hddl.sh > https://github.com/OpenVisualCloud/Dockerfiles/blob/master/VCAC-A/script/setup_hddl.sh
@ -88,6 +93,12 @@ daemonset.apps/intel-vpu-plugin created
the nodes' DAC rules must be configured to device plugin socket creation and kubelet registration. the nodes' DAC rules must be configured to device plugin socket creation and kubelet registration.
Furthermore, the deployments `securityContext` must be configured with appropriate `runAsUser/runAsGroup`. Furthermore, the deployments `securityContext` must be configured with appropriate `runAsUser/runAsGroup`.
For xlink devices, deploy the DaemonSet as follows:
```bash
$ kubectl apply -k deployments/vpu_plugin/overlays/xlink
daemonset.apps/intel-vpu-plugin created
```
### Deploy by hand ### Deploy by hand
For development purposes, it is sometimes convenient to deploy the plugin 'by hand' on a node. For development purposes, it is sometimes convenient to deploy the plugin 'by hand' on a node.

View File

@ -17,8 +17,11 @@ package main
import ( import (
"flag" "flag"
"fmt" "fmt"
"io/ioutil"
"os" "os"
"path/filepath"
"strconv" "strconv"
"strings"
"time" "time"
"github.com/google/gousb" "github.com/google/gousb"
@ -43,33 +46,85 @@ const (
scanFrequency = 5 * time.Second scanFrequency = 5 * time.Second
) )
// Settings used to discover PCI-attached VPUs.
const (
	// sysBusPCIDevice is the directory whose entries are scanned for
	// "vendor" and "device" files when running in PCI mode.
	sysBusPCIDevice = "/sys/bus/pci/devices"
	// vendorIDIntel is the vendor ID in the textual form that is compared
	// against the content of a device's "vendor" file.
	vendorIDIntel = "0x8086"
	// xlinkDevNode is the device node exposed to containers for PCI VPUs.
	xlinkDevNode = "/dev/xlnk"
)

// HDDL service files that are mounted into containers using PCI VPUs.
const (
	hddlAlive     = "/var/tmp/hddlunite_service_alive.mutex"
	hddlReady     = "/var/tmp/hddlunite_service_ready.mutex"
	hddlStartExit = "/var/tmp/hddlunite_service_start_exit.mutex"
	// hddlSocketPci is also checked on the host before any PCI device is
	// advertised.
	hddlSocketPci = "/var/tmp/hddlunite_service.sock"
)
var ( var (
// Movidius MyriadX Product IDs. // Movidius MyriadX Product IDs.
productIDs = []int{0x2485, 0xf63b} productIDs = []int{0x2485, 0xf63b}
// PCI Product IDs.
productIDsPCI = []PCIPidDeviceType{{[]string{"0x6240"}, "kmb", 1}}
) )
type gousbContext interface { type gousbContext interface {
OpenDevices(opener func(desc *gousb.DeviceDesc) bool) ([]*gousb.Device, error) OpenDevices(opener func(desc *gousb.DeviceDesc) bool) ([]*gousb.Device, error)
} }
// PCIPidDeviceType describes one family of PCI-attached VPUs.
//
// The field order is significant: the type is also initialised with
// positional composite literals.
type PCIPidDeviceType struct {
	// pids lists the PCI device IDs belonging to this family, in the textual
	// form found in a device's sysfs "device" file (e.g. "0x6240").
	pids []string
	// deviceType is the device type name under which matches are registered.
	deviceType string
	// ratio is the number of matching PCI entries that make up one
	// advertised device.
	ratio int
}

// getPciDeviceCounts counts, for every entry of pidSearch, the PCI devices
// below sysfsPciDevicesPath whose "vendor" file equals vendorID and whose
// "device" file equals one of the entry's pids (surrounding whitespace in the
// files is ignored).
//
// The result has one counter per pidSearch entry, in the same order. An error
// is returned, together with a nil slice, when sysfsPciDevicesPath cannot be
// listed. Entries whose "vendor" or "device" file cannot be read are skipped
// because they cannot be identified.
func getPciDeviceCounts(sysfsPciDevicesPath string, vendorID string, pidSearch []PCIPidDeviceType) ([]int, error) {
	entries, err := ioutil.ReadDir(sysfsPciDevicesPath)
	if err != nil {
		return nil, err
	}

	found := make([]int, len(pidSearch))

	for _, entry := range entries {
		devDir := filepath.Join(sysfsPciDevicesPath, entry.Name())

		// The vendor is the same for every searched type, so filter on it
		// once before looking at the product ID.
		vidRaw, err := ioutil.ReadFile(filepath.Join(devDir, "vendor"))
		if err != nil || strings.TrimSpace(string(vidRaw)) != vendorID {
			continue
		}

		pidRaw, err := ioutil.ReadFile(filepath.Join(devDir, "device"))
		if err != nil {
			continue
		}
		pid := strings.TrimSpace(string(pidRaw))

		// Loop over the supported VPU types and their product IDs.
		for i, pciPid := range pidSearch {
			for _, pidVPU := range pciPid.pids {
				if pid == pidVPU {
					found[i]++
				}
			}
		}
	}

	return found, nil
}
type devicePlugin struct { type devicePlugin struct {
usbContext gousbContext deviceCtx interface{}
vendorID int
productIDs []int
sharedDevNum int sharedDevNum int
scanTicker *time.Ticker scanTicker *time.Ticker
scanDone chan bool scanDone chan bool
} }
func newDevicePlugin(usbContext gousbContext, vendorID int, productIDs []int, sharedDevNum int) *devicePlugin { type devicePluginUsb struct {
usbContext gousbContext
vendorID int
productIDs []int
}
// devicePluginPci is the device context used when the plugin scans for
// PCI-attached VPUs; scanPci passes its fields to getPciDeviceCounts.
type devicePluginPci struct {
// sysfsPciDevicesPath is the directory scanned for PCI device entries.
sysfsPciDevicesPath string
// vendorIDPCI is the vendor ID string a device must report to be counted.
vendorIDPCI string
// productIDsPCI lists the supported device types and their PCI device IDs.
productIDsPCI []PCIPidDeviceType
}
func newDevicePlugin(deviceCtx interface{}, sharedDevNum int) *devicePlugin {
if sharedDevNum < 1 { if sharedDevNum < 1 {
klog.V(1).Info("The number of containers sharing the same VPU must greater than zero") klog.V(1).Info("The number of containers sharing the same VPU must greater than zero")
return nil return nil
} }
return &devicePlugin{ return &devicePlugin{
usbContext: usbContext, deviceCtx: deviceCtx,
vendorID: vendorID,
productIDs: productIDs,
sharedDevNum: sharedDevNum, sharedDevNum: sharedDevNum,
scanTicker: time.NewTicker(scanFrequency), scanTicker: time.NewTicker(scanFrequency),
scanDone: make(chan bool, 1), scanDone: make(chan bool, 1),
@ -103,21 +158,25 @@ func fileExists(filename string) bool {
return false return false
} }
func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) { func (dp *devicePlugin) scanUsb(devTree *dpapi.DeviceTree) {
var nUsb int var nUsb int
devTree := dpapi.NewDeviceTree()
// first check if HDDL sock is there // first check if HDDL sock is there
if !fileExists(hddlSockPath) { if !fileExists(hddlSockPath) {
return devTree, nil return
} }
devs, err := dp.usbContext.OpenDevices(func(desc *gousb.DeviceDesc) bool { deviceCtx, ok := dp.deviceCtx.(devicePluginUsb)
if !ok {
klog.V(4).Infof("wrong context %s", ok)
}
devs, err := deviceCtx.usbContext.OpenDevices(func(desc *gousb.DeviceDesc) bool {
thisVendor := desc.Vendor thisVendor := desc.Vendor
thisProduct := desc.Product thisProduct := desc.Product
for _, v := range dp.productIDs { for _, v := range deviceCtx.productIDs {
klog.V(4).Infof("checking %04x,%04x vs %s,%s", dp.vendorID, v, thisVendor.String(), thisProduct.String()) klog.V(4).Infof("checking %04x,%04x vs %s,%s", deviceCtx.vendorID, v, thisVendor.String(), thisProduct.String())
if (gousb.ID(dp.vendorID) == thisVendor) && (gousb.ID(v) == thisProduct) { if (gousb.ID(deviceCtx.vendorID) == thisVendor) && (gousb.ID(v) == thisProduct) {
nUsb++ nUsb++
} }
} }
@ -163,29 +222,110 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
devTree.AddDevice(deviceType, devID, dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, mounts, nil)) devTree.AddDevice(deviceType, devID, dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, mounts, nil))
} }
} }
}
// scanPci discovers PCI-attached VPUs and registers them in devTree.
//
// Nothing is registered when fileExists reports that the HDDL service socket
// (hddlSocketPci) is missing, when the plugin does not hold a devicePluginPci
// context, or when the PCI devices cannot be enumerated. For every supported
// device type, the number of matching PCI entries must be an exact multiple
// of the type's ratio; found/ratio devices named "<deviceType>-device-<n>"
// are then added as healthy, each with the xlink device node and the HDDL
// service files mounted.
func (dp *devicePlugin) scanPci(devTree *dpapi.DeviceTree) {
	// first check if HDDL sock is there
	if !fileExists(hddlSocketPci) {
		return
	}

	deviceCtx, ok := dp.deviceCtx.(devicePluginPci)
	if !ok {
		// Do not continue with a zero-value context: it would scan an empty
		// path with no product IDs.
		klog.V(4).Infof("wrong context %T", dp.deviceCtx)
		return
	}

	// Get all PCI devices
	pciFound, err := getPciDeviceCounts(deviceCtx.sysfsPciDevicesPath, deviceCtx.vendorIDPCI, deviceCtx.productIDsPCI)
	if err != nil {
		klog.V(4).Infof("list pci device %s", err)
		return
	}

	// Mount VPU
	for i, found := range pciFound {
		deviceTypePci := deviceCtx.productIDsPCI[i].deviceType
		deviceRatio := deviceCtx.productIDsPCI[i].ratio

		// A ratio below one is a configuration error; a zero ratio would
		// otherwise panic in the modulo below.
		if deviceRatio < 1 {
			klog.V(4).Infof("invalid ratio %d for device type %s", deviceRatio, deviceTypePci)
			continue
		}

		// Only advertise complete devices: skip the type when the entries
		// found do not divide evenly by the ratio.
		if found%deviceRatio != 0 {
			continue
		}
		count := found / deviceRatio

		nodes := []pluginapi.DeviceSpec{
			{
				HostPath:      xlinkDevNode,
				ContainerPath: xlinkDevNode,
				Permissions:   "rw",
			},
		}

		mounts := []pluginapi.Mount{
			{
				HostPath:      hddlAlive,
				ContainerPath: hddlAlive,
			},
			{
				HostPath:      hddlReady,
				ContainerPath: hddlReady,
			},
			{
				HostPath:      hddlStartExit,
				ContainerPath: hddlStartExit,
			},
			{
				HostPath:      hddlSocketPci,
				ContainerPath: hddlSocketPci,
			},
		}

		// Mount all devices
		for n := 0; n < count; n++ {
			devID := fmt.Sprintf("%s-device-%d", deviceTypePci, n)
			// VPU pci device found and added to node
			klog.V(1).Info(devID)
			devTree.AddDevice(deviceTypePci, devID, dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, mounts, nil))
		}
	}
}
func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
devTree := dpapi.NewDeviceTree()
switch dp.deviceCtx.(type) {
case devicePluginUsb:
dp.scanUsb(&devTree)
case devicePluginPci:
dp.scanPci(&devTree)
default:
}
return devTree, nil return devTree, nil
} }
func main() { func main() {
var sharedDevNum int var sharedDevNum int
var scanMode int
flag.IntVar(&sharedDevNum, "shared-dev-num", 1, "number of containers sharing the same VPU device") flag.IntVar(&sharedDevNum, "shared-dev-num", 1, "number of containers sharing the same VPU device")
flag.IntVar(&scanMode, "mode", 1, "USB=1 PCI=2")
flag.Parse() flag.Parse()
klog.V(1).Info("VPU device plugin started") klog.V(1).Info("VPU device plugin started")
// add lsusb here var plugin *devicePlugin
ctx := gousb.NewContext() if scanMode == 1 {
defer ctx.Close() // add lsusb here
ctx := gousb.NewContext()
defer ctx.Close()
verbosityLevel, err := strconv.Atoi(flag.CommandLine.Lookup("v").Value.String()) verbosityLevel, err := strconv.Atoi(flag.CommandLine.Lookup("v").Value.String())
if err == nil { if err == nil {
// gousb (libusb) Debug levels are a 1:1 match to klog levels, just pass through. // gousb (libusb) Debug levels are a 1:1 match to klog levels, just pass through.
ctx.Debug(verbosityLevel) ctx.Debug(verbosityLevel)
}
deviceCtxUsb := devicePluginUsb{usbContext: ctx, vendorID: vendorID, productIDs: productIDs}
plugin = newDevicePlugin(deviceCtxUsb, sharedDevNum)
} else if scanMode == 2 {
deviceCtxPci := devicePluginPci{sysfsPciDevicesPath: sysBusPCIDevice, vendorIDPCI: vendorIDIntel, productIDsPCI: productIDsPCI}
plugin = newDevicePlugin(deviceCtxPci, sharedDevNum)
} }
plugin := newDevicePlugin(ctx, vendorID, productIDs, sharedDevNum)
if plugin == nil { if plugin == nil {
klog.Fatal("Cannot create device plugin, please check above error messages.") klog.Fatal("Cannot create device plugin, please check above error messages.")
} }

View File

@ -16,7 +16,10 @@ package main
import ( import (
"flag" "flag"
"io/ioutil"
"os" "os"
"path/filepath"
"strconv"
"testing" "testing"
"github.com/google/gousb" "github.com/google/gousb"
@ -31,7 +34,6 @@ func init() {
type testCase struct { type testCase struct {
vendorID int vendorID int
productIDs []int productIDs []int
sharedNum int
} }
//OpenDevices tries to inject gousb compatible fake device info. //OpenDevices tries to inject gousb compatible fake device info.
@ -50,6 +52,45 @@ func (t *testCase) OpenDevices(opener func(desc *gousb.DeviceDesc) bool) ([]*gou
return ret, nil return ret, nil
} }
// createDevice fakes a sysfs PCI device entry for tests: it creates the
// directory pciBusRootDir/bdf and writes vid and pid, each followed by a
// newline, into read-only "vendor" and "device" files inside it. The first
// error encountered is returned unchanged.
func createDevice(pciBusRootDir string, bdf string, vid string, pid string) error {
	devDir := filepath.Join(pciBusRootDir, bdf)
	if err := os.MkdirAll(devDir, 0755); err != nil {
		return err
	}

	attrs := []struct{ file, value string }{
		{"vendor", vid},
		{"device", pid},
	}
	for _, attr := range attrs {
		content := []byte(attr.value + "\n")
		if err := ioutil.WriteFile(filepath.Join(devDir, attr.file), content, 0444); err != nil {
			return err
		}
	}

	return nil
}
// createTestPCI populates folder with fake PCI device entries for every
// device type in testPCI. For each product ID of a type it creates
// 3 * ratio entries with vendor vendorIDIntel, naming the entry directories
// with consecutive decimal numbers starting at 1. It stops at, and returns,
// the first error reported by createDevice.
func createTestPCI(folder string, testPCI []PCIPidDeviceType) error {
	const devicesPerType = 3

	nextBus := 1
	for _, devType := range testPCI {
		// Number of PCI entries needed so the type resolves to whole devices.
		entriesPerPid := devicesPerType * devType.ratio
		for _, pid := range devType.pids {
			for n := 0; n < entriesPerPid; n++ {
				if err := createDevice(folder, strconv.Itoa(nextBus), vendorIDIntel, pid); err != nil {
					return err
				}
				nextBus++
			}
		}
	}

	return nil
}
// fakeNotifier implements Notifier interface. // fakeNotifier implements Notifier interface.
type fakeNotifier struct { type fakeNotifier struct {
scanDone chan bool scanDone chan bool
@ -62,20 +103,26 @@ func (n *fakeNotifier) Notify(newDeviceTree dpapi.DeviceTree) {
n.scanDone <- true n.scanDone <- true
} }
func TestScan(t *testing.T) { func TestScanPci(t *testing.T) {
var fN fakeNotifier var fN fakeNotifier
f, err := os.Create(hddlSockPath) f, err := os.Create(hddlSocketPci)
if err != nil { if err != nil {
t.Error("create fake hddl file failed") t.Error("create fake hddl file failed")
} }
//inject our fake gousbContext, just borrow vendorID and productIDs from main
tc := &testCase{ //create a temporary folder to create fake devices files for PCI scanning
vendorID: vendorID, tmpPciDir, err := ioutil.TempDir("/tmp", "fake-pci-devices")
if err != nil {
t.Fatal(err)
} }
//inject some productIDs that not match our target too defer os.RemoveAll(tmpPciDir)
tc.productIDs = append(productIDs, 0xdead, 0xbeef) //create supported PCI devices file
testPlugin := newDevicePlugin(tc, vendorID, productIDs, 10) if err = createTestPCI(tmpPciDir, productIDsPCI); err != nil {
t.Fatal(err)
}
testPlugin := newDevicePlugin(devicePluginPci{sysfsPciDevicesPath: tmpPciDir, vendorIDPCI: vendorIDIntel, productIDsPCI: productIDsPCI}, 10)
if testPlugin == nil { if testPlugin == nil {
t.Fatal("vpu plugin test failed with newDevicePlugin().") t.Fatal("vpu plugin test failed with newDevicePlugin().")
@ -86,15 +133,18 @@ func TestScan(t *testing.T) {
if err != nil { if err != nil {
t.Error("vpu plugin test failed with testPlugin.Scan()") t.Error("vpu plugin test failed with testPlugin.Scan()")
} }
if len(fN.tree[deviceType]) == 0 { //Loop for all supported PCI device type
t.Error("vpu plugin test failed with testPlugin.Scan(): tree len is 0") for _, pciPid := range productIDsPCI {
if len(fN.tree[pciPid.deviceType]) == 0 {
t.Error("vpu plugin test failed with testPlugin.Scan(): tree len is 0")
}
klog.V(4).Infof("tree len of pci %s is %d", pciPid.deviceType, len(fN.tree[pciPid.deviceType]))
} }
klog.V(4).Infof("tree len is %d", len(fN.tree[deviceType]))
//remove the hddl_service.sock and test with no hddl socket case //remove the hddl_service.sock and test with no hddl socket case
_ = f.Close() _ = f.Close()
_ = os.Remove("/var/tmp/hddl_service.sock") _ = os.Remove("/var/tmp/hddl_service.sock")
testPlugin = newDevicePlugin(tc, vendorID, productIDs, 10) testPlugin = newDevicePlugin(devicePluginPci{sysfsPciDevicesPath: tmpPciDir, vendorIDPCI: vendorIDIntel, productIDsPCI: productIDsPCI}, 10)
if testPlugin == nil { if testPlugin == nil {
t.Fatal("vpu plugin test failed with newDevicePlugin() in no hddl_service.sock case.") t.Fatal("vpu plugin test failed with newDevicePlugin() in no hddl_service.sock case.")
@ -110,7 +160,62 @@ func TestScan(t *testing.T) {
} }
//test with sharedNum equals 0 case //test with sharedNum equals 0 case
testPlugin = newDevicePlugin(tc, vendorID, productIDs, tc.sharedNum) testPlugin = newDevicePlugin(devicePluginPci{sysfsPciDevicesPath: tmpPciDir, vendorIDPCI: vendorIDIntel, productIDsPCI: productIDsPCI}, 0)
if testPlugin != nil {
t.Error("vpu plugin test fail: newDevicePlugin should fail with 0 sharedDevNum")
}
}
func TestScan(t *testing.T) {
var fN fakeNotifier
f, err := os.Create(hddlSockPath)
if err != nil {
t.Error("create fake hddl file failed")
}
//inject our fake gousbContext, just borrow vendorID and productIDs from main
tc := &testCase{
vendorID: vendorID,
}
//inject some productIDs that not match our target too
tc.productIDs = append(productIDs, 0xdead, 0xbeef)
testPlugin := newDevicePlugin(devicePluginUsb{usbContext: tc, vendorID: vendorID, productIDs: productIDs}, 10)
if testPlugin == nil {
t.Fatal("vpu plugin test failed with newDevicePlugin().")
}
fN.scanDone = testPlugin.scanDone
err = testPlugin.Scan(&fN)
if err != nil {
t.Error("vpu plugin test failed with testPlugin.Scan()")
}
if len(fN.tree[deviceType]) == 0 {
t.Error("vpu plugin test failed with testPlugin.Scan(): tree len is 0")
}
klog.V(4).Infof("tree len of usb is %d", len(fN.tree[deviceType]))
//remove the hddl_service.sock and test with no hddl socket case
_ = f.Close()
_ = os.Remove("/var/tmp/hddl_service.sock")
testPlugin = newDevicePlugin(devicePluginUsb{usbContext: tc, vendorID: vendorID, productIDs: productIDs}, 10)
if testPlugin == nil {
t.Fatal("vpu plugin test failed with newDevicePlugin() in no hddl_service.sock case.")
}
fN.scanDone = testPlugin.scanDone
err = testPlugin.Scan(&fN)
if err != nil {
t.Error("vpu plugin test failed with testPlugin.Scan() in no hddl_service.sock case.")
}
if len(fN.tree[deviceType]) != 0 {
t.Error("vpu plugin test failed with testPlugin.Scan(): tree len should be 0 in no hddl_service.sock case.")
}
//test with sharedNum equals 0 case
testPlugin = newDevicePlugin(devicePluginUsb{usbContext: tc, vendorID: vendorID, productIDs: productIDs}, 0)
if testPlugin != nil { if testPlugin != nil {
t.Error("vpu plugin test fail: newDevicePlugin should fail with 0 sharedDevNum") t.Error("vpu plugin test fail: newDevicePlugin should fail with 0 sharedDevNum")
} }

View File

@ -0,0 +1,10 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: intel-vpu-plugin
spec:
template:
spec:
containers:
- name: intel-vpu-plugin
args: ["--mode=2"]

View File

@ -0,0 +1,11 @@
bases:
- ../../base/
patches:
- add_command_args.yaml
patchesJson6902:
- target:
group: apps
version: v1
kind: DaemonSet
name: intel-vpu-plugin
path: volumes_patch.yaml

View File

@ -0,0 +1,12 @@
- op: replace
path: /spec/template/spec/containers/0/volumeMounts/0/mountPath
value: /dev/xlnk
- op: replace
path : /spec/template/spec/containers/0/volumeMounts/0/name
value: devxlnk
- op: replace
path: /spec/template/spec/volumes/0/name
value: devxlnk
- op: replace
path: /spec/template/spec/volumes/0/hostPath/path
value: /dev/xlnk