diff --git a/cmd/vpu_plugin/README.md b/cmd/vpu_plugin/README.md index d9fb3409..61e16138 100644 --- a/cmd/vpu_plugin/README.md +++ b/cmd/vpu_plugin/README.md @@ -34,6 +34,11 @@ This card has: - 8 MyriadX VPUs - PCIe interface to 6th+ Generation Core PC or Xeon E3/E5 server +[Gen 3 Intel® Movidius™ VPU HDDL VE3](https://www.intel.com/content/www/us/en/products/details/processors/movidius-vpu.html) +This card has: +- 3 Gen 3 Intel® Movidius™ VPU SoCs + + > **Note:** This device plugin need HDDL daemon service to be running either natively or from a container. > To get VCAC-A or Mustang card running hddl, please refer to: > https://github.com/OpenVisualCloud/Dockerfiles/blob/master/VCAC-A/script/setup_hddl.sh @@ -88,6 +93,12 @@ daemonset.apps/intel-vpu-plugin created the nodes' DAC rules must be configured to device plugin socket creation and kubelet registration. Furthermore, the deployments `securityContext` must be configured with appropriate `runAsUser/runAsGroup`. +For xlink devices, deploy the DaemonSet as follows: +```bash +$ kubectl apply -k deployments/vpu_plugin/overlays/xlink +daemonset.apps/intel-vpu-plugin created +``` + ### Deploy by hand For development purposes, it is sometimes convenient to deploy the plugin 'by hand' on a node. 
// PCIPidDeviceType describes one family of PCI-attached VPUs to search for.
type PCIPidDeviceType struct {
	// pids lists the PCI device IDs of this family, spelled exactly as they
	// are compared against the trimmed content of a sysfs "device" file
	// (for example "0x6240").
	pids []string
	// deviceType is the name under which matching devices are grouped.
	deviceType string
	// ratio is the number of matching PCI entries that make up one
	// advertised device.
	ratio int
}

// getPciDeviceCounts walks the entries below sysfsPciDevicesPath and counts,
// for every element of pidSearch, how many entries report vendorID in their
// "vendor" file and one of that element's pids in their "device" file. File
// contents are compared after trimming surrounding whitespace.
//
// The returned slice is index-aligned with pidSearch. A non-nil error is
// returned only when sysfsPciDevicesPath itself cannot be listed; the slice is
// nil in that case. Entries whose "vendor" or "device" file cannot be read are
// skipped rather than being treated as having an empty ID.
func getPciDeviceCounts(sysfsPciDevicesPath string, vendorID string, pidSearch []PCIPidDeviceType) ([]int, error) {
	entries, err := ioutil.ReadDir(sysfsPciDevicesPath)
	if err != nil {
		return nil, fmt.Errorf("listing PCI devices in %q: %w", sysfsPciDevicesPath, err)
	}

	found := make([]int, len(pidSearch))
	for _, entry := range entries {
		devDir := filepath.Join(sysfsPciDevicesPath, entry.Name())

		vidRaw, err := ioutil.ReadFile(filepath.Join(devDir, "vendor"))
		if err != nil {
			// Best effort: one unreadable entry must not abort the scan.
			continue
		}
		if strings.TrimSpace(string(vidRaw)) != vendorID {
			continue
		}

		pidRaw, err := ioutil.ReadFile(filepath.Join(devDir, "device"))
		if err != nil {
			continue
		}
		pid := strings.TrimSpace(string(pidRaw))

		// Credit the entry to every searched type that lists its product ID.
		for i, search := range pidSearch {
			for _, want := range search.pids {
				if pid == want {
					found[i]++
				}
			}
		}
	}
	return found, nil
}
+ deviceCtx interface{} sharedDevNum int scanTicker *time.Ticker scanDone chan bool } -func newDevicePlugin(usbContext gousbContext, vendorID int, productIDs []int, sharedDevNum int) *devicePlugin { +type devicePluginUsb struct { + usbContext gousbContext + vendorID int + productIDs []int +} + +type devicePluginPci struct { + sysfsPciDevicesPath string + vendorIDPCI string + productIDsPCI []PCIPidDeviceType +} + +func newDevicePlugin(deviceCtx interface{}, sharedDevNum int) *devicePlugin { if sharedDevNum < 1 { klog.V(1).Info("The number of containers sharing the same VPU must greater than zero") return nil } return &devicePlugin{ - usbContext: usbContext, - vendorID: vendorID, - productIDs: productIDs, + deviceCtx: deviceCtx, sharedDevNum: sharedDevNum, scanTicker: time.NewTicker(scanFrequency), scanDone: make(chan bool, 1), @@ -103,21 +158,25 @@ func fileExists(filename string) bool { return false } -func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) { +func (dp *devicePlugin) scanUsb(devTree *dpapi.DeviceTree) { var nUsb int - devTree := dpapi.NewDeviceTree() // first check if HDDL sock is there if !fileExists(hddlSockPath) { - return devTree, nil + return } - devs, err := dp.usbContext.OpenDevices(func(desc *gousb.DeviceDesc) bool { + deviceCtx, ok := dp.deviceCtx.(devicePluginUsb) + if !ok { + klog.V(4).Infof("wrong context %s", ok) + } + + devs, err := deviceCtx.usbContext.OpenDevices(func(desc *gousb.DeviceDesc) bool { thisVendor := desc.Vendor thisProduct := desc.Product - for _, v := range dp.productIDs { - klog.V(4).Infof("checking %04x,%04x vs %s,%s", dp.vendorID, v, thisVendor.String(), thisProduct.String()) - if (gousb.ID(dp.vendorID) == thisVendor) && (gousb.ID(v) == thisProduct) { + for _, v := range deviceCtx.productIDs { + klog.V(4).Infof("checking %04x,%04x vs %s,%s", deviceCtx.vendorID, v, thisVendor.String(), thisProduct.String()) + if (gousb.ID(deviceCtx.vendorID) == thisVendor) && (gousb.ID(v) == thisProduct) { nUsb++ } } @@ -163,29 
+222,110 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) { devTree.AddDevice(deviceType, devID, dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, mounts, nil)) } } +} + +func (dp *devicePlugin) scanPci(devTree *dpapi.DeviceTree) { + // first check if HDDL sock is there + if !fileExists(hddlSocketPci) { + return + } + + deviceCtx, ok := dp.deviceCtx.(devicePluginPci) + if !ok { + klog.V(4).Infof("wrong context %s", ok) + } + // Get all PCI devices + pciFound, err := getPciDeviceCounts(deviceCtx.sysfsPciDevicesPath, deviceCtx.vendorIDPCI, deviceCtx.productIDsPCI) + + if err != nil { + klog.V(4).Infof("list pci device %s", err) + } + + // Mount VPU + for i := 0; i < len(pciFound); i++ { + deviceTypePci := deviceCtx.productIDsPCI[i].deviceType + deviceRatio := deviceCtx.productIDsPCI[i].ratio + // If device found + if remainder := pciFound[i] % deviceRatio; remainder == 0 { + count := pciFound[i] / deviceRatio + nodes := []pluginapi.DeviceSpec{ + { + HostPath: xlinkDevNode, + ContainerPath: xlinkDevNode, + Permissions: "rw", + }, + } + + mounts := []pluginapi.Mount{ + { + HostPath: hddlAlive, + ContainerPath: hddlAlive, + }, + { + HostPath: hddlReady, + ContainerPath: hddlReady, + }, + { + HostPath: hddlStartExit, + ContainerPath: hddlStartExit, + }, + { + HostPath: hddlSocketPci, + ContainerPath: hddlSocketPci, + }, + } + // Mount all devices + for i := 0; i < count; i++ { + devID := fmt.Sprintf("%s-device-%d", deviceTypePci, i) + // VPU pci device found and added to node + klog.V(1).Info(devID) + devTree.AddDevice(deviceTypePci, devID, dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, mounts, nil)) + } + } + } +} + +func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) { + devTree := dpapi.NewDeviceTree() + + switch dp.deviceCtx.(type) { + case devicePluginUsb: + dp.scanUsb(&devTree) + case devicePluginPci: + dp.scanPci(&devTree) + default: + } return devTree, nil } func main() { var sharedDevNum int + var scanMode int flag.IntVar(&sharedDevNum, 
"shared-dev-num", 1, "number of containers sharing the same VPU device") + flag.IntVar(&scanMode, "mode", 1, "USB=1 PCI=2") flag.Parse() klog.V(1).Info("VPU device plugin started") - // add lsusb here - ctx := gousb.NewContext() - defer ctx.Close() + var plugin *devicePlugin + if scanMode == 1 { + // add lsusb here + ctx := gousb.NewContext() + defer ctx.Close() - verbosityLevel, err := strconv.Atoi(flag.CommandLine.Lookup("v").Value.String()) - if err == nil { - // gousb (libusb) Debug levels are a 1:1 match to klog levels, just pass through. - ctx.Debug(verbosityLevel) + verbosityLevel, err := strconv.Atoi(flag.CommandLine.Lookup("v").Value.String()) + if err == nil { + // gousb (libusb) Debug levels are a 1:1 match to klog levels, just pass through. + ctx.Debug(verbosityLevel) + } + deviceCtxUsb := devicePluginUsb{usbContext: ctx, vendorID: vendorID, productIDs: productIDs} + plugin = newDevicePlugin(deviceCtxUsb, sharedDevNum) + } else if scanMode == 2 { + deviceCtxPci := devicePluginPci{sysfsPciDevicesPath: sysBusPCIDevice, vendorIDPCI: vendorIDIntel, productIDsPCI: productIDsPCI} + plugin = newDevicePlugin(deviceCtxPci, sharedDevNum) } - - plugin := newDevicePlugin(ctx, vendorID, productIDs, sharedDevNum) if plugin == nil { klog.Fatal("Cannot create device plugin, please check above error messages.") } diff --git a/cmd/vpu_plugin/vpu_plugin_test.go b/cmd/vpu_plugin/vpu_plugin_test.go index ff912150..670228f7 100644 --- a/cmd/vpu_plugin/vpu_plugin_test.go +++ b/cmd/vpu_plugin/vpu_plugin_test.go @@ -16,7 +16,10 @@ package main import ( "flag" + "io/ioutil" "os" + "path/filepath" + "strconv" "testing" "github.com/google/gousb" @@ -31,7 +34,6 @@ func init() { type testCase struct { vendorID int productIDs []int - sharedNum int } //OpenDevices tries to inject gousb compatible fake device info. 
@@ -50,6 +52,45 @@ func (t *testCase) OpenDevices(opener func(desc *gousb.DeviceDesc) bool) ([]*gou return ret, nil } +func createDevice(pciBusRootDir string, bdf string, vid string, pid string) error { + err := os.MkdirAll(filepath.Join(pciBusRootDir, bdf), 0755) + if err != nil { + return err + } + + vidHex := append([]byte(vid), 0xa) + pidHex := append([]byte(pid), 0xa) + + err = ioutil.WriteFile(filepath.Join(pciBusRootDir, bdf, "vendor"), vidHex, 0444) + if err != nil { + return err + } + err = ioutil.WriteFile(filepath.Join(pciBusRootDir, bdf, "device"), pidHex, 0444) + if err != nil { + return err + } + return nil +} + +func createTestPCI(folder string, testPCI []PCIPidDeviceType) error { + var busNum int = 1 + var devNum int = 3 + //Loop for all supported device type + for _, pciPid := range testPCI { + //Loop for pid number + for _, pidVPU := range pciPid.pids { + //Create intended bus number based on ratio + for i := 0; i < devNum*pciPid.ratio; i++ { + if err := createDevice(folder, strconv.Itoa(busNum), vendorIDIntel, pidVPU); err != nil { + return err + } + busNum += 1 + } + } + } + return nil +} + // fakeNotifier implements Notifier interface. 
type fakeNotifier struct { scanDone chan bool @@ -62,20 +103,26 @@ func (n *fakeNotifier) Notify(newDeviceTree dpapi.DeviceTree) { n.scanDone <- true } -func TestScan(t *testing.T) { +func TestScanPci(t *testing.T) { var fN fakeNotifier - f, err := os.Create(hddlSockPath) + f, err := os.Create(hddlSocketPci) if err != nil { t.Error("create fake hddl file failed") } - //inject our fake gousbContext, just borrow vendorID and productIDs from main - tc := &testCase{ - vendorID: vendorID, + + //create a temporary folder to create fake devices files for PCI scanning + tmpPciDir, err := ioutil.TempDir("/tmp", "fake-pci-devices") + if err != nil { + t.Fatal(err) } - //inject some productIDs that not match our target too - tc.productIDs = append(productIDs, 0xdead, 0xbeef) - testPlugin := newDevicePlugin(tc, vendorID, productIDs, 10) + defer os.RemoveAll(tmpPciDir) + //create supported PCI devices file + if err = createTestPCI(tmpPciDir, productIDsPCI); err != nil { + t.Fatal(err) + } + + testPlugin := newDevicePlugin(devicePluginPci{sysfsPciDevicesPath: tmpPciDir, vendorIDPCI: vendorIDIntel, productIDsPCI: productIDsPCI}, 10) if testPlugin == nil { t.Fatal("vpu plugin test failed with newDevicePlugin().") @@ -86,15 +133,18 @@ func TestScan(t *testing.T) { if err != nil { t.Error("vpu plugin test failed with testPlugin.Scan()") } - if len(fN.tree[deviceType]) == 0 { - t.Error("vpu plugin test failed with testPlugin.Scan(): tree len is 0") + //Loop for all supported PCI device type + for _, pciPid := range productIDsPCI { + if len(fN.tree[pciPid.deviceType]) == 0 { + t.Error("vpu plugin test failed with testPlugin.Scan(): tree len is 0") + } + klog.V(4).Infof("tree len of pci %s is %d", pciPid.deviceType, len(fN.tree[pciPid.deviceType])) } - klog.V(4).Infof("tree len is %d", len(fN.tree[deviceType])) //remove the hddl_service.sock and test with no hddl socket case _ = f.Close() _ = os.Remove("/var/tmp/hddl_service.sock") - testPlugin = newDevicePlugin(tc, vendorID, 
productIDs, 10) + testPlugin = newDevicePlugin(devicePluginPci{sysfsPciDevicesPath: tmpPciDir, vendorIDPCI: vendorIDIntel, productIDsPCI: productIDsPCI}, 10) if testPlugin == nil { t.Fatal("vpu plugin test failed with newDevicePlugin() in no hddl_service.sock case.") @@ -110,7 +160,62 @@ func TestScan(t *testing.T) { } //test with sharedNum equals 0 case - testPlugin = newDevicePlugin(tc, vendorID, productIDs, tc.sharedNum) + testPlugin = newDevicePlugin(devicePluginPci{sysfsPciDevicesPath: tmpPciDir, vendorIDPCI: vendorIDIntel, productIDsPCI: productIDsPCI}, 0) + if testPlugin != nil { + t.Error("vpu plugin test fail: newDevicePlugin should fail with 0 sharedDevNum") + } +} + +func TestScan(t *testing.T) { + var fN fakeNotifier + + f, err := os.Create(hddlSockPath) + if err != nil { + t.Error("create fake hddl file failed") + } + //inject our fake gousbContext, just borrow vendorID and productIDs from main + tc := &testCase{ + vendorID: vendorID, + } + //inject some productIDs that not match our target too + tc.productIDs = append(productIDs, 0xdead, 0xbeef) + + testPlugin := newDevicePlugin(devicePluginUsb{usbContext: tc, vendorID: vendorID, productIDs: productIDs}, 10) + + if testPlugin == nil { + t.Fatal("vpu plugin test failed with newDevicePlugin().") + } + + fN.scanDone = testPlugin.scanDone + err = testPlugin.Scan(&fN) + if err != nil { + t.Error("vpu plugin test failed with testPlugin.Scan()") + } + if len(fN.tree[deviceType]) == 0 { + t.Error("vpu plugin test failed with testPlugin.Scan(): tree len is 0") + } + klog.V(4).Infof("tree len of usb is %d", len(fN.tree[deviceType])) + + //remove the hddl_service.sock and test with no hddl socket case + _ = f.Close() + _ = os.Remove("/var/tmp/hddl_service.sock") + testPlugin = newDevicePlugin(devicePluginUsb{usbContext: tc, vendorID: vendorID, productIDs: productIDs}, 10) + + if testPlugin == nil { + t.Fatal("vpu plugin test failed with newDevicePlugin() in no hddl_service.sock case.") + } + + fN.scanDone = 
testPlugin.scanDone + err = testPlugin.Scan(&fN) + if err != nil { + t.Error("vpu plugin test failed with testPlugin.Scan() in no hddl_service.sock case.") + } + if len(fN.tree[deviceType]) != 0 { + t.Error("vpu plugin test failed with testPlugin.Scan(): tree len should be 0 in no hddl_service.sock case.") + } + + //test with sharedNum equals 0 case + testPlugin = newDevicePlugin(devicePluginUsb{usbContext: tc, vendorID: vendorID, productIDs: productIDs}, 0) if testPlugin != nil { t.Error("vpu plugin test fail: newDevicePlugin should fail with 0 sharedDevNum") } diff --git a/deployments/vpu_plugin/overlays/xlink/add_command_args.yaml b/deployments/vpu_plugin/overlays/xlink/add_command_args.yaml new file mode 100644 index 00000000..44d16bb3 --- /dev/null +++ b/deployments/vpu_plugin/overlays/xlink/add_command_args.yaml @@ -0,0 +1,10 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: intel-vpu-plugin +spec: + template: + spec: + containers: + - name: intel-vpu-plugin + args: ["--mode=2"] diff --git a/deployments/vpu_plugin/overlays/xlink/kustomization.yaml b/deployments/vpu_plugin/overlays/xlink/kustomization.yaml new file mode 100644 index 00000000..f0e8d274 --- /dev/null +++ b/deployments/vpu_plugin/overlays/xlink/kustomization.yaml @@ -0,0 +1,11 @@ +bases: +- ../../base/ +patches: +- add_command_args.yaml +patchesJson6902: +- target: + group: apps + version: v1 + kind: DaemonSet + name: intel-vpu-plugin + path: volumes_patch.yaml diff --git a/deployments/vpu_plugin/overlays/xlink/volumes_patch.yaml b/deployments/vpu_plugin/overlays/xlink/volumes_patch.yaml new file mode 100644 index 00000000..dbcaf8d5 --- /dev/null +++ b/deployments/vpu_plugin/overlays/xlink/volumes_patch.yaml @@ -0,0 +1,12 @@ +- op: replace + path: /spec/template/spec/containers/0/volumeMounts/0/mountPath + value: /dev/xlnk +- op: replace + path : /spec/template/spec/containers/0/volumeMounts/0/name + value: devxlnk +- op: replace + path: /spec/template/spec/volumes/0/name + value: 
devxlnk +- op: replace + path: /spec/template/spec/volumes/0/hostPath/path + value: /dev/xlnk