From 40246f64ad3d1bfbedcb7113554ccda5e308807f Mon Sep 17 00:00:00 2001 From: Dmitry Rozhkov Date: Tue, 14 Aug 2018 14:54:49 +0300 Subject: [PATCH] gpu_plugin: add -shared-dev-num option The DRM driver of Intel i915 GPUs allows sharing one device between many containers. Make it possible to use the same device from different containers. The exact number of containers sharing the same device can be limited with the new option -shared-dev-num set to 1 by default. closes #53 --- cmd/gpu_plugin/gpu_plugin.go | 31 +++++++++++++++++++++++-------- cmd/gpu_plugin/gpu_plugin_test.go | 2 +- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/cmd/gpu_plugin/gpu_plugin.go b/cmd/gpu_plugin/gpu_plugin.go index 4d6dc5d3..9aa5e7ee 100644 --- a/cmd/gpu_plugin/gpu_plugin.go +++ b/cmd/gpu_plugin/gpu_plugin.go @@ -47,14 +47,17 @@ type devicePlugin struct { sysfsDir string devfsDir string + sharedDevNum int + gpuDeviceReg *regexp.Regexp controlDeviceReg *regexp.Regexp } -func newDevicePlugin(sysfsDir string, devfsDir string) *devicePlugin { +func newDevicePlugin(sysfsDir, devfsDir string, sharedDevNum int) *devicePlugin { return &devicePlugin{ sysfsDir: sysfsDir, devfsDir: devfsDir, + sharedDevNum: sharedDevNum, gpuDeviceReg: regexp.MustCompile(gpuDeviceRE), controlDeviceReg: regexp.MustCompile(controlDeviceRE), } @@ -112,12 +115,15 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) { } if len(nodes) > 0 { - // Currently only one device type (i915) is supported. - // TODO: check model ID to differentiate device models. - devTree.AddDevice(deviceType, f.Name(), dpapi.DeviceInfo{ - State: pluginapi.Healthy, - Nodes: nodes, - }) + for i := 0; i < dp.sharedDevNum; i++ { + devID := fmt.Sprintf("%s-%d", f.Name(), i) + // Currently only one device type (i915) is supported. + // TODO: check model ID to differentiate device models. + devTree.AddDevice(deviceType, devID, dpapi.DeviceInfo{ + State: pluginapi.Healthy, + Nodes: nodes, + }) + } } } } @@ -127,10 +133,19 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) { } func main() { + var sharedDevNum int + + flag.IntVar(&sharedDevNum, "shared-dev-num", 1, "number of containers sharing the same GPU device") flag.Parse() + + if sharedDevNum < 1 { + glog.Error("The number of containers sharing the same GPU must greater than zero") + os.Exit(1) + } + glog.Info("GPU device plugin started") - plugin := newDevicePlugin(sysfsDrmDirectory, devfsDriDirectory) + plugin := newDevicePlugin(sysfsDrmDirectory, devfsDriDirectory, sharedDevNum) manager := dpapi.NewManager(namespace, plugin) manager.Run() } diff --git a/cmd/gpu_plugin/gpu_plugin_test.go b/cmd/gpu_plugin/gpu_plugin_test.go index 35f21acc..3fb59c64 100644 --- a/cmd/gpu_plugin/gpu_plugin_test.go +++ b/cmd/gpu_plugin/gpu_plugin_test.go @@ -75,7 +75,7 @@ func TestScan(t *testing.T) { }, } - testPlugin := newDevicePlugin(sysfs, devfs) + testPlugin := newDevicePlugin(sysfs, devfs, 1) if testPlugin == nil { t.Fatal("Failed to create a deviceManager")