diff --git a/cmd/fpga_plugin/fpga_plugin.go b/cmd/fpga_plugin/fpga_plugin.go index 1c96c47e..1f8413df 100644 --- a/cmd/fpga_plugin/fpga_plugin.go +++ b/cmd/fpga_plugin/fpga_plugin.go @@ -52,6 +52,10 @@ const ( deviceRE = `^intel-fpga-dev.[0-9]+$` portRE = `^intel-fpga-port.[0-9]+$` fmeRE = `^intel-fpga-fme.[0-9]+$` + + // When the device's firmware crashes, the driver reports these values + unhealthyAfuID = "ffffffffffffffffffffffffffffffff" + unhealthyInterfaceID = "ffffffffffffffffffffffffffffffff" ) type getDevTreeFunc func(devices []device) dpapi.DeviceTree @@ -62,6 +66,10 @@ func getRegionDevelTree(devices []device) dpapi.DeviceTree { for _, dev := range devices { for _, region := range dev.regions { + health := pluginapi.Healthy + if region.interfaceID == unhealthyInterfaceID { + health = pluginapi.Unhealthy + } devType := fmt.Sprintf("%s-%s", regionMode, region.interfaceID) devNodes := make([]string, len(region.afus)+1) for num, afu := range region.afus { @@ -69,7 +77,7 @@ func getRegionDevelTree(devices []device) dpapi.DeviceTree { } devNodes[len(region.afus)] = region.devNode regionTree.AddDevice(devType, region.id, dpapi.DeviceInfo{ - State: pluginapi.Healthy, + State: health, Nodes: devNodes, }) } @@ -84,13 +92,17 @@ func getRegionTree(devices []device) dpapi.DeviceTree { for _, dev := range devices { for _, region := range dev.regions { + health := pluginapi.Healthy + if region.interfaceID == unhealthyInterfaceID { + health = pluginapi.Unhealthy + } devType := fmt.Sprintf("%s-%s", regionMode, region.interfaceID) devNodes := make([]string, len(region.afus)) for num, afu := range region.afus { devNodes[num] = afu.devNode } regionTree.AddDevice(devType, region.id, dpapi.DeviceInfo{ - State: pluginapi.Healthy, + State: health, Nodes: devNodes, }) } @@ -106,9 +118,13 @@ func getAfuTree(devices []device) dpapi.DeviceTree { for _, dev := range devices { for _, region := range dev.regions { for _, afu := range region.afus { + health := pluginapi.Healthy + if 
afu.afuID == unhealthyAfuID { + health = pluginapi.Unhealthy + } devType := fmt.Sprintf("%s-%s", afMode, afu.afuID) afuTree.AddDevice(devType, afu.id, dpapi.DeviceInfo{ - State: pluginapi.Healthy, + State: health, Nodes: []string{afu.devNode}, }) } @@ -194,7 +210,7 @@ func (dp *devicePlugin) PostAllocate(response *pluginapi.AllocateResponse) error return nil } -// Scan starts scanning of FPGA devices on the host +// Scan starts scanning FPGA devices on the host func (dp *devicePlugin) Scan(notifier dpapi.Notifier) error { for { devTree, err := dp.scanFPGAs() diff --git a/cmd/fpga_plugin/fpga_plugin_test.go b/cmd/fpga_plugin/fpga_plugin_test.go index 9b18ac2f..68fcb440 100644 --- a/cmd/fpga_plugin/fpga_plugin_test.go +++ b/cmd/fpga_plugin/fpga_plugin_test.go @@ -119,6 +119,23 @@ func getDevices() []device { }, }, }, + { + name: "intel-fpga-dev.2", + regions: []region{ + { + id: "intel-fpga-fme.2", + interfaceID: unhealthyInterfaceID, + devNode: "/dev/intel-fpga-fme.2", + afus: []afu{ + { + id: "intel-fpga-port.2", + afuID: unhealthyAfuID, + devNode: "/dev/intel-fpga-port.2", + }, + }, + }, + }, + }, } } @@ -132,6 +149,10 @@ func TestGetRegionDevelTree(t *testing.T) { State: pluginapi.Healthy, Nodes: []string{"/dev/intel-fpga-port.1", "/dev/intel-fpga-fme.1"}, }) + expected.AddDevice(regionMode+"-"+unhealthyInterfaceID, "intel-fpga-fme.2", dpapi.DeviceInfo{ + State: pluginapi.Unhealthy, + Nodes: []string{"/dev/intel-fpga-port.2", "/dev/intel-fpga-fme.2"}, + }) result := getRegionDevelTree(getDevices()) if !reflect.DeepEqual(result, expected) { @@ -149,6 +170,10 @@ func TestGetRegionTree(t *testing.T) { State: pluginapi.Healthy, Nodes: []string{"/dev/intel-fpga-port.1"}, }) + expected.AddDevice(regionMode+"-"+unhealthyInterfaceID, "intel-fpga-fme.2", dpapi.DeviceInfo{ + State: pluginapi.Unhealthy, + Nodes: []string{"/dev/intel-fpga-port.2"}, + }) result := getRegionTree(getDevices()) if !reflect.DeepEqual(result, expected) { @@ -166,6 +191,10 @@ func TestGetAfuTree(t 
*testing.T) { State: pluginapi.Healthy, Nodes: []string{"/dev/intel-fpga-port.1"}, }) + expected.AddDevice(afMode+"-"+unhealthyAfuID, "intel-fpga-port.2", dpapi.DeviceInfo{ + State: pluginapi.Unhealthy, + Nodes: []string{"/dev/intel-fpga-port.2"}, + }) result := getAfuTree(getDevices()) if !reflect.DeepEqual(result, expected) {