diff --git a/cmd/gpu_plugin/gpu_plugin.go b/cmd/gpu_plugin/gpu_plugin.go index d820974b..23a497a9 100644 --- a/cmd/gpu_plugin/gpu_plugin.go +++ b/cmd/gpu_plugin/gpu_plugin.go @@ -64,15 +64,58 @@ type cliOptions struct { type preferredAllocationPolicyFunc func(*pluginapi.ContainerPreferredAllocationRequest) []string -// nonePolicy is used for allocating GPU devices randomly. +// nonePolicy is used for allocating GPU devices randomly, while trying +// to select as many individual GPU devices as requested. func nonePolicy(req *pluginapi.ContainerPreferredAllocationRequest) []string { klog.V(2).Info("Select nonePolicy for GPU device allocation") - deviceIds := req.AvailableDeviceIDs[0:req.AllocationSize] + devices := make(map[string]bool) + selected := make(map[string]bool) + neededCount := req.AllocationSize - klog.V(2).Infof("Allocate deviceIds: %q", deviceIds) + // When shared-dev-num is greater than 1, try to find as + // many independent GPUs as possible, to satisfy the request. - return deviceIds + for _, deviceID := range req.AvailableDeviceIDs { + device := strings.Split(deviceID, "-")[0] + + if _, found := devices[device]; !found { + devices[device] = true + selected[deviceID] = true + neededCount-- + + if neededCount == 0 { + break + } + } + } + + // If there were not enough independent GPUs, use remaining untaken deviceIDs. + + if neededCount > 0 { + for _, deviceID := range req.AvailableDeviceIDs { + if _, found := selected[deviceID]; !found { + selected[deviceID] = true + neededCount-- + + if neededCount == 0 { + break + } + } + } + } + + // Convert selected map into an array + + deviceIDs := []string{} + + for deviceID := range selected { + deviceIDs = append(deviceIDs, deviceID) + } + + klog.V(2).Infof("Allocate deviceIds: %q", deviceIDs) + + return deviceIDs } // balancedPolicy is used for allocating GPU devices in balance. diff --git a/cmd/gpu_plugin/gpu_plugin_test.go b/cmd/gpu_plugin/gpu_plugin_test.go index 62536093..a5b59519 100644 --- a/cmd/gpu_plugin/gpu_plugin_test.go +++ b/cmd/gpu_plugin/gpu_plugin_test.go @@ -20,6 +20,7 @@ import ( "path" "path/filepath" "reflect" + "sort" "testing" "github.com/pkg/errors" @@ -99,12 +100,21 @@ func TestGetPreferredAllocation(t *testing.T) { rqt := &v1beta1.PreferredAllocationRequest{ ContainerRequests: []*v1beta1.ContainerPreferredAllocationRequest{ { - AvailableDeviceIDs: []string{"card0-4", "card1-1", "card2-3", "card2-4", "card2-1", "card1-0", "card1-4", "card3-4", "card1-2", "card0-1", "card2-0", "card2-2", "card1-3", "card0-2", "card3-0", "card3-3", "card0-3", "card0-0", "card3-1", "card3-2"}, + AvailableDeviceIDs: []string{"card0-4", "card0-2", "card1-1", "card2-3", "card2-4", "card2-1", "card1-0", "card1-4", "card3-4", "card1-2", "card0-1", "card2-0", "card2-2", "card1-3", "card3-0", "card3-3", "card0-3", "card0-0", "card3-1", "card3-2"}, AllocationSize: 4, }, }, } + rqtNotEnough := &v1beta1.PreferredAllocationRequest{ + ContainerRequests: []*v1beta1.ContainerPreferredAllocationRequest{ + { + AvailableDeviceIDs: []string{"card0-1", "card0-2", "card0-3", "card1-1"}, + AllocationSize: 3, + }, + }, + } + rqtErr := &v1beta1.PreferredAllocationRequest{ ContainerRequests: []*v1beta1.ContainerPreferredAllocationRequest{ { @@ -117,22 +127,24 @@ func TestGetPreferredAllocation(t *testing.T) { plugin := newDevicePlugin("", "", cliOptions{sharedDevNum: 5, resourceManagement: false, preferredAllocationPolicy: "none"}) response, _ := plugin.GetPreferredAllocation(rqt) - if !reflect.DeepEqual(response.ContainerResponses[0].DeviceIDs, []string{"card0-4", "card1-1", "card2-3", "card2-4"}) { - t.Error("Unexpected return value for none preferred allocation") + sort.Strings(response.ContainerResponses[0].DeviceIDs) + + if !reflect.DeepEqual(response.ContainerResponses[0].DeviceIDs, []string{"card0-4", "card1-1", "card2-3", "card3-4"}) { + t.Error("Unexpected return value for none preferred allocation", response.ContainerResponses[0].DeviceIDs) } plugin = newDevicePlugin("", "", cliOptions{sharedDevNum: 5, resourceManagement: false, preferredAllocationPolicy: "balanced"}) response, _ = plugin.GetPreferredAllocation(rqt) if !reflect.DeepEqual(response.ContainerResponses[0].DeviceIDs, []string{"card0-0", "card1-0", "card2-0", "card3-0"}) { - t.Error("Unexpected return value for balanced preferred allocation") + t.Error("Unexpected return value for balanced preferred allocation", response.ContainerResponses[0].DeviceIDs) } plugin = newDevicePlugin("", "", cliOptions{sharedDevNum: 5, resourceManagement: false, preferredAllocationPolicy: "packed"}) response, _ = plugin.GetPreferredAllocation(rqt) if !reflect.DeepEqual(response.ContainerResponses[0].DeviceIDs, []string{"card0-0", "card0-1", "card0-2", "card0-3"}) { - t.Error("Unexpected return value for packed preferred allocation") + t.Error("Unexpected return value for packed preferred allocation", response.ContainerResponses[0].DeviceIDs) } plugin = newDevicePlugin("", "", cliOptions{sharedDevNum: 5, resourceManagement: false, preferredAllocationPolicy: "none"}) @@ -141,6 +153,16 @@ func TestGetPreferredAllocation(t *testing.T) { if response != nil { t.Error("Fail to handle the input error that req.AllocationSize is greater than len(req.AvailableDeviceIDs).") } + + plugin = newDevicePlugin("", "", cliOptions{sharedDevNum: 5, resourceManagement: false, preferredAllocationPolicy: "none"}) + response, _ = plugin.GetPreferredAllocation(rqtNotEnough) + + sort.Strings(response.ContainerResponses[0].DeviceIDs) + + if !reflect.DeepEqual(response.ContainerResponses[0].DeviceIDs, []string{"card0-1", "card0-2", "card1-1"}) { + t.Error("Unexpected return value for none preferred allocation with too few separate devices", + response.ContainerResponses[0].DeviceIDs) + } } func TestAllocate(t *testing.T) {