gpu: Start using GetPreferredAllocation with fractional resources
Move the reallocate logic to GetPreferredAllocation and simplify Allocate to use the kubelet's device IDs.

Signed-off-by: Tuomas Katila <tuomas.katila@intel.com>
This commit is contained in:
parent db7e5bfc55
commit 8f6a235b5d
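For background on the call order this commit relies on: the kubelet asks a plugin for a preferred device set via GetPreferredAllocation (only when the plugin advertises GetPreferredAllocationAvailable in its options) and afterwards calls Allocate with the device IDs it actually picked, which is why the fractional bookkeeping can move into the preferred-allocation step. Below is a minimal, illustrative sketch of that shape only; the fractionalPlugin type is hypothetical and pluginapi is assumed to be k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1.

// Sketch only: the two kubelet-driven calls, in the order they happen.
// fractionalPlugin is a hypothetical stand-in, not a type from this commit.
package sketch

import (
    "context"
    "fmt"

    pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
)

type fractionalPlugin struct{}

// Step 1: the kubelet proposes available device IDs and asks for a preferred subset.
func (p *fractionalPlugin) GetPreferredAllocation(_ context.Context, rqt *pluginapi.PreferredAllocationRequest) (*pluginapi.PreferredAllocationResponse, error) {
    resp := &pluginapi.PreferredAllocationResponse{}

    for _, creq := range rqt.ContainerRequests {
        n := int(creq.GetAllocationSize())
        ids := creq.GetAvailableDeviceIDs()
        if n > len(ids) {
            n = len(ids)
        }
        // A real implementation would pick per-card fractional devices here.
        resp.ContainerResponses = append(resp.ContainerResponses,
            &pluginapi.ContainerPreferredAllocationResponse{DeviceIDs: ids[:n]})
    }

    return resp, nil
}

// Step 2: the kubelet allocates; DevicesIDs normally echo the preferred set.
func (p *fractionalPlugin) Allocate(_ context.Context, rqt *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
    resp := &pluginapi.AllocateResponse{}

    for _, creq := range rqt.ContainerRequests {
        fmt.Println("device IDs chosen by the kubelet:", creq.GetDevicesIDs())
        resp.ContainerResponses = append(resp.ContainerResponses, &pluginapi.ContainerAllocateResponse{})
    }

    return resp, nil
}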
@@ -197,6 +197,10 @@ func newDevicePlugin(sysfsDir, devfsDir string, options cliOptions) *devicePlugin {

// Implement the PreferredAllocator interface.
func (dp *devicePlugin) GetPreferredAllocation(rqt *pluginapi.PreferredAllocationRequest) (*pluginapi.PreferredAllocationResponse, error) {
    if dp.resMan != nil {
        return dp.resMan.GetPreferredFractionalAllocation(rqt)
    }

    response := &pluginapi.PreferredAllocationResponse{}

    for _, req := range rqt.ContainerRequests {
@@ -356,7 +360,7 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {

func (dp *devicePlugin) Allocate(request *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
    if dp.resMan != nil {
        return dp.resMan.ReallocateWithFractionalResources(request)
        return dp.resMan.CreateFractionalResourceResponse(request)
    }

    return nil, &dpapi.UseDefaultMethodError{}
@@ -48,11 +48,15 @@ func (n *mockNotifier) Notify(newDeviceTree dpapi.DeviceTree) {

type mockResourceManager struct{}

func (m *mockResourceManager) ReallocateWithFractionalResources(*v1beta1.AllocateRequest) (*v1beta1.AllocateResponse, error) {
func (m *mockResourceManager) CreateFractionalResourceResponse(*v1beta1.AllocateRequest) (*v1beta1.AllocateResponse, error) {
    return &v1beta1.AllocateResponse{}, &dpapi.UseDefaultMethodError{}
}
func (m *mockResourceManager) SetDevInfos(rm.DeviceInfoMap) {}

func (m *mockResourceManager) GetPreferredFractionalAllocation(*v1beta1.PreferredAllocationRequest) (*v1beta1.PreferredAllocationResponse, error) {
    return &v1beta1.PreferredAllocationResponse{}, &dpapi.UseDefaultMethodError{}
}

func createTestFiles(root string, devfsdirs, sysfsdirs []string, sysfsfiles map[string][]byte) (string, string, error) {
    sysfs := path.Join(root, "sys")
    devfs := path.Join(root, "dev")
@@ -47,8 +47,6 @@ const (
    grpcAddress    = "unix:///var/lib/kubelet/pod-resources/kubelet.sock"
    grpcBufferSize = 4 * 1024 * 1024
    grpcTimeout    = 5 * time.Second

    retryTimeout = 1 * time.Second
)

// Errors.
@@ -65,10 +63,13 @@ func (e *zeroPendingErr) Error() string {
type podCandidate struct {
    pod                     *v1.Pod
    name                    string
    allocatedContainers     int
    allocatedContainerCount int
    allocationTargetNum     int
}

// DeviceInfo is a subset of deviceplugin.DeviceInfo
// It's a lighter version of the full DeviceInfo as it is used
// to store fractional devices.
type DeviceInfo struct {
    envs  map[string]string
    nodes []pluginapi.DeviceSpec
@@ -79,18 +80,32 @@ type getClientFunc func(string, time.Duration, int) (podresourcesv1.PodResources

// ResourceManager interface for the fractional resource handling.
type ResourceManager interface {
    ReallocateWithFractionalResources(*pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error)
    CreateFractionalResourceResponse(*pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error)
    GetPreferredFractionalAllocation(*pluginapi.PreferredAllocationRequest) (*pluginapi.PreferredAllocationResponse, error)
    SetDevInfos(DeviceInfoMap)
}

type ContainerAssignments struct {
    deviceIds map[string]bool
    tileEnv   string
}

type PodAssignmentDetails struct {
    containers []ContainerAssignments
}

type resourceManager struct {
    deviceInfos      DeviceInfoMap
    nodeName         string
    clientset        kubernetes.Interface
    deviceInfos      DeviceInfoMap
    prGetClientFunc  getClientFunc
    assignments      map[string]PodAssignmentDetails // pod name -> assignment details
    nodeName         string
    skipID           string
    fullResourceName string
    retryTimeout     time.Duration
    cleanupInterval  time.Duration
    mutex            sync.RWMutex // for devTree updates during scan
    cleanupMutex     sync.RWMutex // for assignment details during cleanup
}

// NewDeviceInfo creates a new DeviceInfo.
@@ -124,32 +139,104 @@ func NewResourceManager(skipID, fullResourceName string) (ResourceManager, error) {
        skipID:           skipID,
        fullResourceName: fullResourceName,
        prGetClientFunc:  podresources.GetV1Client,
        assignments:      make(map[string]PodAssignmentDetails),
        retryTimeout:     1 * time.Second,
        cleanupInterval:  2 * time.Minute,
    }

    klog.Info("GPU device plugin resource manager enabled")

    go func() {
        ticker := time.NewTicker(rm.cleanupInterval)

        for range ticker.C {
            klog.V(4).Info("Running cleanup")

            // Gather both running and pending pods. It might happen that
            // cleanup is triggered between GetPreferredAllocation and Allocate
            // and it would remove the assignment data for the soon-to-be allocated pod
            running := rm.listPodsOnNodeWithState(string(v1.PodRunning))
            for podName, podItem := range rm.listPodsOnNodeWithState(string(v1.PodPending)) {
                running[podName] = podItem
            }

            func() {
                rm.cleanupMutex.Lock()
                defer rm.cleanupMutex.Unlock()

                for podName := range rm.assignments {
                    if _, found := running[podName]; !found {
                        klog.V(4).Info("Removing from assignments: ", podName)
                        delete(rm.assignments, podName)
                    }
                }
            }()

            klog.V(4).Info("Cleanup done")
        }
    }()

    return &rm, nil
}
// ReallocateWithFractionalResources runs the fractional resource logic.
// Generate a unique key for Pod.
func getPodKey(pod *v1.Pod) string {
    return pod.Namespace + "&" + pod.Name
}

// Generate a unique key for PodResources.
func getPodResourceKey(res *podresourcesv1.PodResources) string {
    return res.Namespace + "&" + res.Name
}
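A quick illustration of the key format the two helpers agree on: a minimal same-package sketch with made-up values (v1, metav1 and podresourcesv1 are the aliases this file already imports, plus fmt).

// Sketch, assuming it sits in the same package as getPodKey/getPodResourceKey.
func ExamplePodKeys() {
    pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "my-gpu-pod"}}
    res := &podresourcesv1.PodResources{Namespace: "default", Name: "my-gpu-pod"}

    fmt.Println(getPodKey(pod))         // default&my-gpu-pod
    fmt.Println(getPodResourceKey(res)) // default&my-gpu-pod
}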
func (rm *resourceManager) listPodsOnNodeWithState(state string) map[string]*v1.Pod {
    pods := make(map[string]*v1.Pod)

    selector, err := fields.ParseSelector("spec.nodeName=" + rm.nodeName +
        ",status.phase=" + state)

    if err != nil {
        return pods
    }

    podList, err := rm.clientset.CoreV1().Pods(v1.NamespaceAll).List(context.Background(), metav1.ListOptions{
        FieldSelector: selector.String(),
    })

    if err != nil {
        return pods
    }

    for i := range podList.Items {
        key := getPodKey(&podList.Items[i])
        pods[key] = &podList.Items[i]
    }

    return pods
}
// CreateFractionalResourceResponse returns allocate response with the details
// assigned in GetPreferredFractionalAllocation
// This intentionally only logs errors and returns with the UseDefaultMethodError,
// in case any errors are hit. This is to avoid clusters filling up with unexpected admission errors.
func (rm *resourceManager) ReallocateWithFractionalResources(request *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
    if !isInputOk(request, rm.skipID) {
func (rm *resourceManager) CreateFractionalResourceResponse(request *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
    if !isAllocateRequestOk(request, rm.skipID) {
        // it is better to leave allocated gpu devices as is and return
        return nil, &dpapi.UseDefaultMethodError{}
    }

    klog.V(4).Info("Proposed device ids: ", request.ContainerRequests[0].DevicesIDs)

    podCandidate, err := rm.findAllocationPodCandidate()
    if _, ok := err.(*retryErr); ok {
    if errors.Is(err, &retryErr{}) {
        klog.Warning("retrying POD resolving after sleeping")
        time.Sleep(retryTimeout)
        time.Sleep(rm.retryTimeout)

        podCandidate, err = rm.findAllocationPodCandidate()
    }

    if err != nil {
        if _, ok := err.(*zeroPendingErr); !ok {
        if !errors.Is(err, &zeroPendingErr{}) {
            klog.Error("allocation candidate not found, perhaps the GPU scheduler extender is not called, err:", err)
        }
        // it is better to leave allocated gpu devices as is and return
@@ -157,13 +244,170 @@ func (rm *resourceManager) ReallocateWithFractionalResources(request *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
    }

    pod := podCandidate.pod
    cards := containerCards(pod, podCandidate.allocatedContainers)
    affinityMask := containerTileAffinityMask(pod, podCandidate.allocatedContainers)

    return rm.createAllocateResponse(cards, affinityMask)
    rm.cleanupMutex.Lock()

    assignment, found := rm.assignments[getPodKey(pod)]
    if !found {
        rm.cleanupMutex.Unlock()
        klog.Error("couldn't find allocation info from assignments:", getPodKey(pod))

        return nil, &dpapi.UseDefaultMethodError{}
    }

func isInputOk(rqt *pluginapi.AllocateRequest, skipID string) bool {
    containerIndex := podCandidate.allocatedContainerCount

    affinityMask := assignment.containers[containerIndex].tileEnv
    getPrefDevices := assignment.containers[containerIndex].deviceIds

    rm.cleanupMutex.Unlock()

    devIds := request.ContainerRequests[0].DevicesIDs

    // Check if all the preferred devices were also used
    if len(devIds) != len(getPrefDevices) {
        klog.Warningf("Allocate called with odd number of device IDs: %d vs %d", len(devIds), len(getPrefDevices))
    }

    for _, devID := range devIds {
        if _, found := getPrefDevices[devID]; !found {
            klog.Warningf("Not preferred device used in Allocate: %s (%v)", devID, getPrefDevices)
        }
    }

    klog.V(4).Info("Allocate affinity mask: ", affinityMask)
    klog.V(4).Info("Allocate device ids: ", devIds)

    return rm.createAllocateResponse(devIds, affinityMask)
}
func (rm *resourceManager) GetPreferredFractionalAllocation(request *pluginapi.PreferredAllocationRequest) (
    *pluginapi.PreferredAllocationResponse, error) {
    if !isPreferredAllocationRequestOk(request, rm.skipID) {
        // it is better to leave allocated gpu devices as is and return
        return &pluginapi.PreferredAllocationResponse{}, nil
    }

    klog.V(4).Info("GetPreferredAllocation request: ", request)

    podCandidate, err := rm.findAllocationPodCandidate()
    if errors.Is(err, &retryErr{}) {
        klog.Warning("retrying POD resolving after sleeping")
        time.Sleep(rm.retryTimeout)

        podCandidate, err = rm.findAllocationPodCandidate()
    }

    if err != nil {
        if !errors.Is(err, &zeroPendingErr{}) {
            klog.Error("allocation candidate not found, perhaps the GPU scheduler extender is not called, err:", err)
        }

        // Return empty response as returning an error causes
        // the pod to be labeled as UnexpectedAdmissionError
        return &pluginapi.PreferredAllocationResponse{}, nil
    }

    pod := podCandidate.pod
    containerIndex := podCandidate.allocatedContainerCount
    cards := containerCards(pod, containerIndex)
    affinityMask := containerTileAffinityMask(pod, containerIndex)
    podKey := getPodKey(pod)

    creq := request.ContainerRequests[0]

    klog.V(4).Info("Get preferred fractional allocation: ",
        podKey, creq.AllocationSize, creq.MustIncludeDeviceIDs, creq.AvailableDeviceIDs)

    deviceIds := selectDeviceIDsForContainer(
        int(creq.AllocationSize), cards, creq.AvailableDeviceIDs, creq.MustIncludeDeviceIDs)

    // Map container assignment details per pod name

    rm.cleanupMutex.Lock()

    assignments, found := rm.assignments[podKey]

    if !found {
        assignments.containers = make([]ContainerAssignments, podCandidate.allocationTargetNum)
    }

    assignments.containers[containerIndex].tileEnv = affinityMask
    // Store device ids so we can double check the ones in Allocate
    assignments.containers[containerIndex].deviceIds = make(map[string]bool)
    for _, devID := range deviceIds {
        assignments.containers[containerIndex].deviceIds[devID] = true
    }

    rm.assignments[podKey] = assignments

    rm.cleanupMutex.Unlock()

    klog.V(4).Info("Selected devices for container: ", deviceIds)

    response := pluginapi.PreferredAllocationResponse{
        ContainerResponses: []*pluginapi.ContainerPreferredAllocationResponse{
            {DeviceIDs: deviceIds},
        },
    }

    return &response, nil
}
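To make the hand-off between the two calls concrete, here is a small same-package sketch with made-up values: a GAS card annotation of "card0,card1" and an allocation size of 2 lead the preferred-allocation step to pick one fractional device per annotated card and to record them under the pod's "namespace&name" key, which CreateFractionalResourceResponse reads back when the kubelet later calls Allocate.

// Sketch, same-package assumption; selectDeviceIDsForContainer and
// ContainerAssignments are the helpers introduced in this commit.
func ExamplePreferredAllocationBookkeeping() {
    cards := []string{"card0", "card1"} // parsed from the gas card annotation
    available := []string{"card0-0", "card0-1", "card1-0", "card1-1"}

    deviceIds := selectDeviceIDsForContainer(2, cards, available, nil)
    fmt.Println(deviceIds) // [card0-0 card1-0]

    // What gets stored for container 0 of the pod and checked again in
    // CreateFractionalResourceResponse when Allocate arrives.
    assignment := ContainerAssignments{
        deviceIds: map[string]bool{"card0-0": true, "card1-0": true},
        tileEnv:   "", // set from the gas tile annotation when one is present
    }
    _ = assignment
}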
// selectDeviceIDsForContainer selects suitable device ids from deviceIds and mustHaveDeviceIds
// the selection is guided by the cards list.
func selectDeviceIDsForContainer(requestedCount int, cards, deviceIds, mustHaveDeviceIds []string) []string {
    getBaseCard := func(deviceId string) string {
        return strings.Split(deviceId, "-")[0]
    }

    if requestedCount < len(cards) {
        klog.Warningf("Requested count is less than card count: %d vs %d.", requestedCount, len(cards))
        cards = cards[0:requestedCount]
    }

    if requestedCount > len(cards) {
        klog.Warningf("Requested count is higher than card count: %d vs %d.", requestedCount, len(cards))
    }

    // map of cardX -> device id list
    available := map[string][]string{}
    // Keep the last used index so we can pick the next one
    availableIndex := map[string]int{}

    // Place must have IDs first so they get used
    for _, devID := range mustHaveDeviceIds {
        baseCard := getBaseCard(devID)
        available[baseCard] = append(available[baseCard], devID)
    }

    for _, devID := range deviceIds {
        baseCard := getBaseCard(devID)
        available[baseCard] = append(available[baseCard], devID)
    }

    selected := []string{}

    for _, card := range cards {
        indexNow := availableIndex[card]

        availableDevices, found := available[card]
        if !found {
            klog.Warningf("card %s is not found from known devices: %v", card, available)
            continue
        }

        if indexNow < len(availableDevices) {
            selected = append(selected, availableDevices[indexNow])
            indexNow++
            availableIndex[card] = indexNow
        }
    }

    return selected
}
func isAllocateRequestOk(rqt *pluginapi.AllocateRequest, skipID string) bool {
    // so far kubelet calls allocate for each container separately. If that changes, we need to refine our logic.
    if len(rqt.ContainerRequests) != 1 {
        klog.Warning("multi-container allocation request not supported")
@@ -180,6 +424,23 @@ func isInputOk(rqt *pluginapi.AllocateRequest, skipID string) bool {
    return true
}

func isPreferredAllocationRequestOk(rqt *pluginapi.PreferredAllocationRequest, skipID string) bool {
    // so far kubelet calls allocate for each container separately. If that changes, we need to refine our logic.
    if len(rqt.ContainerRequests) != 1 {
        klog.Warning("multi-container allocation request not supported")
        return false
    }

    crqt := rqt.ContainerRequests[0]
    for _, id := range crqt.AvailableDeviceIDs {
        if id == skipID {
            return false // intentionally not printing anything, this request is skipped
        }
    }

    return true
}

// findAllocationPodCandidate tries to find the best allocation candidate pod, which must be:
// -pending for this node
// -using GPU resources in its spec
@@ -230,6 +491,7 @@ func (rm *resourceManager) findAllocationPodCandidate() (*podCandidate, error) {
        }
    }

    // .name here refers to a namespace+name combination
    sort.Slice(timestampedCandidates,
        func(i, j int) bool {
            return pendingPods[timestampedCandidates[i].name].Annotations[gasTSAnnotation] <
@@ -249,29 +511,11 @@ func (rm *resourceManager) findAllocationPodCandidate() (*podCandidate, error) {

// getNodePendingGPUPods returns a map of pod names -> pods that are pending and use the gpu.
func (rm *resourceManager) getNodePendingGPUPods() (map[string]*v1.Pod, error) {
    selector, err := fields.ParseSelector("spec.nodeName=" + rm.nodeName +
        ",status.phase=" + string(v1.PodPending))
    pendingPods := rm.listPodsOnNodeWithState(string(v1.PodPending))

    if err != nil {
        return nil, errors.Wrap(err, "unable to parse selector")
    }

    pendingPodList, err := rm.clientset.CoreV1().Pods(v1.NamespaceAll).List(context.Background(), metav1.ListOptions{
        FieldSelector: selector.String(),
    })

    if err != nil {
        return nil, errors.Wrap(err, "unable to list pods")
    }

    // make a map ouf of the list, accept only GPU-using pods
    pendingPods := make(map[string]*v1.Pod)

    for i := range pendingPodList.Items {
        pod := &pendingPodList.Items[i]

        if numGPUUsingContainers(pod, rm.fullResourceName) > 0 {
            pendingPods[pod.Name] = pod
    for podName, pod := range pendingPods {
        if numGPUUsingContainers(pod, rm.fullResourceName) == 0 {
            delete(pendingPods, podName)
        }
    }
@@ -312,13 +556,15 @@ func (rm *resourceManager) findAllocationPodCandidates(pendingPods map[string]*v1.Pod
        }
    }

    if pod, pending := pendingPods[podRes.Name]; pending {
    key := getPodResourceKey(podRes)

    if pod, pending := pendingPods[key]; pending {
        allocationTargetNum := numGPUUsingContainers(pod, rm.fullResourceName)
        if numContainersAllocated < allocationTargetNum {
            candidate := podCandidate{
                pod:                     pod,
                name:                    podRes.Name,
                allocatedContainers:     numContainersAllocated,
                name:                    key,
                allocatedContainerCount: numContainersAllocated,
                allocationTargetNum:     allocationTargetNum,
            }
            candidates = append(candidates, candidate)
@@ -335,19 +581,17 @@ func (rm *resourceManager) SetDevInfos(deviceInfos DeviceInfoMap) {
    rm.deviceInfos = deviceInfos
}

func (rm *resourceManager) createAllocateResponse(cards []string, tileAffinityMask string) (*pluginapi.AllocateResponse, error) {
func (rm *resourceManager) createAllocateResponse(deviceIds []string, tileAffinityMask string) (*pluginapi.AllocateResponse, error) {
    rm.mutex.Lock()
    defer rm.mutex.Unlock()

    allocateResponse := pluginapi.AllocateResponse{}
    cresp := pluginapi.ContainerAllocateResponse{}

    for _, card := range cards {
        newDeviceID := card + "-0"

        dev, ok := rm.deviceInfos[newDeviceID]
    for _, devID := range deviceIds {
        dev, ok := rm.deviceInfos[devID]
        if !ok {
            klog.Warningf("No device info for %q, using default allocation method devices", newDeviceID)
            klog.Warningf("No device info for %q, using default allocation method devices", devID)
            return nil, &dpapi.UseDefaultMethodError{}
        }

@@ -376,6 +620,7 @@ func (rm *resourceManager) createAllocateResponse(cards []string, tileAffinityMask string) (*pluginapi.AllocateResponse, error) {
    if cresp.Envs == nil {
        cresp.Envs = make(map[string]string)
    }

    cresp.Envs[levelZeroAffinityMaskEnvVar] = tileAffinityMask
}
@@ -416,7 +661,7 @@ func containerCards(pod *v1.Pod, gpuUsingContainerIndex int) []string {
        cards := strings.Split(cardList, ",")
        if len(cards) > 0 && len(cardList) > 0 {
            if gpuUsingContainerIndex == i {
                klog.V(3).Infof("Cards for container nr %v in pod %v are %v", gpuUsingContainerIndex, pod.Name, cards)
                klog.V(3).Infof("Cards for container nr %v in pod %v are %v", gpuUsingContainerIndex, getPodKey(pod), cards)
                return cards
            }
            i++
@@ -429,16 +674,13 @@ func containerCards(pod *v1.Pod, gpuUsingContainerIndex int) []string {
}
func convertTileInfoToEnvMask(tileInfo string) string {
    cardTileList := strings.Split(tileInfo, ",")
    cards := strings.Split(tileInfo, ",")

    var tileIndices []string
    tileIndices := make([]string, len(cards))

    for i, cardList := range cardTileList {
        cards := strings.Split(cardList, ",")

        for _, cardTileCombos := range cards {
    for i, cardTileCombos := range cards {
        cardTileSplit := strings.Split(cardTileCombos, ":")
        if len(cardTileSplit) < 2 {
        if len(cardTileSplit) != 2 {
            klog.Warningf("invalid card tile combo string (%v)", cardTileCombos)
            return ""
        }
@@ -448,21 +690,26 @@ func convertTileInfoToEnvMask(tileInfo string) string {
        var combos []string

        for _, tile := range tiles {
            if !strings.HasPrefix(tile, "gt") {
                klog.Warningf("invalid tile syntax (%v)", tile)
                return ""
            }

            tileNoStr := strings.TrimPrefix(tile, "gt")
            tileNo, err := strconv.ParseInt(tileNoStr, 10, 16)

            if err != nil {
                continue
                klog.Warningf("invalid tile syntax (%v)", tile)
                return ""
            }

            levelZeroCardTileCombo :=
                strconv.FormatInt(int64(i), 10) + "." +
                    strconv.FormatInt(int64(tileNo), 10)
                    strconv.FormatInt(tileNo, 10)
            combos = append(combos, levelZeroCardTileCombo)
        }

        tileIndices = append(tileIndices, strings.Join(combos, ","))
    }
        tileIndices[i] = strings.Join(combos, ",")
    }

    return strings.Join(tileIndices, ",")
@@ -472,11 +719,12 @@ func convertTileInfoToEnvMask(tileInfo string) string {
// Indices should be passed to level zero env variable to guide execution
// gpuUsingContainerIndex 0 == first gpu-using container in the pod.
// annotation example:
// gas-container-tiles=card0:gt0+gt1,card1:gt0|card2:gt1+gt2||card0:gt3
// gas-container-tiles=card0:gt0+gt1,card1:gt0|card2:gt1+gt2||card0:gt3.
func containerTileAffinityMask(pod *v1.Pod, gpuUsingContainerIndex int) string {
    fullAnnotation := pod.Annotations[gasTileAnnotation]
    onlyDividers := strings.Count(fullAnnotation, "|") == len(fullAnnotation)

    if fullAnnotation == "" {
    if fullAnnotation == "" || onlyDividers {
        return ""
    }

@@ -484,6 +732,7 @@ func containerTileAffinityMask(pod *v1.Pod, gpuUsingContainerIndex int) string {
    klog.Infof("%s:%v", fullAnnotation, tileLists)

    i := 0

    for _, containerTileInfo := range tileLists {
        if len(containerTileInfo) == 0 {
            continue
@@ -497,6 +746,7 @@ func containerTileAffinityMask(pod *v1.Pod, gpuUsingContainerIndex int) string {
    }

    klog.Warningf("couldn't find tile info for gpu using container index %v", gpuUsingContainerIndex)

    return ""
}
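A worked example of the annotation format documented above (a same-package sketch with a made-up pod): the card's position inside a container's own list becomes the Level Zero device index, tiles keep their number, and empty segments between '|' separators are skipped without consuming a container index, matching the parsing tests further down.

// Sketch, same-package assumption; gasTileAnnotation is the constant the
// function above reads, the pod values are hypothetical.
func ExampleContainerTileAffinityMask() {
    pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{
        Annotations: map[string]string{
            gasTileAnnotation: "card0:gt0+gt1,card1:gt0|card2:gt1+gt2||card0:gt3",
        },
    }}

    fmt.Println(containerTileAffinityMask(pod, 0)) // 0.0,0.1,1.0
    fmt.Println(containerTileAffinityMask(pod, 1)) // 0.1,0.2
    fmt.Println(containerTileAffinityMask(pod, 2)) // 0.3
}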
@@ -72,7 +72,7 @@ func (w *mockPodResources) List(ctx context.Context,
    resp := podresourcesv1.ListPodResourcesResponse{}
    for _, pod := range w.pods {
        resp.PodResources = append(resp.PodResources, &podresourcesv1.PodResources{
            Name: pod.ObjectMeta.Name, Containers: []*podresourcesv1.ContainerResources{{}},
            Name: pod.ObjectMeta.Name, Namespace: pod.ObjectMeta.Namespace, Containers: []*podresourcesv1.ContainerResources{{}},
        })
    }

@@ -101,6 +101,8 @@ func newMockResourceManager(pods []v1.Pod) ResourceManager {
        },
        skipID:           "all",
        fullResourceName: "gpu.intel.com/i915",
        assignments:      make(map[string]PodAssignmentDetails),
        retryTimeout:     1 * time.Millisecond,
    }

    deviceInfoMap := NewDeviceInfoMap()
@@ -120,10 +122,20 @@ func newMockResourceManager(pods []v1.Pod) ResourceManager {
    return &rm
}
type preferredTestCase struct {
    name                 string
    pods                 []v1.Pod
    containerRequests    []*v1beta1.ContainerPreferredAllocationRequest
    expectDevices        []string
    expectedContainerLen int
}

type testCase struct {
    name                  string
    pods                  []v1.Pod
    prefContainerRequests []*v1beta1.ContainerPreferredAllocationRequest
    containerRequests     []*v1beta1.ContainerAllocateRequest
    prefExpectErr         bool
    expectErr             bool
}

@@ -136,7 +148,166 @@ func TestNewResourceManager(t *testing.T) {
    }
}
func TestReallocateWithFractionalResources(t *testing.T) {
func TestGetPreferredFractionalAllocation(t *testing.T) {
    properTestPod := v1.Pod{
        ObjectMeta: metav1.ObjectMeta{
            Annotations: map[string]string{gasCardAnnotation: "card0"},
            Name:        "TestPod",
            Namespace:   "neimspeis",
        },
        Spec: v1.PodSpec{
            Containers: []v1.Container{
                {
                    Resources: v1.ResourceRequirements{
                        Requests: v1.ResourceList{
                            "gpu.intel.com/i915": resource.MustParse("1"),
                        },
                    },
                },
            },
        },
    }

    gpuLessTestPod := v1.Pod{
        ObjectMeta: metav1.ObjectMeta{
            Name:      "TestPodLessGpu",
            Namespace: "neimspeis",
        },
        Spec: v1.PodSpec{
            Containers: []v1.Container{
                {
                    Resources: v1.ResourceRequirements{
                        Requests: v1.ResourceList{
                            "gpu.less.com/i915": resource.MustParse("1"),
                        },
                    },
                },
            },
        },
    }

    properTestPodMultiGpu := *properTestPod.DeepCopy()
    properTestPodMultiGpu.ObjectMeta.Annotations[gasCardAnnotation] = "card0,card1"

    properTestPodMultiGpu2 := *properTestPod.DeepCopy()
    properTestPodMultiGpu2.ObjectMeta.Annotations[gasCardAnnotation] = "card0,card1,card0"

    monitoringPod := *properTestPod.DeepCopy()
    delete(monitoringPod.Spec.Containers[0].Resources.Requests, "gpu.intel.com/i915")
    monitoringPod.Spec.Containers[0].Resources.Requests["gpu.intel.com/i915_monitoring"] = resource.MustParse("1")

    allContainerRequests := []*v1beta1.ContainerPreferredAllocationRequest{
        {AvailableDeviceIDs: []string{"all"},
            AllocationSize: 1},
    }

    properPrefContainerRequests := []*v1beta1.ContainerPreferredAllocationRequest{
        {AvailableDeviceIDs: []string{"card0-0", "card0-1", "card1-0", "card1-1"},
            AllocationSize: 1},
    }

    outofRangePrefContainerRequests := []*v1beta1.ContainerPreferredAllocationRequest{
        {AvailableDeviceIDs: []string{"card6-0", "card5-1"},
            AllocationSize: 1},
    }

    mustHaveContainerRequests := []*v1beta1.ContainerPreferredAllocationRequest{
        {AvailableDeviceIDs: []string{"card0-0", "card0-1", "card1-0", "card1-1"},
            MustIncludeDeviceIDs: []string{"card0-2"},
            AllocationSize:       2},
    }

    properPrefContainerRequests3 := []*v1beta1.ContainerPreferredAllocationRequest{
        {AvailableDeviceIDs: []string{"card0-0", "card0-1", "card1-0", "card1-1"},
            AllocationSize: 3},
    }
    testCases := []preferredTestCase{
        {
            name:                 "Wrong number of container requests should result in empty response",
            pods:                 []v1.Pod{properTestPod},
            containerRequests:    nil,
            expectedContainerLen: 0,
        },
        {
            name:                 "Proper number of containers with good devices",
            pods:                 []v1.Pod{properTestPod},
            containerRequests:    properPrefContainerRequests,
            expectDevices:        []string{"card0-0"},
            expectedContainerLen: 1,
        },
        {
            name:                 "Inconsistent devices vs. gas' annotated ones",
            pods:                 []v1.Pod{properTestPod},
            containerRequests:    outofRangePrefContainerRequests,
            expectDevices:        []string{},
            expectedContainerLen: 1,
        },
        {
            name:                 "Preferred allocation is with must have device ids",
            pods:                 []v1.Pod{properTestPodMultiGpu},
            containerRequests:    mustHaveContainerRequests,
            expectDevices:        []string{"card0-2", "card1-0"},
            expectedContainerLen: 1,
        },
        {
            name:                 "Duplicate card requesting pod",
            pods:                 []v1.Pod{properTestPodMultiGpu2},
            containerRequests:    properPrefContainerRequests3,
            expectDevices:        []string{"card0-0", "card1-0", "card0-1"},
            expectedContainerLen: 1,
        },
        {
            name:                 "Allocation size is larger than cards assigned",
            pods:                 []v1.Pod{properTestPodMultiGpu},
            containerRequests:    properPrefContainerRequests3,
            expectDevices:        []string{"card0-0", "card1-0"},
            expectedContainerLen: 1,
        },
        {
            name:                 "Monitoring pod is being allocated",
            pods:                 []v1.Pod{monitoringPod},
            containerRequests:    allContainerRequests,
            expectDevices:        []string{},
            expectedContainerLen: 0,
        },
        {
            name:                 "Two pods with one without GPU",
            pods:                 []v1.Pod{properTestPod, gpuLessTestPod},
            containerRequests:    properPrefContainerRequests,
            expectDevices:        []string{"card0-0"},
            expectedContainerLen: 1,
        },
    }

    for _, tCase := range testCases {
        rm := newMockResourceManager(tCase.pods)
        resp, perr := rm.GetPreferredFractionalAllocation(&v1beta1.PreferredAllocationRequest{
            ContainerRequests: tCase.containerRequests,
        })

        if perr != nil {
            t.Errorf("test %v unexpected failure, err:%v", tCase.name, perr)
        }

        if perr == nil {
            // check response
            expectTruef(len(resp.ContainerResponses) == tCase.expectedContainerLen, t, tCase.name, "wrong number of container responses, expected 1")

            if len(tCase.expectDevices) > 0 {
                expectTruef(len(resp.ContainerResponses[0].DeviceIDs) == len(tCase.expectDevices), t, tCase.name,
                    "wrong number of device ids: %d (%v)", len(resp.ContainerResponses[0].DeviceIDs), resp.ContainerResponses[0].DeviceIDs)

                for i, expecteDevice := range tCase.expectDevices {
                    expectTruef(resp.ContainerResponses[0].DeviceIDs[i] == expecteDevice, t, tCase.name,
                        "wrong device id selected: %s", resp.ContainerResponses[0].DeviceIDs[i])
                }
            }
        }
    }
}
func TestCreateFractionalResourceResponse(t *testing.T) {
    properTestPod := v1.Pod{
        ObjectMeta: metav1.ObjectMeta{
            Annotations: map[string]string{gasCardAnnotation: "card0"},
@@ -167,46 +338,65 @@ func TestReallocateWithFractionalResources(t *testing.T) {

    properContainerRequests := []*v1beta1.ContainerAllocateRequest{{DevicesIDs: []string{"card0-0"}}}

    properPrefContainerRequests := []*v1beta1.ContainerPreferredAllocationRequest{
        {AvailableDeviceIDs: []string{"card0-0", "card0-1", "card1-0", "card1-1"},
            AllocationSize: 1},
    }
    testCases := []testCase{
        {
            name:                  "Wrong number of container requests should fail",
            pods:                  []v1.Pod{properTestPod},
            prefContainerRequests: properPrefContainerRequests,
            prefExpectErr:         false,
            containerRequests:     []*v1beta1.ContainerAllocateRequest{},
            expectErr:             true,
        },
        {
            name:                  "Request for monitor resource should fail",
            pods:                  []v1.Pod{properTestPod},
            prefContainerRequests: properPrefContainerRequests,
            prefExpectErr:         true,
            containerRequests:     []*v1beta1.ContainerAllocateRequest{{DevicesIDs: []string{"all"}}},
            expectErr:             true,
        },
        {
            name:                  "Zero pending pods should fail",
            pods:                  []v1.Pod{},
            prefContainerRequests: properPrefContainerRequests,
            prefExpectErr:         true,
            containerRequests:     properContainerRequests,
            expectErr:             true,
        },
        {
            name:                  "Single pending pod without annotations should fail",
            pods:                  []v1.Pod{unAnnotatedTestPod},
            prefContainerRequests: properPrefContainerRequests,
            prefExpectErr:         true,
            containerRequests:     properContainerRequests,
            expectErr:             true,
        },
        {
            name:                  "Single pending pod should succeed",
            pods:                  []v1.Pod{properTestPod},
            prefContainerRequests: properPrefContainerRequests,
            prefExpectErr:         true,
            containerRequests:     properContainerRequests,
            expectErr:             false,
        },
        {
            name:                  "Two pending pods without timestamps should fail",
            pods:                  []v1.Pod{properTestPod, properTestPod2},
            prefContainerRequests: properPrefContainerRequests,
            prefExpectErr:         true,
            containerRequests:     properContainerRequests,
            expectErr:             true,
        },
        {
            name:                  "Two pending pods with timestamps should reduce to one candidate and succeed",
            pods:                  []v1.Pod{timeStampedProperTestPod, timeStampedProperTestPod2},
            prefContainerRequests: properPrefContainerRequests,
            prefExpectErr:         true,
            containerRequests:     properContainerRequests,
            expectErr:             false,
        },
@@ -214,7 +404,15 @@ func TestReallocateWithFractionalResources(t *testing.T) {

    for _, tCase := range testCases {
        rm := newMockResourceManager(tCase.pods)
        resp, err := rm.ReallocateWithFractionalResources(&v1beta1.AllocateRequest{
        _, perr := rm.GetPreferredFractionalAllocation(&v1beta1.PreferredAllocationRequest{
            ContainerRequests: tCase.prefContainerRequests,
        })

        if (perr != nil) && !tCase.prefExpectErr {
            t.Errorf("test %v unexpected failure, err:%v", tCase.name, perr)
        }

        resp, err := rm.CreateFractionalResourceResponse(&v1beta1.AllocateRequest{
            ContainerRequests: tCase.containerRequests,
        })

@@ -239,7 +437,7 @@ func TestReallocateWithFractionalResources(t *testing.T) {
    }
}
func TestReallocateWithFractionalResourcesWithOneCardTwoTiles(t *testing.T) {
func TestCreateFractionalResourceResponseWithOneCardTwoTiles(t *testing.T) {
    properTestPod := v1.Pod{
        ObjectMeta: metav1.ObjectMeta{
            Annotations: map[string]string{
@@ -260,27 +458,40 @@ func TestReallocateWithFractionalResourcesWithOneCardTwoTiles(t *testing.T) {
        },
    }

    properPrefContainerRequests := []*v1beta1.ContainerPreferredAllocationRequest{
        {AvailableDeviceIDs: []string{"card0-0", "card0-1", "card1-0", "card1-1"},
            AllocationSize: 1},
    }

    properContainerRequests := []*v1beta1.ContainerAllocateRequest{{DevicesIDs: []string{"card0-0"}}}

    tCase := testCase{
        name:                  "Single pending pod with two tiles should succeed",
        pods:                  []v1.Pod{properTestPod},
        prefContainerRequests: properPrefContainerRequests,
        prefExpectErr:         false,
        containerRequests:     properContainerRequests,
        expectErr:             false,
    }

    rm := newMockResourceManager(tCase.pods)
    resp, err := rm.ReallocateWithFractionalResources(&v1beta1.AllocateRequest{

    _, perr := rm.GetPreferredFractionalAllocation(&v1beta1.PreferredAllocationRequest{
        ContainerRequests: tCase.prefContainerRequests,
    })

    if (perr != nil) && !tCase.prefExpectErr {
        t.Errorf("test %v unexpected failure, err:%v", tCase.name, perr)
    }

    resp, err := rm.CreateFractionalResourceResponse(&v1beta1.AllocateRequest{
        ContainerRequests: tCase.containerRequests,
    })

    if (err != nil) && !tCase.expectErr {
        t.Errorf("test %v unexpected failure, err:%v", tCase.name, err)
    }
    if err == nil {
        if tCase.expectErr {
            t.Errorf("test %v unexpected success", tCase.name)
        } else {

            // check response
            expectTruef(len(resp.ContainerResponses) == 1, t, tCase.name, "wrong number of container responses, expected 1")
            expectTruef(len(resp.ContainerResponses[0].Envs) == 2, t, tCase.name, "wrong number of env variables in container response, expected 2")
@@ -288,10 +499,8 @@ func TestReallocateWithFractionalResourcesWithOneCardTwoTiles(t *testing.T) {
            expectTruef(resp.ContainerResponses[0].Envs[levelZeroAffinityMaskEnvVar] == "0.0,0.1", t, tCase.name, "l0 affinity mask is incorrect")
            expectTruef(len(resp.ContainerResponses[0].Devices) == 1, t, tCase.name, "wrong number of devices, expected 1")
        }
    }
}
func TestReallocateWithFractionalResourcesWithTwoCardsOneTile(t *testing.T) {
func TestCreateFractionalResourceResponseWithTwoCardsOneTile(t *testing.T) {
    properTestPod := v1.Pod{
        ObjectMeta: metav1.ObjectMeta{
            Annotations: map[string]string{
@@ -312,23 +521,40 @@ func TestReallocateWithFractionalResourcesWithTwoCardsOneTile(t *testing.T) {
        },
    }

    properPrefContainerRequests := []*v1beta1.ContainerPreferredAllocationRequest{
        {AvailableDeviceIDs: []string{"card0-0", "card0-1", "card1-0", "card1-1"},
            AllocationSize: 1},
    }

    properContainerRequests := []*v1beta1.ContainerAllocateRequest{{DevicesIDs: []string{"card1-0", "card2-0"}}}

    tCase := testCase{
        name:                  "Single pending pod with two cards and one tile each should succeed",
        pods:                  []v1.Pod{properTestPod},
        prefContainerRequests: properPrefContainerRequests,
        prefExpectErr:         false,
        containerRequests:     properContainerRequests,
        expectErr:             false,
    }

    rm := newMockResourceManager(tCase.pods)
    resp, err := rm.ReallocateWithFractionalResources(&v1beta1.AllocateRequest{

    _, perr := rm.GetPreferredFractionalAllocation(&v1beta1.PreferredAllocationRequest{
        ContainerRequests: tCase.prefContainerRequests,
    })

    if (perr != nil) && !tCase.prefExpectErr {
        t.Errorf("test %v unexpected failure, err:%v", tCase.name, perr)
    }

    resp, err := rm.CreateFractionalResourceResponse(&v1beta1.AllocateRequest{
        ContainerRequests: tCase.containerRequests,
    })

    if (err != nil) && !tCase.expectErr {
        t.Errorf("test %v unexpected failure, err:%v", tCase.name, err)
    }

    if err == nil {
        if tCase.expectErr {
            t.Errorf("test %v unexpected success", tCase.name)
@@ -342,7 +568,7 @@ func TestReallocateWithFractionalResourcesWithTwoCardsOneTile(t *testing.T) {
        }
    }
func TestReallocateWithFractionalResourcesWithThreeCardsTwoTiles(t *testing.T) {
func TestCreateFractionalResourceResponseWithThreeCardsTwoTiles(t *testing.T) {
    properTestPod := v1.Pod{
        ObjectMeta: metav1.ObjectMeta{
            Annotations: map[string]string{
@@ -363,23 +589,40 @@ func TestReallocateWithFractionalResourcesWithThreeCardsTwoTiles(t *testing.T) {
        },
    }

    properPrefContainerRequests := []*v1beta1.ContainerPreferredAllocationRequest{
        {AvailableDeviceIDs: []string{"card0-0", "card0-1", "card1-0", "card1-1", "card2-0", "card2-1"},
            AllocationSize: 1},
    }

    properContainerRequests := []*v1beta1.ContainerAllocateRequest{{DevicesIDs: []string{"card0-0", "card1-0", "card2-0"}}}

    tCase := testCase{
        name:                  "Single pending pod with three cards and two tiles each should succeed",
        pods:                  []v1.Pod{properTestPod},
        prefContainerRequests: properPrefContainerRequests,
        prefExpectErr:         false,
        containerRequests:     properContainerRequests,
        expectErr:             false,
    }

    rm := newMockResourceManager(tCase.pods)
    resp, err := rm.ReallocateWithFractionalResources(&v1beta1.AllocateRequest{

    _, perr := rm.GetPreferredFractionalAllocation(&v1beta1.PreferredAllocationRequest{
        ContainerRequests: tCase.prefContainerRequests,
    })

    if (perr != nil) && !tCase.prefExpectErr {
        t.Errorf("test %v unexpected failure, err:%v", tCase.name, perr)
    }

    resp, err := rm.CreateFractionalResourceResponse(&v1beta1.AllocateRequest{
        ContainerRequests: tCase.containerRequests,
    })

    if (err != nil) && !tCase.expectErr {
        t.Errorf("test %v unexpected failure, err:%v", tCase.name, err)
    }

    if err == nil {
        if tCase.expectErr {
            t.Errorf("test %v unexpected success", tCase.name)
@@ -393,7 +636,7 @@ func TestReallocateWithFractionalResourcesWithThreeCardsTwoTiles(t *testing.T) {
        }
    }
func TestReallocateWithFractionalResourcesWithMultipleContainersTileEach(t *testing.T) {
func TestCreateFractionalResourceResponseWithMultipleContainersTileEach(t *testing.T) {
    properTestPod := v1.Pod{
        ObjectMeta: metav1.ObjectMeta{
            Annotations: map[string]string{
@@ -421,23 +664,44 @@ func TestReallocateWithFractionalResourcesWithMultipleContainersTileEach(t *testing.T) {
        },
    }

    properContainerRequests := []*v1beta1.ContainerAllocateRequest{{DevicesIDs: []string{"card1-0"}}, {DevicesIDs: []string{"card2-0"}}}
    properPrefContainerRequests := []*v1beta1.ContainerPreferredAllocationRequest{
        {AvailableDeviceIDs: []string{"card0-0", "card0-1", "card1-0", "card1-1", "card2-0", "card2-1"},
            AllocationSize: 1},
    }
    _ = properPrefContainerRequests

    properContainerRequests := []*v1beta1.ContainerAllocateRequest{
        {DevicesIDs: []string{"card1-0"}},
        {DevicesIDs: []string{"card2-0"}},
    }

    tCase := testCase{
        name:                  "Single pending pod with two containers with one tile each should fail",
        name:                  "Single pending pod with two containers with one tile each should FAIL",
        pods:                  []v1.Pod{properTestPod},
        prefContainerRequests: properPrefContainerRequests,
        prefExpectErr:         false,
        containerRequests:     properContainerRequests,
        expectErr:             true,
    }

    rm := newMockResourceManager(tCase.pods)
    _, err := rm.ReallocateWithFractionalResources(&v1beta1.AllocateRequest{

    _, perr := rm.GetPreferredFractionalAllocation(&v1beta1.PreferredAllocationRequest{
        ContainerRequests: properPrefContainerRequests,
    })

    if (perr != nil) && !tCase.prefExpectErr {
        t.Errorf("test %v unexpected failure, err:%v", tCase.name, perr)
    }

    _, err := rm.CreateFractionalResourceResponse(&v1beta1.AllocateRequest{
        ContainerRequests: tCase.containerRequests,
    })

    if (err != nil) && !tCase.expectErr {
        t.Errorf("test %v unexpected failure, err:%v", tCase.name, err)
    }

    if err == nil {
        if tCase.expectErr {
            t.Errorf("test %v unexpected success", tCase.name)
@@ -448,8 +712,8 @@ func TestTileAnnotationParsing(t *testing.T) {
func TestTileAnnotationParsing(t *testing.T) {
    type parseTest struct {
        line   string
        index  int
        result string
        index  int
    }

    parseTests := []parseTest{
@@ -488,11 +752,36 @@ func TestTileAnnotationParsing(t *testing.T) {
            index:  2,
            result: "0.0",
        },
        {
            line:   "||card5:gt0,card6:gt4||",
            index:  0,
            result: "0.0,1.4",
        },
        {
            line:   "||card5:gt0,card6:gt4||",
            index:  1,
            result: "",
        },
        {
            line:   "||card5:gt0,card:6:gt4||",
            index:  0,
            result: "",
        },
        {
            line:   "||card5:gt0,card6:gt+gt+gt||",
            index:  0,
            result: "",
        },
        {
            line:   "card1:gtX",
            index:  0,
            result: "",
        },
        {
            line:   "card1:64X",
            index:  0,
            result: "",
        },
        {
            line:   "|",
            index:  0,
@@ -529,6 +818,65 @@ func TestTileAnnotationParsing(t *testing.T) {
    }
}
func TestSelectDeviceIDsForContainerDoubleCards(t *testing.T) {
    cards := []string{
        "card0",
        "card1",
    }

    deviceIds := []string{
        "card0-0",
        "card0-1",
        "card0-2",
        "card1-0",
        "card1-1",
        "card1-2",
    }

    selected := selectDeviceIDsForContainer(2, cards, deviceIds, []string{})
    if len(selected) != 2 {
        t.Errorf("Not the correct amount of devices were selected")
    }

    correctDevices := map[string]bool{
        "card0-0": false,
        "card1-0": false,
    }

    for _, selected := range selected {
        correctDevices[selected] = true
    }

    for dev, used := range correctDevices {
        if !used {
            t.Errorf("correct device was not used: %s", dev)
        }
    }
}

func TestSelectDeviceIDsForContainerSingleCard(t *testing.T) {
    cards := []string{
        "card2",
    }

    deviceIds := []string{
        "card0-0",
        "card0-1",
        "card1-0",
        "card2-0",
        "card2-1",
    }

    selected := selectDeviceIDsForContainer(1, cards, deviceIds, []string{})
    if len(selected) != 1 {
        t.Errorf("Not the correct amount of devices were selected")
    }

    if selected[0] != "card2-0" {
        t.Errorf("First selection is wrong: %s vs %s", selected[0], "card2-0")
    }
}

func expectTruef(predicate bool, t *testing.T, testName, format string, args ...interface{}) {
    if !predicate {
        t.Helper()