From 7ca5cfcfd6d0610dae4f106a98bde50cdd685b6d Mon Sep 17 00:00:00 2001 From: Ukri Niemimuukko Date: Fri, 21 May 2021 16:21:56 +0300 Subject: [PATCH] add pf skip to gpu nfdhook This corresponds to the previous gpu-plugin skip code. Signed-off-by: Ukri Niemimuukko --- Makefile | 2 +- cmd/gpu_nfdhook/labeler.go | 17 +++++++++++++---- cmd/gpu_nfdhook/labeler_test.go | 20 +++++++++++++++++++- cmd/gpu_plugin/gpu_plugin.go | 4 ++-- cmd/internal/pluginutils/sriov.go | 31 +++++++++++++++++++++++++++++++ 5 files changed, 66 insertions(+), 8 deletions(-) create mode 100644 cmd/internal/pluginutils/sriov.go diff --git a/Makefile b/Makefile index d7b8c46e..f5dc4528 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ OLM_MANIFESTS = deployments/operator/manifests WEBHOOK_IMAGE_FILE = intel-fpga-admissionwebhook-devel.tgz pkgs = $(shell $(GO) list ./... | grep -v vendor | grep -v e2e | grep -v envtest) -cmds = $(shell ls cmd) +cmds = $(shell ls --ignore=internal cmd) e2e_tmp_dir := $(shell mktemp -u -t e2e-tests.XXXXXXXXXX) all: build diff --git a/cmd/gpu_nfdhook/labeler.go b/cmd/gpu_nfdhook/labeler.go index aac11371..f93b3fdd 100644 --- a/cmd/gpu_nfdhook/labeler.go +++ b/cmd/gpu_nfdhook/labeler.go @@ -24,6 +24,7 @@ import ( "strconv" "strings" + "github.com/intel/intel-device-plugins-for-kubernetes/cmd/internal/pluginutils" "github.com/pkg/errors" "k8s.io/klog/v2" ) @@ -86,6 +87,11 @@ func (l *labeler) scan() ([]string, error) { continue } + if pluginutils.IsSriovPFwithVFs(path.Join(l.sysfsDRMDir, f.Name())) { + klog.V(4).Infof("Skipping PF with VF") + continue + } + _, err = os.ReadDir(path.Join(l.sysfsDRMDir, f.Name(), "device/drm")) if err != nil { return gpuNameList, errors.Wrap(err, "Can't read device folder") @@ -221,10 +227,13 @@ func (l *labeler) createLabels() error { l.labels.addNumericLabel(labelNamespace+"memory.max", int64(memoryAmount)) } gpuCount := len(gpuNameList) - // add gpu list label (example: "card0.card1.card2") - 
l.labels[labelNamespace+gpuListLabelName] = strings.Join(gpuNameList, ".") - // all GPUs get default number of millicores (1000) - l.labels.addNumericLabel(labelNamespace+millicoreLabelName, int64(millicoresPerGPU*gpuCount)) + if gpuCount > 0 { + // add gpu list label (example: "card0.card1.card2") + l.labels[labelNamespace+gpuListLabelName] = strings.Join(gpuNameList, ".") + + // all GPUs get default number of millicores (1000) + l.labels.addNumericLabel(labelNamespace+millicoreLabelName, int64(millicoresPerGPU*gpuCount)) + } return nil } diff --git a/cmd/gpu_nfdhook/labeler_test.go b/cmd/gpu_nfdhook/labeler_test.go index 7975cca4..bf0fe605 100644 --- a/cmd/gpu_nfdhook/labeler_test.go +++ b/cmd/gpu_nfdhook/labeler_test.go @@ -63,6 +63,24 @@ func getTestCases() []testcase { "gpu.intel.com/cards": "card0", }, }, + { + sysfsdirs: []string{ + "card0/device/drm/card0", + }, + sysfsfiles: map[string][]byte{ + "card0/device/vendor": []byte("0x8086"), + "card0/device/sriov_numvfs": []byte("1"), + }, + name: "pf with vfs", + memoryOverride: 16000000000, + capabilityFile: map[string][]byte{ + "0/i915_capabilities": []byte( + "platform: new\n" + + "gen: 9"), + }, + expectedRetval: nil, + expectedLabels: labelMap{}, + }, { sysfsdirs: []string{ "card0/device/drm/card0", @@ -244,7 +262,7 @@ func TestLabeling(t *testing.T) { t.Errorf("unexpected return value") } if tc.expectedRetval == nil && !reflect.DeepEqual(labeler.labels, tc.expectedLabels) { - t.Errorf("label mismatch with expectation:\n%v\n%v\n", labeler.labels, tc.expectedLabels) + t.Errorf("test %v label mismatch with expectation:\n%v\n%v\n", tc.name, labeler.labels, tc.expectedLabels) } for filename := range tc.capabilityFile { os.Remove(path.Join(root, filename)) diff --git a/cmd/gpu_plugin/gpu_plugin.go b/cmd/gpu_plugin/gpu_plugin.go index 3951c364..10519009 100644 --- a/cmd/gpu_plugin/gpu_plugin.go +++ b/cmd/gpu_plugin/gpu_plugin.go @@ -29,6 +29,7 @@ import ( pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 
"github.com/intel/intel-device-plugins-for-kubernetes/cmd/gpu_plugin/rm" + "github.com/intel/intel-device-plugins-for-kubernetes/cmd/internal/pluginutils" dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin" ) @@ -159,8 +160,7 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) { return nil, errors.Wrap(err, "Can't read device folder") } - dat, err := os.ReadFile(path.Join(dp.sysfsDir, f.Name(), "device/sriov_numvfs")) - isPFwithVFs := (err == nil && strings.TrimSpace(string(dat)) != "0") + isPFwithVFs := pluginutils.IsSriovPFwithVFs(path.Join(dp.sysfsDir, f.Name())) for _, drmFile := range drmFiles { if dp.controlDeviceReg.MatchString(drmFile.Name()) { diff --git a/cmd/internal/pluginutils/sriov.go b/cmd/internal/pluginutils/sriov.go new file mode 100644 index 00000000..b3e6d1a4 --- /dev/null +++ b/cmd/internal/pluginutils/sriov.go @@ -0,0 +1,31 @@ +// Copyright 2021 Intel Corporation. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package pluginutils + +import ( + "os" + "path" + "strings" +) + +func IsSriovPFwithVFs(pfpath string) bool { // IsSriovPFwithVFs reports whether the file device/sriov_numvfs under pfpath can be read and, once whitespace-trimmed, is anything other than "0"; presumably that file is the kernel's count of enabled SR-IOV virtual functions (TODO confirm against the sysfs ABI docs). + dat, err := os.ReadFile(path.Join(pfpath, "device/sriov_numvfs")) + + if err == nil && strings.TrimSpace(string(dat)) != "0" { // a failed read (err != nil) falls through to false; no numeric parsing is done, so empty or non-numeric content also counts as true + return true + } + + return false +}