From 5b5180ae009a7b5d047613878b4469ae7239fb7c Mon Sep 17 00:00:00 2001
From: Ukri Niemimuukko
Date: Wed, 21 Oct 2020 14:01:39 +0300
Subject: [PATCH] gpu_nfdhook memory amount reading from sysfs

This adds reading of the GPU memory amount from the sysfs.
As a fallback the environment variable GPU_MEMORY_OVERRIDE remains.
Another environment variable GPU_MEMORY_RESERVED can be used to
reserve a dedicated byte amount outside of kubernetes usage.
A reservation that is not smaller than the detected amount results
in a zero memory amount instead of wrapping around.

Signed-off-by: Ukri Niemimuukko
---
 cmd/gpu_nfdhook/README.md       | 13 +++--
 cmd/gpu_nfdhook/labeler.go      | 82 ++++++++++++++++++++++++------
 cmd/gpu_nfdhook/labeler_test.go | 91 ++++++++++++++++++++++++++++++++-
 cmd/gpu_nfdhook/main.go         |  6 +--
 4 files changed, 168 insertions(+), 24 deletions(-)

diff --git a/cmd/gpu_nfdhook/README.md b/cmd/gpu_nfdhook/README.md
index b92c8db9..2309011c 100644
--- a/cmd/gpu_nfdhook/README.md
+++ b/cmd/gpu_nfdhook/README.md
@@ -11,10 +11,13 @@ types. Selected numeric labels can be turned into kubernetes extended resources
 by the NFD, allowing for finer grained resource management for GPU-using PODs.
 
 In the NFD deployment, the hook requires /host-sys -folder to have the host /sys
--folder content mounted, and /host-dev to have the host /dev/ -folder content
+-folder content mounted, and /host-dev to have the host /dev -folder content
 mounted. Write access is not necessary.
 
-There is one supported environment variable named GPU_MEMORY_OVERRIDE, which is
-supposed to hold a numeric value. For systems with GPUs which do not support
-reading the GPU memory amount, the environment variable memory value is turned
-into a GPU memory amount label instead of a read value.
\ No newline at end of file
+GPU memory amount is read from sysfs gt/gt* files and turned into a label.
+There are two supported environment variables named GPU_MEMORY_OVERRIDE and
+GPU_MEMORY_RESERVED. Both are supposed to hold numeric values. For systems with
+older kernel drivers or GPUs which do not support reading the GPU memory
+amount, the GPU_MEMORY_OVERRIDE environment variable value is turned into a GPU
+memory amount label instead of a read value. The GPU_MEMORY_RESERVED value is
+subtracted from the GPU memory amount read from sysfs, clamping at zero.
diff --git a/cmd/gpu_nfdhook/labeler.go b/cmd/gpu_nfdhook/labeler.go
index b6000733..8115ae02 100644
--- a/cmd/gpu_nfdhook/labeler.go
+++ b/cmd/gpu_nfdhook/labeler.go
@@ -35,6 +35,7 @@ const (
 	millicoreLabelName = "millicores"
 	millicoresPerGPU   = 1000
 	memoryOverrideEnv  = "GPU_MEMORY_OVERRIDE"
+	memoryReservedEnv  = "GPU_MEMORY_RESERVED"
 	gpuDeviceRE        = `^card[0-9]+$`
 	controlDeviceRE    = `^controlD[0-9]+$`
 	vendorString       = "0x8086"
@@ -43,8 +44,8 @@ const (
 type labelMap map[string]string
 
 type labeler struct {
-	sysfsDir      string
-	devfsDir      string
+	sysfsDRMDir   string
+	devfsDRIDir   string
 	debugfsDRIDir string
 
 	gpuDeviceReg     *regexp.Regexp
@@ -52,10 +53,10 @@ type labeler struct {
 	labels           labelMap
 }
 
-func newLabeler(sysfsDir, devfsDir, debugfsDRIDir string) *labeler {
+func newLabeler(sysfsDRMDir, devfsDRIDir, debugfsDRIDir string) *labeler {
 	return &labeler{
-		sysfsDir:         sysfsDir,
-		devfsDir:         devfsDir,
+		sysfsDRMDir:      sysfsDRMDir,
+		devfsDRIDir:      devfsDRIDir,
 		debugfsDRIDir:    debugfsDRIDir,
 		gpuDeviceReg:     regexp.MustCompile(gpuDeviceRE),
 		controlDeviceReg: regexp.MustCompile(controlDeviceRE),
@@ -64,7 +65,7 @@ func newLabeler(sysfsDir, devfsDir, debugfsDRIDir string) *labeler {
 }
 
 func (l *labeler) scan() ([]string, error) {
-	files, err := ioutil.ReadDir(l.sysfsDir)
+	files, err := ioutil.ReadDir(l.sysfsDRMDir)
 	gpuNameList := []string{}
 
 	if err != nil {
@@ -77,7 +78,7 @@ func (l *labeler) scan() ([]string, error) {
 			continue
 		}
 
-		dat, err := ioutil.ReadFile(path.Join(l.sysfsDir, f.Name(), "device/vendor"))
+		dat, err := ioutil.ReadFile(path.Join(l.sysfsDRMDir, f.Name(), "device/vendor"))
 		if err != nil {
 			klog.Warning("Skipping. Can't read vendor file: ", err)
 			continue
@@ -88,7 +89,7 @@ func (l *labeler) scan() ([]string, error) {
 			continue
 		}
 
-		drmFiles, err := ioutil.ReadDir(path.Join(l.sysfsDir, f.Name(), "device/drm"))
+		drmFiles, err := ioutil.ReadDir(path.Join(l.sysfsDRMDir, f.Name(), "device/drm"))
 		if err != nil {
 			return gpuNameList, errors.Wrap(err, "Can't read device folder")
 		}
@@ -98,7 +99,7 @@ func (l *labeler) scan() ([]string, error) {
 				//Skipping possible drm control node
 				continue
 			}
-			devPath := path.Join(l.devfsDir, drmFile.Name())
+			devPath := path.Join(l.devfsDRIDir, drmFile.Name())
 			if _, err := os.Stat(devPath); err != nil {
 				continue
 			}
@@ -111,11 +112,10 @@ func (l *labeler) scan() ([]string, error) {
 	return gpuNameList, nil
 }
 
-// getMemoryValues reads the GPU memory amount from the system.
-func (l *labeler) getMemoryAmount( /*cardNum*/ string) uint64 {
-	// reading GPU local memory amount is not yet available in the driver,
-	// so just return the environment variable value
-	envValue := os.Getenv(memoryOverrideEnv)
+// getEnvVarNumber returns the named environment variable parsed as a base-10
+// unsigned integer, or 0 if the variable is unset, empty or not a valid number.
+func getEnvVarNumber(envVarName string) uint64 {
+	envValue := os.Getenv(envVarName)
 	if envValue != "" {
 		val, err := strconv.ParseUint(envValue, 10, 64)
 		if err == nil {
@@ -125,6 +125,58 @@ func (l *labeler) getMemoryAmount( /*cardNum*/ string) uint64 {
 	return 0
 }
 
+// fallback returns the GPU memory amount given in the GPU_MEMORY_OVERRIDE
+// environment variable, or 0 if it is not usable.
+func fallback() uint64 {
+	return getEnvVarNumber(memoryOverrideEnv)
+}
+
+// getMemoryAmount reads the GPU memory amount from the system by summing the
+// gt/gt*/addr_range values found under the sysfs directory of gpuName, and
+// subtracts the GPU_MEMORY_RESERVED amount from the sum. If nothing usable is
+// found in sysfs, the GPU_MEMORY_OVERRIDE value is returned as is. If the
+// reservation is at least as large as the detected amount, 0 is returned.
+func (l *labeler) getMemoryAmount(gpuName string) uint64 {
+	reserved := getEnvVarNumber(memoryReservedEnv)
+	filePath := filepath.Join(l.sysfsDRMDir, gpuName, "gt/gt*/addr_range")
+
+	files, err := filepath.Glob(filePath)
+	if err != nil {
+		klog.V(4).Info("Can't read sysfs folder: ", err)
+		return fallback()
+	}
+
+	mem := uint64(0)
+	for _, fileName := range files {
+		dat, err := ioutil.ReadFile(fileName)
+		if err != nil {
+			klog.Warning("Skipping. Can't read file: ", err)
+			continue
+		}
+
+		n, err := strconv.ParseUint(strings.TrimSpace(string(dat)), 10, 64)
+		if err != nil {
+			klog.Warning("Skipping. Can't convert addr_range: ", err)
+			continue
+		}
+
+		mem += n
+	}
+
+	if mem == 0 {
+		return fallback()
+	}
+
+	// The subtraction below is unsigned, so a reservation larger than the
+	// detected amount would wrap around to a huge bogus value.
+	if reserved >= mem {
+		klog.Warningf("%s value %d is not smaller than the detected GPU memory %d", memoryReservedEnv, reserved, mem)
+		return 0
+	}
+
+	return mem - reserved
+}
+
 // addNumericLabel creates a new label if one doesn't exist. Else the new value is added to the previous value.
 func (lm labelMap) addNumericLabel(labelName string, valueToAdd int64) {
 	value := int64(0)
@@ -193,7 +245,7 @@ func (l *labeler) createLabels() error {
 		l.createCapabilityLabels(gpuNum)
 
 		// read the memory amount to find a proper max allocation value
-		l.labels.addNumericLabel(labelNamespace+"memory.max", int64(l.getMemoryAmount(gpuNum)))
+		l.labels.addNumericLabel(labelNamespace+"memory.max", int64(l.getMemoryAmount(gpuName)))
 	}
 	gpuCount := len(gpuNameList)
 	// add gpu list label (example: "card0.card1.card2")
diff --git a/cmd/gpu_nfdhook/labeler_test.go b/cmd/gpu_nfdhook/labeler_test.go
index 0c60fae3..e4af9c6a 100644
--- a/cmd/gpu_nfdhook/labeler_test.go
+++ b/cmd/gpu_nfdhook/labeler_test.go
@@ -29,6 +29,7 @@ type testcase struct {
 	devfsdirs      []string
 	name           string
 	memoryOverride uint64
+	memoryReserved uint64
 	capabilityFile map[string][]byte
 	expectedRetval error
 	expectedLabels labelMap
@@ -37,6 +38,90 @@ type testcase struct {
 //nolint:funlen
 func getTestCases() []testcase {
 	return []testcase{
+		{
+			sysfsdirs: []string{
+				"card0/device/drm/card0",
+				"card0/gt/gt0",
+			},
+			sysfsfiles: map[string][]byte{
+				"card0/device/vendor":     []byte("0x8086"),
+				"card0/gt/gt0/addr_range": []byte("8086"),
+			},
+			devfsdirs:      []string{"card0"},
+			name:           "successful labeling via gt0/addr_range",
+			memoryOverride: 16000000000,
+			capabilityFile: map[string][]byte{
+				"0/i915_capabilities": []byte(
+					"platform: new\n" +
+						"gen: 9"),
+			},
+			expectedRetval: nil,
+			expectedLabels: labelMap{
+				"gpu.intel.com/millicores":           "1000",
+				"gpu.intel.com/memory.max":           "8086",
+				"gpu.intel.com/platform_new.count":   "1",
+				"gpu.intel.com/platform_new.present": "true",
+				"gpu.intel.com/platform_gen":         "9",
+				"gpu.intel.com/cards":                "card0",
+			},
+		},
+		{
+			sysfsdirs: []string{
+				"card0/device/drm/card0",
+				"card0/gt/gt0",
+				"card0/gt/gt1",
+			},
+			sysfsfiles: map[string][]byte{
+				"card0/device/vendor":     []byte("0x8086"),
+				"card0/gt/gt0/addr_range": []byte("8086"),
+				"card0/gt/gt1/addr_range": []byte("2"),
+			},
+			devfsdirs:      []string{"card0"},
+			name:           "successful labeling via gt0/addr_range and gt1/addr_range",
+			memoryOverride: 16000000000,
+			capabilityFile: map[string][]byte{
+				"0/i915_capabilities": []byte(
+					"platform: new\n" +
+						"gen: 9"),
+			},
+			expectedRetval: nil,
+			expectedLabels: labelMap{
+				"gpu.intel.com/millicores":           "1000",
+				"gpu.intel.com/memory.max":           "8088",
+				"gpu.intel.com/platform_new.count":   "1",
+				"gpu.intel.com/platform_new.present": "true",
+				"gpu.intel.com/platform_gen":         "9",
+				"gpu.intel.com/cards":                "card0",
+			},
+		},
+		{
+			sysfsdirs: []string{
+				"card0/device/drm/card0",
+				"card0/gt/gt0",
+			},
+			sysfsfiles: map[string][]byte{
+				"card0/device/vendor":     []byte("0x8086"),
+				"card0/gt/gt0/addr_range": []byte("8086"),
+			},
+			devfsdirs:      []string{"card0"},
+			name:           "successful labeling via gt0/addr_range and reserved memory",
+			memoryOverride: 16000000000,
+			memoryReserved: 86,
+			capabilityFile: map[string][]byte{
+				"0/i915_capabilities": []byte(
+					"platform: new\n" +
+						"gen: 9"),
+			},
+			expectedRetval: nil,
+			expectedLabels: labelMap{
+				"gpu.intel.com/millicores":           "1000",
+				"gpu.intel.com/memory.max":           "8000",
+				"gpu.intel.com/platform_new.count":   "1",
+				"gpu.intel.com/platform_new.present": "true",
+				"gpu.intel.com/platform_gen":         "9",
+				"gpu.intel.com/cards":                "card0",
+			},
+		},
 		{
 			sysfsdirs: []string{
 				"card0/device/drm/card0",
@@ -45,7 +130,7 @@ func getTestCases() []testcase {
 				"card0/device/vendor": []byte("0x8086"),
 			},
 			devfsdirs:      []string{"card0"},
-			name:           "successful labeling",
+			name:           "successful labeling via memory override",
 			memoryOverride: 16000000000,
 			capabilityFile: map[string][]byte{
 				"0/i915_capabilities": []byte(
@@ -159,6 +244,7 @@ func TestLabeling(t *testing.T) {
 			tc.createFiles(t, sysfs, devfs, root)
 
 			os.Setenv(memoryOverrideEnv, strconv.FormatUint(tc.memoryOverride, 10))
+			os.Setenv(memoryReservedEnv, strconv.FormatUint(tc.memoryReserved, 10))
 
 			labeler := newLabeler(sysfs, devfs, root)
 			err = labeler.createLabels()
@@ -172,6 +258,9 @@ func TestLabeling(t *testing.T) {
 			for filename := range tc.capabilityFile {
 				os.Remove(path.Join(root, filename))
 			}
+			for filename := range tc.sysfsfiles {
+				os.Remove(path.Join(sysfs, filename))
+			}
 		})
 	}
 }
diff --git a/cmd/gpu_nfdhook/main.go b/cmd/gpu_nfdhook/main.go
index 5dd0a7bc..2dd15452 100644
--- a/cmd/gpu_nfdhook/main.go
+++ b/cmd/gpu_nfdhook/main.go
@@ -23,13 +23,13 @@ import (
 const (
 	sysfsDirectory      = "/host-sys"
 	devfsDirectory      = "/host-dev"
-	sysfsDrmDirectory   = sysfsDirectory + "/class/drm"
-	devfsDriDirectory   = devfsDirectory + "/dri"
+	sysfsDRMDirectory   = sysfsDirectory + "/class/drm"
+	devfsDRIDirectory   = devfsDirectory + "/dri"
 	debugfsDRIDirectory = sysfsDirectory + "/kernel/debug/dri"
 )
 
 func main() {
-	l := newLabeler(sysfsDrmDirectory, devfsDriDirectory, debugfsDRIDirectory)
+	l := newLabeler(sysfsDRMDirectory, devfsDRIDirectory, debugfsDRIDirectory)
 	err := l.createLabels()
 	if err != nil {
 		klog.Errorf("%+v", err)