diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 9b6da970..2c090d00 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -105,6 +105,7 @@ jobs: image: - intel-fpga-admissionwebhook - intel-fpga-initcontainer + - intel-gpu-fakedev - intel-gpu-initcontainer - intel-gpu-plugin - intel-fpga-plugin diff --git a/.gitignore b/.gitignore index c950689a..d448e9d0 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ cmd/fpga_crihook/fpga_crihook cmd/dlb_plugin/dlb_plugin cmd/fpga_plugin/fpga_plugin cmd/fpga_tool/fpga_tool +cmd/gpu_fakedev/gpu_fakedev cmd/gpu_nfdhook/gpu_nfdhook cmd/gpu_plugin/gpu_plugin cmd/iaa_plugin/iaa_plugin @@ -32,4 +33,4 @@ _build _work *.tgz -charts/operator/crds \ No newline at end of file +charts/operator/crds diff --git a/build/docker/templates/intel-gpu-fakedev.Dockerfile.in b/build/docker/templates/intel-gpu-fakedev.Dockerfile.in new file mode 100644 index 00000000..60073450 --- /dev/null +++ b/build/docker/templates/intel-gpu-fakedev.Dockerfile.in @@ -0,0 +1,8 @@ +#define _ENTRYPOINT_ /usr/local/bin/intel_gpu_fakedev +ARG CMD=gpu_fakedev + +#include "default_plugin.docker" + +LABEL name='intel-gpu-fakedev' +LABEL summary='Fake device file generator for IntelĀ® GPU plugin' +LABEL description='Fake device file generator provides fake sysfs+devfs content for Intel GPU plugin from its initcontainer, for scalability testing' diff --git a/cmd/gpu_fakedev/README.md b/cmd/gpu_fakedev/README.md new file mode 100644 index 00000000..94938ea4 --- /dev/null +++ b/cmd/gpu_fakedev/README.md @@ -0,0 +1,47 @@ +# Fake (GPU) device file generator + +Table of Contents +* [Introduction](#introduction) +* [Configuration](#configuration) +* [Potential improvements](#potential-improvements) +* [Related tools](#related-tools) + +## Introduction + +This is a tool for generating (large number of) fake device files for +k8s device scheduling scalability testing. But it can also be used +just to test (GPU) device plugin functionality without having +corresponding device HW. + +Its "intel-gpu-fakedev" container is intended to be run as first init +container in a device plugin pod, so that device plugin (and its NFD +labeler) see the fake (sysfs + devfs) files generated by the tool, +instead of real host sysfs and devfs content. + +## Configuration + +[Configs](configs/) subdirectory contains example JSON configuration +file(s) for the generator. Currently there's only one example JSON +file, but each new device variant adding feature(s) that have specific +support in device plugin, could have their own fake device config. + +## Potential improvements + +If support for mixed device environment is needed, tool can be updated +to use node / configuration file mapping. Such mappings could be e.g. +in configuration files themselves as node name include / exlude lists, +and tool would use first configuration file matching the node it's +running on. For now, one would need to use different pod / config +specs for different nodes to achieve that... + +Currently JSON config file options and the generated files are tied to +what GPU plugin uses, but if needed, they could be changed to fake +also sysfs + devfs device files used by other plugins. + +## Related tools + +[fakedev-exporter](#https://github.com/intel/fakedev-exporter) project +can be used to schedule suitably configured fake workloads on the fake +devices, and to provide provide fake activity metrics for them to +Prometheus, that look like they were reported by real Prometheus +metric exporters for real workloads running on real devices. diff --git a/cmd/gpu_fakedev/configs/8x-DG1.json b/cmd/gpu_fakedev/configs/8x-DG1.json new file mode 100644 index 00000000..e9840b23 --- /dev/null +++ b/cmd/gpu_fakedev/configs/8x-DG1.json @@ -0,0 +1,8 @@ +{ + "Info": "8x 4 GiB DG1 [Iris Xe MAX Graphics] GPUs", + "DevCount": 8, + "DevMemSize": 4294967296, + "Capabilities": { + "platform": "fake_DG1", + } +} diff --git a/cmd/gpu_fakedev/gpu_fakedev.go b/cmd/gpu_fakedev/gpu_fakedev.go new file mode 100644 index 00000000..ca075c84 --- /dev/null +++ b/cmd/gpu_fakedev/gpu_fakedev.go @@ -0,0 +1,313 @@ +// Copyright 2021-2022 Intel Corporation. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//--------------------------------------------------------------- +// sysfs SPECIFICATION +// +// sys/class/drm/cardX/ +// sys/class/drm/cardX/lmem_total_bytes (gpu memory size, number) +// sys/class/drm/cardX/device/ +// sys/class/drm/cardX/device/vendor (0x8086) +// sys/class/drm/cardX/device/sriov_numvfs (PF only, number of VF GPUs, number) +// sys/class/drm/cardX/device/drm/ +// sys/class/drm/cardX/device/drm/cardX/ +// sys/class/drm/cardX/device/drm/renderD1XX/ +// sys/class/drm/cardX/device/numa_node (Numa node index[1], number) +// [1] indexing these: /sys/devices/system/node/nodeX/ +//--------------------------------------------------------------- +// devfs SPECIFICATION +// +// dev/dri/cardX +// dev/dri/renderD1XX +//--------------------------------------------------------------- + +package main + +import ( + "encoding/json" + "errors" + "flag" + "fmt" + "io/fs" + "log" + "os" + + "golang.org/x/sys/unix" +) + +const ( + dirMode = 0775 + fileMode = 0644 + cardBase = 0 + renderBase = 128 + maxDevs = 128 + sysfsPath = "sys" + devfsPath = "dev" + mib = 1024.0 * 1024.0 + // null device major, minor on linux. + devNullMajor = 1 + devNullMinor = 3 + devNullType = unix.S_IFCHR +) + +var verbose bool + +type genOptions struct { + Capabilities map[string]string // device capabilities mapping for NFD hook + Info string // verbal config description + DevCount int // how many devices to fake + TilesPerDev int // per-device tile count + DevMemSize int // available per-device device-local memory, in bytes + DevsPerNode int // How many devices per Numa node + VfsPerPf int // How many SR-IOV VFs per PF + // fields for counting what was generated + files int + dirs int + devs int +} + +func addSysfsDriTree(root string, opts *genOptions, i int) error { + card := cardBase + i + base := fmt.Sprintf("%s/class/drm/card%d", root, card) + + if err := os.MkdirAll(base, dirMode); err != nil { + return err + } + opts.dirs++ + + data := []byte(fmt.Sprintf("%d", opts.DevMemSize)) + file := fmt.Sprintf("%s/lmem_total_bytes", base) + + if err := os.WriteFile(file, data, fileMode); err != nil { + return err + } + opts.files++ + + path := fmt.Sprintf("%s/device/drm/card%d", base, card) + if err := os.MkdirAll(path, dirMode); err != nil { + return err + } + opts.dirs++ + + path = fmt.Sprintf("%s/device/drm/renderD%d", base, renderBase+i) + if err := os.Mkdir(path, dirMode); err != nil { + return err + } + opts.dirs++ + + data = []byte("0x8086") + file = fmt.Sprintf("%s/device/vendor", base) + + if err := os.WriteFile(file, data, fileMode); err != nil { + return err + } + opts.files++ + + node := 0 + if opts.DevsPerNode > 0 { + node = i / opts.DevsPerNode + } + + data = []byte(fmt.Sprintf("%d", node)) + file = fmt.Sprintf("%s/device/numa_node", base) + + if err := os.WriteFile(file, data, fileMode); err != nil { + return err + } + opts.files++ + + if opts.VfsPerPf > 0 && i%(opts.VfsPerPf+1) == 0 { + data = []byte(fmt.Sprintf("%d", opts.VfsPerPf)) + file = fmt.Sprintf("%s/device/sriov_numvfs", base) + + if err := os.WriteFile(file, data, fileMode); err != nil { + return err + } + opts.files++ + } + + for tile := 0; tile < opts.TilesPerDev; tile++ { + path := fmt.Sprintf("%s/gt/gt%d", base, tile) + if err := os.MkdirAll(path, dirMode); err != nil { + return err + } + opts.dirs++ + } + + return nil +} + +func addDevfsDriTree(root string, opts *genOptions, i int) error { + base := fmt.Sprintf("%s/dri", root) + if err := os.MkdirAll(base, dirMode); err != nil { + return err + } + opts.dirs++ + + mode := uint32(fileMode | devNullType) + devid := int(unix.Mkdev(uint32(devNullMajor), uint32(devNullMinor))) + + file := fmt.Sprintf("%s/card%d", base, cardBase+i) + if err := unix.Mknod(file, mode, devid); err != nil { + return fmt.Errorf("NULL device (%d:%d) node creation failed for '%s': %w", + devNullMajor, devNullMinor, file, err) + } + opts.devs++ + + file = fmt.Sprintf("%s/renderD%d", base, renderBase+i) + if err := unix.Mknod(file, mode, devid); err != nil { + return fmt.Errorf("NULL device (%d:%d) node creation failed for '%s': %w", + devNullMajor, devNullMinor, file, err) + } + opts.devs++ + + return nil +} + +func addDebugfsDriTree(root string, opts *genOptions, i int) error { + base := fmt.Sprintf("%s/kernel/debug/dri/%d", root, i) + if err := os.MkdirAll(base, dirMode); err != nil { + return err + } + opts.dirs++ + + path := fmt.Sprintf("%s/i915_capabilities", base) + f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, fileMode) + + if err != nil { + return err + } + defer f.Close() + opts.files++ + + // keys are in random order which provides extra testing for NFD label parsing code + for key, value := range opts.Capabilities { + line := fmt.Sprintf("%s: %s\n", key, value) + if _, err = f.WriteString(line); err != nil { + return err + } + } + + return nil +} + +func removeExistingDir(path, name string) { + entries, err := os.ReadDir(path) + if err != nil && !errors.Is(err, fs.ErrNotExist) { + log.Fatalf("ERROR: ReadDir() failed on fake %s path '%s': %v", name, path, err) + } + + if len(entries) == 0 { + return + } + + if name == "sysfs" && len(entries) > 2 { + log.Fatalf("ERROR: >2 entries in '%s' - real sysfs?", path) + } + + if name == "devfs" && (entries[0].Name() != "dri" || len(entries) > 1) { + log.Fatalf("ERROR: >1 entries in '%s', or '%s' != 'dri' - real devfs?", path, entries[0].Name()) + } + + log.Printf("WARN: removing already existing fake %s path '%s'", name, path) + + if err = os.RemoveAll(path); err != nil { + log.Fatalf("ERROR: removing existing %s in '%s' failed: %v", name, path, err) + } +} + +// generateDriFiles generates the fake sysfs + debugfs + devfs dirs & files according to given options. +func generateDriFiles(opts genOptions) { + if opts.Info != "" { + log.Printf("Config: '%s'", opts.Info) + } + + removeExistingDir(devfsPath, "devfs") + removeExistingDir(sysfsPath, "sysfs") + log.Printf("Generating fake DRI device(s) sysfs, debugfs and devfs content under '%s' & '%s'", + sysfsPath, devfsPath) + + opts.dirs, opts.files = 0, 0 + for i := 0; i < opts.DevCount; i++ { + if err := addSysfsDriTree(sysfsPath, &opts, i); err != nil { + log.Fatalf("ERROR: dev-%d sysfs tree generation failed: %v", i, err) + } + + if err := addDebugfsDriTree(sysfsPath, &opts, i); err != nil { + log.Fatalf("ERROR: dev-%d debugfs tree generation failed: %v", i, err) + } + + if err := addDevfsDriTree(devfsPath, &opts, i); err != nil { + log.Fatalf("ERROR: dev-%d devfs tree generation failed: %v", i, err) + } + } + log.Printf("Done, created %d dirs, %d devices and %d files.", opts.dirs, opts.devs, opts.files) +} + +// getOptions parses options from given JSON file, validates and returns them. +func getOptions(name string) genOptions { + if name == "" { + log.Fatal("ERROR: no fake device spec provided") + } + + data, err := os.ReadFile(name) + if err != nil { + log.Fatalf("ERROR: reading JSON spec file '%s' failed: %v", name, err) + } + + if verbose { + log.Printf("Using fake device spec: %v\n", string(data)) + } + + var opts genOptions + if err = json.Unmarshal(data, &opts); err != nil { + log.Fatalf("ERROR: Unmarshaling JSON spec file '%s' failed: %v", name, err) + } + + if opts.DevCount < 1 || opts.DevCount > maxDevs { + log.Fatalf("ERROR: invalid device count: 1 <= %d <= %d", opts.DevCount, maxDevs) + } + + if opts.VfsPerPf > 0 { + if opts.TilesPerDev > 0 || opts.DevsPerNode > 0 { + log.Fatalf("ERROR: SR-IOV VFs (%d) with device tiles (%d) or Numa nodes (%d) is unsupported for faking", + opts.VfsPerPf, opts.TilesPerDev, opts.DevsPerNode) + } + + if opts.DevCount%(opts.VfsPerPf+1) != 0 { + log.Fatalf("ERROR: %d devices cannot be evenly split to between set of 1 SR-IOV PF + %d VFs", + opts.DevCount, opts.VfsPerPf) + } + } + + if opts.DevsPerNode > opts.DevCount { + log.Fatalf("ERROR: DevsPerNode (%d) > DevCount (%d)", opts.DevsPerNode, opts.DevCount) + } + + if opts.DevMemSize%mib != 0 { + log.Fatalf("ERROR: Invalid memory size (%f MiB), not even MiB", float64(opts.DevMemSize)/mib) + } + + return opts +} + +func main() { + var name string + + flag.StringVar(&name, "json", "", "JSON spec for fake device sysfs, debugfs and devfs content") + flag.BoolVar(&verbose, "verbose", false, "More verbose output") + flag.Parse() + + generateDriFiles(getOptions(name)) +}