Merge pull request #1116 from eero-t/gpu_fakedev

Add fake GPU device generator for scalability testing
This commit is contained in:
Ed Bartosh 2022-12-07 18:44:08 +02:00 committed by GitHub
commit b4c2bd3afe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 379 additions and 1 deletions

View File

@ -105,6 +105,7 @@ jobs:
image:
- intel-fpga-admissionwebhook
- intel-fpga-initcontainer
- intel-gpu-fakedev
- intel-gpu-initcontainer
- intel-gpu-plugin
- intel-fpga-plugin

3
.gitignore vendored
View File

@ -11,6 +11,7 @@ cmd/fpga_crihook/fpga_crihook
cmd/dlb_plugin/dlb_plugin
cmd/fpga_plugin/fpga_plugin
cmd/fpga_tool/fpga_tool
cmd/gpu_fakedev/gpu_fakedev
cmd/gpu_nfdhook/gpu_nfdhook
cmd/gpu_plugin/gpu_plugin
cmd/iaa_plugin/iaa_plugin
@ -32,4 +33,4 @@ _build
_work
*.tgz
charts/operator/crds
charts/operator/crds

View File

@ -0,0 +1,8 @@
#define _ENTRYPOINT_ /usr/local/bin/intel_gpu_fakedev
ARG CMD=gpu_fakedev
#include "default_plugin.docker"
LABEL name='intel-gpu-fakedev'
LABEL summary='Fake device file generator for Intel® GPU plugin'
LABEL description='Fake device file generator provides fake sysfs+devfs content for Intel GPU plugin from its initcontainer, for scalability testing'

47
cmd/gpu_fakedev/README.md Normal file
View File

@ -0,0 +1,47 @@
# Fake (GPU) device file generator
Table of Contents
* [Introduction](#introduction)
* [Configuration](#configuration)
* [Potential improvements](#potential-improvements)
* [Related tools](#related-tools)
## Introduction
This is a tool for generating (large number of) fake device files for
k8s device scheduling scalability testing. But it can also be used
just to test (GPU) device plugin functionality without having
corresponding device HW.
Its "intel-gpu-fakedev" container is intended to be run as first init
container in a device plugin pod, so that device plugin (and its NFD
labeler) see the fake (sysfs + devfs) files generated by the tool,
instead of real host sysfs and devfs content.
## Configuration
[Configs](configs/) subdirectory contains example JSON configuration
file(s) for the generator. Currently there's only one example JSON
file, but each new device variant adding feature(s) that have specific
support in device plugin, could have their own fake device config.
## Potential improvements
If support for mixed device environment is needed, tool can be updated
to use node / configuration file mapping. Such mappings could be e.g.
in configuration files themselves as node name include / exclude lists,
and tool would use first configuration file matching the node it's
running on. For now, one would need to use different pod / config
specs for different nodes to achieve that...
Currently JSON config file options and the generated files are tied to
what GPU plugin uses, but if needed, they could be changed to fake
also sysfs + devfs device files used by other plugins.
## Related tools
[fakedev-exporter](https://github.com/intel/fakedev-exporter) project
can be used to schedule suitably configured fake workloads on the fake
devices, and to provide fake activity metrics for them to
Prometheus, that look like they were reported by real Prometheus
metric exporters for real workloads running on real devices.

View File

@ -0,0 +1,8 @@
{
"Info": "8x 4 GiB DG1 [Iris Xe MAX Graphics] GPUs",
"DevCount": 8,
"DevMemSize": 4294967296,
"Capabilities": {
    "platform": "fake_DG1"
}
}

View File

@ -0,0 +1,313 @@
// Copyright 2021-2022 Intel Corporation. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//---------------------------------------------------------------
// sysfs SPECIFICATION
//
// sys/class/drm/cardX/
// sys/class/drm/cardX/lmem_total_bytes (gpu memory size, number)
// sys/class/drm/cardX/device/
// sys/class/drm/cardX/device/vendor (0x8086)
// sys/class/drm/cardX/device/sriov_numvfs (PF only, number of VF GPUs, number)
// sys/class/drm/cardX/device/drm/
// sys/class/drm/cardX/device/drm/cardX/
// sys/class/drm/cardX/device/drm/renderD1XX/
// sys/class/drm/cardX/device/numa_node (Numa node index[1], number)
// [1] indexing these: /sys/devices/system/node/nodeX/
//---------------------------------------------------------------
// devfs SPECIFICATION
//
// dev/dri/cardX
// dev/dri/renderD1XX
//---------------------------------------------------------------
package main
import (
"encoding/json"
"errors"
"flag"
"fmt"
"io/fs"
"log"
"os"
"golang.org/x/sys/unix"
)
const (
	// Permissions for all generated directories and files.
	dirMode = 0775
	fileMode = 0644
	// DRM node numbering: cardN starts at 0, renderDN at 128
	// (matches the kernel DRM render-node minor convention).
	cardBase = 0
	renderBase = 128
	// Upper limit for the DevCount configuration option.
	maxDevs = 128
	// Fake content is generated under these relative paths.
	sysfsPath = "sys"
	devfsPath = "dev"
	// Bytes in a MiB; DevMemSize must be an even multiple of this.
	mib = 1024.0 * 1024.0
	// null device major, minor on linux.
	devNullMajor = 1
	devNullMinor = 3
	devNullType = unix.S_IFCHR
)

// verbose enables extra logging; set from the -verbose command line flag.
var verbose bool
// genOptions holds the JSON-configurable fake device generation options
// (exported fields, filled by json.Unmarshal in getOptions) plus
// unexported counters of what was actually generated.
type genOptions struct {
	Capabilities map[string]string // device capabilities mapping for NFD hook
	Info string // verbal config description
	DevCount int // how many devices to fake
	TilesPerDev int // per-device tile count
	DevMemSize int // available per-device device-local memory, in bytes
	DevsPerNode int // How many devices per Numa node
	VfsPerPf int // How many SR-IOV VFs per PF
	// fields for counting what was generated
	files int
	dirs int
	devs int
}
// writeSysfsFile writes data to the given fake sysfs file with the
// standard file mode, and bumps the generated-files counter on success.
func writeSysfsFile(file string, data []byte, opts *genOptions) error {
	if err := os.WriteFile(file, data, fileMode); err != nil {
		return err
	}
	opts.files++
	return nil
}

// addSysfsDriTree creates the fake sysfs tree for fake device 'i' under
// 'root' and updates the generation counters in opts. See the "sysfs
// SPECIFICATION" comment at the top of the file for the resulting layout.
func addSysfsDriTree(root string, opts *genOptions, i int) error {
	card := cardBase + i
	base := fmt.Sprintf("%s/class/drm/card%d", root, card)

	if err := os.MkdirAll(base, dirMode); err != nil {
		return err
	}
	opts.dirs++

	// per-device local memory size, in bytes
	file := fmt.Sprintf("%s/lmem_total_bytes", base)
	if err := writeSysfsFile(file, []byte(fmt.Sprintf("%d", opts.DevMemSize)), opts); err != nil {
		return err
	}

	path := fmt.Sprintf("%s/device/drm/card%d", base, card)
	if err := os.MkdirAll(path, dirMode); err != nil {
		return err
	}
	opts.dirs++

	path = fmt.Sprintf("%s/device/drm/renderD%d", base, renderBase+i)
	if err := os.Mkdir(path, dirMode); err != nil {
		return err
	}
	opts.dirs++

	// Intel PCI vendor ID
	file = fmt.Sprintf("%s/device/vendor", base)
	if err := writeSysfsFile(file, []byte("0x8086"), opts); err != nil {
		return err
	}

	// Numa node index: devices are split between nodes in groups of
	// DevsPerNode when it is set, otherwise all go to node 0.
	node := 0
	if opts.DevsPerNode > 0 {
		node = i / opts.DevsPerNode
	}

	file = fmt.Sprintf("%s/device/numa_node", base)
	if err := writeSysfsFile(file, []byte(fmt.Sprintf("%d", node)), opts); err != nil {
		return err
	}

	// Every (VfsPerPf+1)th device acts as an SR-IOV PF for the
	// following VfsPerPf VF devices; only PFs get sriov_numvfs.
	if opts.VfsPerPf > 0 && i%(opts.VfsPerPf+1) == 0 {
		file = fmt.Sprintf("%s/device/sriov_numvfs", base)
		if err := writeSysfsFile(file, []byte(fmt.Sprintf("%d", opts.VfsPerPf)), opts); err != nil {
			return err
		}
	}

	// per-tile directories for multi-tile devices
	for tile := 0; tile < opts.TilesPerDev; tile++ {
		path := fmt.Sprintf("%s/gt/gt%d", base, tile)
		if err := os.MkdirAll(path, dirMode); err != nil {
			return err
		}
		opts.dirs++
	}

	return nil
}
// addDevfsDriTree creates the fake devfs entries (dri/cardX and
// dri/renderD1XX) for fake device 'i' under 'root'. Both entries are
// character device nodes aliasing the null device, so they can be
// opened without any real GPU hardware present.
func addDevfsDriTree(root string, opts *genOptions, i int) error {
	dri := fmt.Sprintf("%s/dri", root)
	if err := os.MkdirAll(dri, dirMode); err != nil {
		return err
	}
	opts.dirs++

	nodeMode := uint32(fileMode | devNullType)
	nullDev := int(unix.Mkdev(uint32(devNullMajor), uint32(devNullMinor)))

	// card node first, then the render node, as in real devfs.
	nodes := []string{
		fmt.Sprintf("%s/card%d", dri, cardBase+i),
		fmt.Sprintf("%s/renderD%d", dri, renderBase+i),
	}
	for _, node := range nodes {
		if err := unix.Mknod(node, nodeMode, nullDev); err != nil {
			return fmt.Errorf("NULL device (%d:%d) node creation failed for '%s': %w",
				devNullMajor, devNullMinor, node, err)
		}
		opts.devs++
	}

	return nil
}
// addDebugfsDriTree creates the fake debugfs i915_capabilities file for
// fake device 'i' under 'root', filled with the "key: value" pairs from
// the Capabilities configuration map.
func addDebugfsDriTree(root string, opts *genOptions, i int) error {
	dir := fmt.Sprintf("%s/kernel/debug/dri/%d", root, i)
	if err := os.MkdirAll(dir, dirMode); err != nil {
		return err
	}
	opts.dirs++

	name := fmt.Sprintf("%s/i915_capabilities", dir)
	// O_EXCL: error out rather than overwrite a pre-existing file.
	f, err := os.OpenFile(name, os.O_WRONLY|os.O_CREATE|os.O_EXCL, fileMode)
	if err != nil {
		return err
	}
	defer f.Close()
	opts.files++

	// Map iteration order is random, which provides extra testing for
	// the NFD label parsing code.
	for key, value := range opts.Capabilities {
		if _, err := fmt.Fprintf(f, "%s: %s\n", key, value); err != nil {
			return err
		}
	}

	return nil
}
func removeExistingDir(path, name string) {
entries, err := os.ReadDir(path)
if err != nil && !errors.Is(err, fs.ErrNotExist) {
log.Fatalf("ERROR: ReadDir() failed on fake %s path '%s': %v", name, path, err)
}
if len(entries) == 0 {
return
}
if name == "sysfs" && len(entries) > 2 {
log.Fatalf("ERROR: >2 entries in '%s' - real sysfs?", path)
}
if name == "devfs" && (entries[0].Name() != "dri" || len(entries) > 1) {
log.Fatalf("ERROR: >1 entries in '%s', or '%s' != 'dri' - real devfs?", path, entries[0].Name())
}
log.Printf("WARN: removing already existing fake %s path '%s'", name, path)
if err = os.RemoveAll(path); err != nil {
log.Fatalf("ERROR: removing existing %s in '%s' failed: %v", name, path, err)
}
}
// generateDriFiles generates the fake sysfs + debugfs + devfs dirs & files according to given options.
// It removes pre-existing fake trees first (with sanity checks), and
// terminates the process on any generation failure.
func generateDriFiles(opts genOptions) {
	if opts.Info != "" {
		log.Printf("Config: '%s'", opts.Info)
	}

	removeExistingDir(devfsPath, "devfs")
	removeExistingDir(sysfsPath, "sysfs")
	log.Printf("Generating fake DRI device(s) sysfs, debugfs and devfs content under '%s' & '%s'",
		sysfsPath, devfsPath)

	// Reset ALL generation counters — the original reset only dirs and
	// files, leaving devs to whatever the caller's struct contained,
	// which could skew the final summary.
	opts.dirs, opts.files, opts.devs = 0, 0, 0
	for i := 0; i < opts.DevCount; i++ {
		if err := addSysfsDriTree(sysfsPath, &opts, i); err != nil {
			log.Fatalf("ERROR: dev-%d sysfs tree generation failed: %v", i, err)
		}

		if err := addDebugfsDriTree(sysfsPath, &opts, i); err != nil {
			log.Fatalf("ERROR: dev-%d debugfs tree generation failed: %v", i, err)
		}

		if err := addDevfsDriTree(devfsPath, &opts, i); err != nil {
			log.Fatalf("ERROR: dev-%d devfs tree generation failed: %v", i, err)
		}
	}
	log.Printf("Done, created %d dirs, %d devices and %d files.", opts.dirs, opts.devs, opts.files)
}
// getOptions parses options from given JSON file, validates and returns them.
// Any validation failure terminates the process with log.Fatal*.
func getOptions(name string) genOptions {
	if name == "" {
		log.Fatal("ERROR: no fake device spec provided")
	}

	data, err := os.ReadFile(name)
	if err != nil {
		log.Fatalf("ERROR: reading JSON spec file '%s' failed: %v", name, err)
	}

	if verbose {
		log.Printf("Using fake device spec: %v\n", string(data))
	}

	var opts genOptions
	if err = json.Unmarshal(data, &opts); err != nil {
		log.Fatalf("ERROR: Unmarshaling JSON spec file '%s' failed: %v", name, err)
	}

	// device count must fit the card/renderD node numbering ranges
	if opts.DevCount < 1 || opts.DevCount > maxDevs {
		log.Fatalf("ERROR: invalid device count: 1 <= %d <= %d", opts.DevCount, maxDevs)
	}

	if opts.VfsPerPf > 0 {
		// faking SR-IOV together with tiles or Numa nodes is unsupported
		if opts.TilesPerDev > 0 || opts.DevsPerNode > 0 {
			log.Fatalf("ERROR: SR-IOV VFs (%d) with device tiles (%d) or Numa nodes (%d) is unsupported for faking",
				opts.VfsPerPf, opts.TilesPerDev, opts.DevsPerNode)
		}

		// devices must split into whole groups of 1 PF + VfsPerPf VFs
		if opts.DevCount%(opts.VfsPerPf+1) != 0 {
			log.Fatalf("ERROR: %d devices cannot be evenly split to between set of 1 SR-IOV PF + %d VFs",
				opts.DevCount, opts.VfsPerPf)
		}
	}

	if opts.DevsPerNode > opts.DevCount {
		log.Fatalf("ERROR: DevsPerNode (%d) > DevCount (%d)", opts.DevsPerNode, opts.DevCount)
	}

	// Memory size must be a non-negative, even number of MiBs. The
	// original check missed negative values: e.g. -1 MiB passes the
	// modulo test but would generate a bogus negative lmem_total_bytes.
	if opts.DevMemSize < 0 || opts.DevMemSize%mib != 0 {
		log.Fatalf("ERROR: Invalid memory size (%f MiB), not even MiB", float64(opts.DevMemSize)/mib)
	}

	return opts
}
// main parses the command line flags, reads + validates the JSON fake
// device spec, and generates the corresponding fake device files.
func main() {
	var config string

	flag.StringVar(&config, "json", "", "JSON spec for fake device sysfs, debugfs and devfs content")
	flag.BoolVar(&verbose, "verbose", false, "More verbose output")
	flag.Parse()

	opts := getOptions(config)
	generateDriFiles(opts)
}