gpu: Add numa node mapping label for GPUs

Signed-off-by: Tuomas Katila <tuomas.katila@intel.com>
This commit is contained in:
Tuomas Katila 2022-03-09 11:28:46 +02:00
parent 21a3a931e4
commit bdd72c8cf7
3 changed files with 214 additions and 0 deletions

View File

@ -34,12 +34,15 @@ name | type | description|
|`gpu.intel.com/cards`| string | list of card names separated by '`.`'. The names match host `card*`-folders under `/sys/class/drm/`. Deprecated, use `gpu-numbers`.
|`gpu.intel.com/gpu-numbers`| string | list of numbers separated by '`.`'. The numbers correspond to device file numbers for the primary nodes of given GPUs in kernel DRI subsystem, listed as `/dev/dri/card<num>` in devfs, and `/sys/class/drm/card<num>` in sysfs.
|`gpu.intel.com/tiles`| number | sum of all detected GPU tiles in the system.
|`gpu.intel.com/numa-gpu-map`| string | list of numa node to gpu mappings.
If the value of the `gpu-numbers` label would not fit into the 63 character length limit, you will also get labels `gpu-numbers2`,
`gpu-numbers3`... until all the gpu numbers have been labeled.
The tile count `gpu.intel.com/tiles` describes the total number of tiles on the system. The system is expected to be homogeneous, and thus the number of tiles per GPU can be calculated by dividing the tile count by the GPU count.
The `numa-gpu-map` label is a list of NUMA node to GPU mapping items separated by `_`. Each list item consists of a NUMA node id, a `-`, and a list of GPU numbers separated by `.`. For example, `0-1.2.3` means that NUMA node 0 has GPUs 1, 2 and 3. A more complex example is `0-0.1_1-3.4`, where NUMA node 0 has GPUs 0 and 1, and NUMA node 1 has GPUs 3 and 4. As with `gpu-numbers`, if the value exceeds the maximum label length, it is continued in additional labels `numa-gpu-map2`, `numa-gpu-map3`, and so on.
## PCI-groups (optional)
GPUs which share the same pci paths under `/sys/devices/pci*` can be grouped into a label. GPU nums are separated by '`.`' and

View File

@ -37,6 +37,7 @@ const (
millicoreLabelName = "millicores"
pciGroupLabelName = "pci-groups"
tilesLabelName = "tiles"
numaMappingName = "numa-gpu-map"
millicoresPerGPU = 1000
memoryOverrideEnv = "GPU_MEMORY_OVERRIDE"
memoryReservedEnv = "GPU_MEMORY_RESERVED"
@ -190,6 +191,25 @@ func (l *labeler) getTileCount(gpuName string) (numTiles uint64) {
return uint64(len(files))
}
// getNumaNode looks up the NUMA node id of the given card from the
// "device/numa_node" file under the labeler's sysfs DRM directory.
//
// The file content is trimmed of surrounding whitespace and parsed as a
// base-10 integer. If the file cannot be read or its content cannot be
// parsed, a warning is logged and -1 is returned.
func (l *labeler) getNumaNode(gpuName string) int {
	raw, readErr := os.ReadFile(filepath.Join(l.sysfsDRMDir, gpuName, "device/numa_node"))
	if readErr != nil {
		klog.Warning("Can't read file: ", readErr)

		return -1
	}

	trimmed := strings.TrimSpace(string(raw))

	node, parseErr := strconv.ParseInt(trimmed, 10, 64)
	if parseErr != nil {
		klog.Warning("Can't convert numa_node: ", parseErr)

		return -1
	}

	return int(node)
}
// addNumericLabel creates a new label if one doesn't exist. Else the new value is added to the previous value.
func (lm labelMap) addNumericLabel(labelName string, valueToAdd int64) {
value := int64(0)
@ -353,6 +373,8 @@ func (l *labeler) createLabels() error {
gpuNumList := []string{}
tileCount := 0
numaMapping := make(map[int][]string)
for _, gpuName := range gpuNameList {
gpuNum := ""
// extract gpu number as a string. scan() has already checked name syntax
@ -367,6 +389,17 @@ func (l *labeler) createLabels() error {
memoryAmount := l.getMemoryAmount(gpuName, numTiles)
gpuNumList = append(gpuNumList, gpuName[4:])
// get numa node of the GPU
numaNode := l.getNumaNode(gpuName)
if numaNode >= 0 {
// and store the gpu under that node id
numaList := numaMapping[numaNode]
numaList = append(numaList, gpuNum)
numaMapping[numaNode] = numaList
}
// try to add capability labels
l.createCapabilityLabels(gpuNum, numTiles)
@ -390,6 +423,18 @@ func (l *labeler) createLabels() error {
l.labels[labelNamespace+gpuNumListLabelName+strconv.FormatInt(int64(i+1), 10)] = gpuNumLists[i]
}
if len(numaMapping) > 0 {
// add numa node mapping to labels: gpu.intel.com/numa-gpu-map="0-0.1.2.3_1-4.5.6.7"
numaMappingLabel := createNumaNodeMappingLabel(numaMapping)
numaMappingLabelList := split(numaMappingLabel, labelMaxLength)
l.labels[labelNamespace+numaMappingName] = numaMappingLabelList[0]
for i := 1; i < len(numaMappingLabelList); i++ {
l.labels[labelNamespace+numaMappingName+strconv.FormatInt(int64(i+1), 10)] = numaMappingLabelList[i]
}
}
// all GPUs get default number of millicores (1000)
l.labels.addNumericLabel(labelNamespace+millicoreLabelName, int64(millicoresPerGPU*gpuCount))
@ -408,6 +453,27 @@ func (l *labeler) createLabels() error {
return nil
}
// createNumaNodeMappingLabel renders a NUMA node to GPU mapping as a single
// label value.
//
// Each map entry becomes "<numa id>-<gpu>.<gpu>...", with the GPU strings kept
// in the order they appear in the slice, and the entries are joined with "_"
// in ascending NUMA id order, e.g. "0-0.1_1-3.4". An empty or nil mapping
// yields an empty string.
func createNumaNodeMappingLabel(mapping map[int][]string) string {
	// Map iteration order is random; collect and sort the node ids so the
	// resulting label is deterministic.
	numas := make([]int, 0, len(mapping))
	for numaNode := range mapping {
		numas = append(numas, numaNode)
	}

	sort.Ints(numas)

	parts := make([]string, 0, len(numas))

	// Range over the values, not the indices: NUMA ids are not guaranteed to
	// be contiguous or to start from zero.
	for _, numaNode := range numas {
		parts = append(parts, strconv.Itoa(numaNode)+"-"+strings.Join(mapping[numaNode], "."))
	}

	return strings.Join(parts, "_")
}
func (l *labeler) printLabels() {
for key, val := range l.labels {
fmt.Println(key + "=" + val)

View File

@ -493,6 +493,151 @@ func getTestCases() []testcase {
"gpu.intel.com/tiles": "3",
},
},
{
sysfsdirs: []string{
"card0/device/drm/card0",
"card0/gt/gt0",
"card1/device/drm/card1",
"card1/gt/gt0",
},
sysfsfiles: map[string][]byte{
"card0/device/vendor": []byte("0x8086"),
"card0/device/numa_node": []byte("0"),
"card0/lmem_total_bytes": []byte("8000"),
"card1/device/vendor": []byte("0x8086"),
"card1/lmem_total_bytes": []byte("8000"),
"card1/device/numa_node": []byte("1"),
},
name: "successful labeling with two cards and numa node info",
memoryOverride: 16000000000,
capabilityFile: map[string][]byte{
"0/i915_capabilities": []byte(
"platform: new\n" +
"gen: 9"),
"1/i915_capabilities": []byte(
"platform: newnew\n" +
"gen: 9"),
},
expectedRetval: nil,
expectedLabels: labelMap{
"gpu.intel.com/graphics_version": "9",
"gpu.intel.com/media_version": "9",
"gpu.intel.com/millicores": "2000",
"gpu.intel.com/memory.max": "16000",
"gpu.intel.com/platform_new.count": "1",
"gpu.intel.com/platform_new.present": "true",
"gpu.intel.com/platform_new.tiles": "1",
"gpu.intel.com/platform_newnew.count": "1",
"gpu.intel.com/platform_newnew.present": "true",
"gpu.intel.com/platform_newnew.tiles": "1",
"gpu.intel.com/platform_gen": "9",
"gpu.intel.com/gpu-numbers": "0.1",
"gpu.intel.com/cards": "card0.card1",
"gpu.intel.com/tiles": "2",
"gpu.intel.com/numa-gpu-map": "0-0_1-1",
},
},
{
sysfsdirs: []string{
"card0/device/drm/card0",
"card1/device/drm/card1",
"card2/device/drm/card2",
"card3/device/drm/card3",
"card4/device/drm/card4",
"card5/device/drm/card5",
"card6/device/drm/card6",
"card7/device/drm/card7",
"card8/device/drm/card8",
"card9/device/drm/card9",
"card10/device/drm/card10",
"card11/device/drm/card11",
"card12/device/drm/card12",
"card13/device/drm/card13",
"card14/device/drm/card14",
"card15/device/drm/card15",
"card16/device/drm/card16",
"card17/device/drm/card17",
"card18/device/drm/card18",
"card19/device/drm/card19",
"card20/device/drm/card20",
"card21/device/drm/card21",
"card22/device/drm/card22",
"card23/device/drm/card23",
"card24/device/drm/card24",
"card25/device/drm/card25",
"card26/device/drm/card26",
},
sysfsfiles: map[string][]byte{
"card0/device/vendor": []byte("0x8086"),
"card0/device/numa_node": []byte("0"),
"card1/device/vendor": []byte("0x8086"),
"card1/device/numa_node": []byte("0"),
"card2/device/vendor": []byte("0x8086"),
"card2/device/numa_node": []byte("0"),
"card3/device/vendor": []byte("0x8086"),
"card3/device/numa_node": []byte("0"),
"card4/device/vendor": []byte("0x8086"),
"card4/device/numa_node": []byte("0"),
"card5/device/vendor": []byte("0x8086"),
"card5/device/numa_node": []byte("0"),
"card6/device/vendor": []byte("0x8086"),
"card6/device/numa_node": []byte("0"),
"card7/device/vendor": []byte("0x8086"),
"card7/device/numa_node": []byte("0"),
"card8/device/vendor": []byte("0x8086"),
"card8/device/numa_node": []byte("0"),
"card9/device/vendor": []byte("0x8086"),
"card9/device/numa_node": []byte("2"),
"card10/device/vendor": []byte("0x8086"),
"card10/device/numa_node": []byte("2"),
"card11/device/vendor": []byte("0x8086"),
"card11/device/numa_node": []byte("2"),
"card12/device/vendor": []byte("0x8086"),
"card12/device/numa_node": []byte("2"),
"card13/device/vendor": []byte("0x8086"),
"card13/device/numa_node": []byte("1"),
"card14/device/vendor": []byte("0x8086"),
"card14/device/numa_node": []byte("1"),
"card15/device/vendor": []byte("0x8086"),
"card15/device/numa_node": []byte("1"),
"card16/device/vendor": []byte("0x8086"),
"card16/device/numa_node": []byte("1"),
"card17/device/vendor": []byte("0x8086"),
"card17/device/numa_node": []byte("1"),
"card18/device/vendor": []byte("0x8086"),
"card18/device/numa_node": []byte("1"),
"card19/device/vendor": []byte("0x8086"),
"card19/device/numa_node": []byte("1"),
"card20/device/vendor": []byte("0x8086"),
"card20/device/numa_node": []byte("1"),
"card21/device/vendor": []byte("0x8086"),
"card21/device/numa_node": []byte("1"),
"card22/device/vendor": []byte("0x8086"),
"card22/device/numa_node": []byte("3"),
"card23/device/vendor": []byte("0x8086"),
"card23/device/numa_node": []byte("3"),
"card24/device/vendor": []byte("0x8086"),
"card24/device/numa_node": []byte("3"),
"card25/device/vendor": []byte("0x8086"),
"card25/device/numa_node": []byte("3"),
"card26/device/vendor": []byte("0x8086"),
"card26/device/numa_node": []byte("3"),
},
name: "successful labeling with two cards and numa node info",
memoryOverride: 16000000000,
capabilityFile: map[string][]byte{},
expectedRetval: nil,
expectedLabels: labelMap{
"gpu.intel.com/cards": "card0.card1.card10.card11.card12.card13.card14.card15.card16.ca",
"gpu.intel.com/gpu-numbers": "0.1.10.11.12.13.14.15.16.17.18.19.2.20.21.22.23.24.25.26.3.4.5.",
"gpu.intel.com/gpu-numbers2": "6.7.8.9",
"gpu.intel.com/memory.max": "432000000000",
"gpu.intel.com/millicores": "27000",
"gpu.intel.com/numa-gpu-map": "0-0.1.2.3.4.5.6.7.8_1-13.14.15.16.17.18.19.20.21_2-10.11.12.9_3",
"gpu.intel.com/numa-gpu-map2": "-22.23.24.25.26",
"gpu.intel.com/tiles": "27",
},
},
}
}