webhook: remove mode of operation

fpga: make AFU resource name 63 char long

webhook: drop mode from README

webhook: extend mappings description

webhook: tighten CRD definitions

webhook: drop mapping to non-existing afuId

explicitly state mappings names can be in any format

use consistent terminology across fpga webhook and plugin
This commit is contained in:
Dmitry Rozhkov 2020-04-02 12:23:57 +03:00
parent 96d9e642e1
commit 6c2eacfae5
21 changed files with 542 additions and 663 deletions

View File

@ -30,16 +30,6 @@ The admission controller also keeps the user from bypassing namespaced mapping r
by denying admission of any pods that are trying to use internal knowledge of InterfaceID or
Bitstream ID environment variables used by the prestart hook.
The admission controller can operate in two separate modes - preprogrammed or orchestration programmed.
The mode must be chosen to match that of the [FPGA plugin](../fpga_plugin/README.md) configuration, as
shown in the following table:
| FPGA plugin mode | matching admission controller mode |
|:---------------- |:---------------------------------- |
| region | orchestrated |
| af | preprogrammed |
# Dependencies
This component is one of a set of components that work together. You may also want to
@ -118,14 +108,6 @@ Register webhook
mutatingwebhookconfiguration.admissionregistration.k8s.io/fpga-mutator-webhook-cfg created
```
By default, the script deploys the webhook in a preprogrammed mode.
Use the `--mode` script option to deploy the webhook in orchestrated mode:
```bash
$ ./scripts/webhook-deploy.sh --mode orchestrated
```
The script needs the CA bundle used for signing certificate requests in your cluster.
By default, the script fetches the bundle stored in the configmap
`extension-apiserver-authentication`. However, your cluster may use a different signing
@ -138,13 +120,38 @@ $ ./scripts/webhook-deploy.sh --ca-bundle-path /var/run/kubernetes/server-ca.crt
# Mappings
Requested FPGA resources are translated to AF resources. For example,
`fpga.intel.com/arria10.dcp1.1-nlb0` is translated to `fpga.intel.com/af-d8424dc4a4a3c413f89e433683f9040b`.
Mappings are an essential part of the setup that gives a flexible instrument to a cluster
administrator to manage FPGA bitstreams and to control access to them. Being a set of
custom resource definitions they are used to configure the way FPGA resource requests get
translated into actual resources provided by the cluster.
In orchestrated mode, `fpga.intel.com/arria10.dcp1.1-nlb0` gets translated to
`fpga.intel.com/region-9926ab6d6c925a68aabca7d84c545738`, and, the corresponding AF IDs are set in
environment variables for the container. The [FPGA CRI-O hook](../fpga_crihook/README.md)
then loads the requested bitstream to a region before the container is started.
For the following mapping
```yaml
apiVersion: fpga.intel.com/v1
kind: AcceleratorFunction
metadata:
name: arria10.dcp1.1-nlb0
spec:
afuId: d8424dc4a4a3c413f89e433683f9040b
interfaceId: 9926ab6d6c925a68aabca7d84c545738
mode: af
```
requested FPGA resources are translated to AF resources. For example,
`fpga.intel.com/arria10.dcp1.1-nlb0` is translated to
`fpga.intel.com/9926ab6d6c925a68aabca7d84c54573d8424dc4a4a3c413f89e433683f9040b`.
The first 31 characters of the resource name part (`9926ab6d6c925a68aabca7d84c54573`)
are the first 31 characters of the region interface ID for Arria10 with DCP1.1
firmware. The next 32 characters (`d8424dc4a4a3c413f89e433683f9040b`) are the accelerator function ID.
The format of resource names (e.g. `arria10.dcp1.1-nlb0`) is arbitrary and is up
to the cluster administrator.
The same mapping, but with its mode field set to `region`, translates
`fpga.intel.com/arria10.dcp1.1-nlb0` to `fpga.intel.com/region-9926ab6d6c925a68aabca7d84c545738`,
and the corresponding AF IDs are set in environment variables for the container.
The [FPGA CRI-O hook](../fpga_crihook/README.md) then loads the requested bitstream to a region
before the container is started.
Mappings of resource names are configured with objects of `AcceleratorFunction` and
`FpgaRegion` custom resource definitions found respectively in

View File

@ -42,7 +42,7 @@ type fpgaObjectKey struct {
}
type controller struct {
patcherManager *patcherManager
patcherManager patcherManager
informerFactory informers.SharedInformerFactory
afsSynced cache.InformerSynced
regionsSynced cache.InformerSynced
@ -52,7 +52,7 @@ type controller struct {
stopCh chan struct{}
}
func newController(patcherManager *patcherManager, config *rest.Config) (*controller, error) {
func newController(patcherManager patcherManager, config *rest.Config) (*controller, error) {
clientset, err := clientset.NewForConfig(config)
if err != nil {
return nil, errors.Wrap(err, "Failed to create REST clientset")
@ -172,11 +172,7 @@ func (c *controller) syncAfHandler(key string) error {
return nil
}
patcher, err := c.patcherManager.getPatcher(namespace)
if err != nil {
runtime.HandleError(errors.Wrapf(err, "can't get patcher for namespace %s", namespace))
return nil
}
patcher := c.patcherManager.getPatcher(namespace)
// Get the AcceleratorFunction resource with this namespace/name
af, err := c.afLister.AcceleratorFunctions(namespace).Get(name)
@ -206,11 +202,7 @@ func (c *controller) syncRegionHandler(key string) error {
return nil
}
patcher, err := c.patcherManager.getPatcher(namespace)
if err != nil {
runtime.HandleError(errors.Wrapf(err, "can't get patcher for namespace %s", namespace))
return nil
}
patcher := c.patcherManager.getPatcher(namespace)
// Get the FpgaRegion resource with this namespace/name
region, err := c.regionLister.FpgaRegions(namespace).Get(name)

View File

@ -63,11 +63,10 @@ func (l *fakeAfLister) List(selector labels.Selector) (ret []*v1.AcceleratorFunc
func TestSyncAfHandler(t *testing.T) {
tcases := []struct {
name string
key string
afLister *fakeAfLister
patcherManagerIsBroken bool
expectedErr bool
name string
key string
afLister *fakeAfLister
expectedErr bool
}{
{
name: "Wrong key format",
@ -87,11 +86,6 @@ func TestSyncAfHandler(t *testing.T) {
},
},
},
{
name: "Broken patcher manager",
key: "default/arria10-nlb0",
patcherManagerIsBroken: true,
},
{
name: "Unknown key",
key: "default/unknown",
@ -107,13 +101,7 @@ func TestSyncAfHandler(t *testing.T) {
}
for _, tt := range tcases {
pm, err := newPatcherManager(preprogrammed)
if err != nil {
t.Fatalf("Test case '%s': %+v", tt.name, err)
}
if tt.patcherManagerIsBroken {
pm.defaultMode = "broken"
}
pm := newPatcherManager()
c, err := newController(pm, &rest.Config{})
if err != nil {
t.Fatalf("Test case '%s': %+v", tt.name, err)
@ -162,11 +150,10 @@ func (l *fakeRegionLister) List(selector labels.Selector) (ret []*v1.FpgaRegion,
func TestSyncRegionHandler(t *testing.T) {
tcases := []struct {
name string
key string
patcherManagerIsBroken bool
regionLister *fakeRegionLister
expectedErr bool
name string
key string
regionLister *fakeRegionLister
expectedErr bool
}{
{
name: "Wrong key format",
@ -186,11 +173,6 @@ func TestSyncRegionHandler(t *testing.T) {
},
},
},
{
name: "Broken patcher manager",
key: "default/arria10",
patcherManagerIsBroken: true,
},
{
name: "Unknown key",
key: "default/unknown",
@ -206,13 +188,7 @@ func TestSyncRegionHandler(t *testing.T) {
}
for _, tt := range tcases {
pm, err := newPatcherManager(preprogrammed)
if err != nil {
t.Fatalf("Test case '%s': %+v", tt.name, err)
}
if tt.patcherManagerIsBroken {
pm.defaultMode = "broken"
}
pm := newPatcherManager()
c, err := newController(pm, &rest.Config{})
if err != nil {
t.Fatalf("Test case '%s': %+v", tt.name, err)
@ -328,7 +304,7 @@ func TestProcessNextWorkItem(t *testing.T) {
},
}
for _, tt := range tcases {
pm, _ := newPatcherManager(preprogrammed)
pm := newPatcherManager()
c, err := newController(pm, &rest.Config{})
if err != nil {
t.Fatalf("Test case '%s': %+v", tt.name, err)
@ -369,7 +345,7 @@ func TestRun(t *testing.T) {
}
for _, tt := range tcases {
pm := &patcherManager{}
pm := newPatcherManager()
c, err := newController(pm, &rest.Config{})
if err != nil {
t.Fatalf("Test case '%s': %+v", tt.name, err)
@ -404,7 +380,7 @@ func TestNewController(t *testing.T) {
config := &rest.Config{
Host: tt.configHost,
}
pm := &patcherManager{}
pm := newPatcherManager()
c, err := newController(pm, config)
if err != nil && !tt.expectedErr {
t.Errorf("Test case '%s': unexpected error: %+v", tt.name, err)

View File

@ -18,7 +18,6 @@ import (
"crypto/tls"
"encoding/json"
"flag"
"fmt"
"io/ioutil"
"net/http"
"os"
@ -39,8 +38,6 @@ import (
)
const (
preprogrammed = "preprogrammed"
orchestrated = "orchestrated"
controllerThreadNum = 1
)
@ -69,7 +66,7 @@ func getTLSConfig(certFile string, keyFile string) *tls.Config {
}
}
func mutatePods(ar v1beta1.AdmissionReview, pm *patcherManager) *v1beta1.AdmissionResponse {
func mutatePods(ar v1beta1.AdmissionReview, pm patcherManager) *v1beta1.AdmissionResponse {
var ops []string
klog.V(4).Info("mutating pods")
@ -97,11 +94,7 @@ func mutatePods(ar v1beta1.AdmissionReview, pm *patcherManager) *v1beta1.Admissi
name = pod.ObjectMeta.GenerateName
}
klog.V(4).Infof("Received pod '%s' in name space '%s'", name, namespace)
patcher, err := pm.getPatcher(namespace)
if err != nil {
klog.Warningf("%+v", err)
return toAdmissionResponse(err)
}
patcher := pm.getPatcher(namespace)
reviewResponse := v1beta1.AdmissionResponse{}
reviewResponse.Allowed = true
@ -198,7 +191,7 @@ func serve(w http.ResponseWriter, r *http.Request, admit admitFunc) {
}
}
func makePodsHandler(pm *patcherManager) func(w http.ResponseWriter, r *http.Request) {
func makePodsHandler(pm patcherManager) func(w http.ResponseWriter, r *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
serve(w, r, func(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse {
return mutatePods(ar, pm)
@ -211,7 +204,6 @@ func main() {
var master string
var certFile string
var keyFile string
var mode string
var config *rest.Config
var err error
@ -220,7 +212,6 @@ func main() {
flag.StringVar(&certFile, "tls-cert-file", certFile,
"File containing the x509 Certificate for HTTPS. (CA cert, if any, concatenated after server cert).")
flag.StringVar(&keyFile, "tls-private-key-file", keyFile, "File containing the x509 private key matching --tls-cert-file.")
flag.StringVar(&mode, "mode", preprogrammed, fmt.Sprintf("webhook mode: '%s' (default) or '%s'", preprogrammed, orchestrated))
flag.Parse()
if certFile == "" {
@ -248,18 +239,15 @@ func main() {
klog.Fatal("Failed to get cluster config ", err)
}
patcherManager, err := newPatcherManager(mode)
if err != nil {
klog.Fatalf("%+v", err)
}
pm := newPatcherManager()
controller, err := newController(patcherManager, config)
controller, err := newController(pm, config)
if err != nil {
klog.Fatalf("%+v", err)
}
go controller.run(controllerThreadNum)
http.HandleFunc("/pods", makePodsHandler(patcherManager))
http.HandleFunc("/pods", makePodsHandler(pm))
klog.V(4).Info("Webhook started")

View File

@ -24,6 +24,7 @@ import (
"strings"
"testing"
fpgav1 "github.com/intel/intel-device-plugins-for-kubernetes/pkg/apis/fpga.intel.com/v1"
"k8s.io/api/admission/v1beta1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
@ -144,16 +145,39 @@ func TestMutatePods(t *testing.T) {
},
},
}
brokenPod := corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "default",
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "test-container",
Image: "test-image",
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
"cpu": resource.MustParse("1"),
"fpga.intel.com/arria10": resource.MustParse("1"),
},
},
},
},
},
}
podRaw, err := json.Marshal(pod)
if err != nil {
t.Fatal(err)
}
brokenPodRaw, err := json.Marshal(brokenPod)
if err != nil {
t.Fatal(err)
}
tcases := []struct {
name string
mode string
ar v1beta1.AdmissionReview
expectedResponse bool
expectedAllowed bool
expectedPatchOps int
}{
{
@ -161,7 +185,6 @@ func TestMutatePods(t *testing.T) {
ar: v1beta1.AdmissionReview{
Request: &v1beta1.AdmissionRequest{},
},
mode: preprogrammed,
},
{
name: "admission request without object",
@ -170,8 +193,8 @@ func TestMutatePods(t *testing.T) {
Resource: metav1.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"},
},
},
mode: preprogrammed,
expectedResponse: true,
expectedAllowed: true,
},
{
name: "admission request with corrupted object",
@ -183,11 +206,10 @@ func TestMutatePods(t *testing.T) {
},
},
},
mode: preprogrammed,
expectedResponse: true,
},
{
name: "non-empty admission request in preprogrammed mode",
name: "successful non-empty admission request",
ar: v1beta1.AdmissionReview{
Request: &v1beta1.AdmissionRequest{
Resource: metav1.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"},
@ -196,23 +218,9 @@ func TestMutatePods(t *testing.T) {
},
},
},
mode: preprogrammed,
expectedResponse: true,
expectedPatchOps: 4,
},
{
name: "non-empty admission request in orchestrated mode",
ar: v1beta1.AdmissionReview{
Request: &v1beta1.AdmissionRequest{
Resource: metav1.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"},
Object: runtime.RawExtension{
Raw: podRaw,
},
},
},
mode: orchestrated,
expectedResponse: true,
expectedPatchOps: 5,
expectedAllowed: true,
},
{
name: "handle error after wrong getPatchOps()",
@ -220,48 +228,54 @@ func TestMutatePods(t *testing.T) {
Request: &v1beta1.AdmissionRequest{
Resource: metav1.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"},
Object: runtime.RawExtension{
Raw: podRaw,
Raw: brokenPodRaw,
},
},
},
mode: "unknown mode",
expectedResponse: true,
},
}
for _, tcase := range tcases {
p := &patcher{
mode: tcase.mode,
regionMap: map[string]string{
"arria10": "ce48969398f05f33946d560708be108a",
},
resourceMap: map[string]string{
"fpga.intel.com/arria10": "ce48969398f05f33946d560708be108a",
},
}
pm := &patcherManager{
defaultMode: tcase.mode,
patchers: map[string]*patcher{
"default": p,
},
}
resp := mutatePods(tcase.ar, pm)
t.Run(tcase.name, func(t *testing.T) {
p := newPatcher()
p.addRegion(&fpgav1.FpgaRegion{
ObjectMeta: metav1.ObjectMeta{
Name: "arria10",
},
Spec: fpgav1.FpgaRegionSpec{
InterfaceID: "ce48969398f05f33946d560708be108a",
},
})
pm := newPatcherManager()
pm["default"] = p
resp := mutatePods(tcase.ar, pm)
if !tcase.expectedResponse && resp != nil {
t.Errorf("Test case '%s': got unexpected response", tcase.name)
} else if tcase.expectedResponse && resp == nil {
t.Errorf("Test case '%s': got no response", tcase.name)
} else if tcase.expectedResponse && tcase.expectedPatchOps > 0 {
var ops interface{}
actualPatchOps := 0
if !tcase.expectedResponse && resp != nil {
t.Errorf("Test case '%s': got unexpected response", tcase.name)
} else if tcase.expectedResponse && resp == nil {
t.Errorf("Test case '%s': got no response", tcase.name)
} else if tcase.expectedResponse {
if tcase.expectedAllowed != resp.Allowed {
t.Errorf("Allowed expected to be %t but got %t", tcase.expectedAllowed, resp.Allowed)
} else if resp.Allowed && resp.Patch != nil {
var ops interface{}
err := json.Unmarshal(resp.Patch, &ops)
if err != nil {
t.Errorf("Test case '%s': got unparsable patch '%s'", tcase.name, resp.Patch)
} else if len(ops.([]interface{})) != tcase.expectedPatchOps {
t.Errorf("Test case '%s': got wrong number of operations in the patch. Expected %d, but got %d\n%s",
tcase.name, tcase.expectedPatchOps, len(ops.([]interface{})), string(resp.Patch))
err := json.Unmarshal(resp.Patch, &ops)
if err != nil {
t.Errorf("Test case '%s': got unparsable patch '%s'", tcase.name, resp.Patch)
} else {
actualPatchOps = len(ops.([]interface{}))
}
}
}
}
if actualPatchOps != tcase.expectedPatchOps {
t.Errorf("Test case '%s': got wrong number of operations in the patch. Expected %d, but got %d\n%s",
tcase.name, tcase.expectedPatchOps, actualPatchOps, string(resp.Patch))
}
})
}
}
@ -280,6 +294,6 @@ func (*fakeResponseWriter) WriteHeader(int) {
}
func TestMakePodsHandler(t *testing.T) {
serveFunc := makePodsHandler(&patcherManager{})
serveFunc := makePodsHandler(newPatcherManager())
serveFunc(&fakeResponseWriter{}, &http.Request{})
}

View File

@ -17,7 +17,6 @@ package main
import (
"bytes"
"fmt"
"regexp"
"strings"
"sync"
"text/template"
@ -33,14 +32,12 @@ import (
const (
namespace = "fpga.intel.com"
resourceReplaceOp = `{
"op": "remove",
"path": "/spec/containers/%d/resources/%s/%s"
}, {
"op": "add",
"path": "/spec/containers/%d/resources/%s/%s",
"value": %s
}`
af = "af"
region = "region"
// "regiondevel" corresponds to the FPGA plugin's regiondevel mode. It requires
// FpgaRegion CRDs to be added to the cluster.
regiondevel = "regiondevel"
resourceRemoveOp = `{
"op": "remove",
"path": "/spec/containers/%d/resources/%s/%s"
@ -64,48 +61,52 @@ const (
{{- end -}}
]
}`
// Names of extended resources cannot be longer than 63 characters.
// Therefore for AF resources we have to cut the interface ID prefix
// to 31 characters only.
interfaceIDPrefixLength = 31
)
var (
rfc6901Escaper = strings.NewReplacer("~", "~0", "/", "~1")
resourceRe = regexp.MustCompile(namespace + `/(?P<Region>[[:alnum:].]+)(-(?P<Af>[[:alnum:]]+))?`)
)
type patcher struct {
sync.Mutex
mode string
regionMap map[string]string
afMap map[string]string
resourceMap map[string]string
afMap map[string]*fpgav1.AcceleratorFunction
resourceMap map[string]string
resourceModeMap map[string]string
}
func newPatcher(mode string) (*patcher, error) {
if mode != preprogrammed && mode != orchestrated {
return nil, errors.Errorf("Unknown mode: %s", mode)
}
func newPatcher() *patcher {
return &patcher{
mode: mode,
regionMap: make(map[string]string),
afMap: make(map[string]string),
resourceMap: make(map[string]string),
}, nil
afMap: make(map[string]*fpgav1.AcceleratorFunction),
resourceMap: make(map[string]string),
resourceModeMap: make(map[string]string),
}
}
func (p *patcher) addAf(af *fpgav1.AcceleratorFunction) {
func (p *patcher) addAf(accfunc *fpgav1.AcceleratorFunction) {
defer p.Unlock()
p.Lock()
p.afMap[af.Name] = af.Spec.AfuID
p.resourceMap[namespace+"/"+af.Name] = rfc6901Escaper.Replace(namespace + "/af-" + af.Spec.AfuID)
p.afMap[namespace+"/"+accfunc.Name] = accfunc
if accfunc.Spec.Mode == af {
p.resourceMap[namespace+"/"+accfunc.Name] = rfc6901Escaper.Replace(namespace + "/" +
accfunc.Spec.InterfaceID[:interfaceIDPrefixLength] + accfunc.Spec.AfuID)
} else {
p.resourceMap[namespace+"/"+accfunc.Name] = rfc6901Escaper.Replace(namespace + "/region-" + accfunc.Spec.InterfaceID)
}
p.resourceModeMap[namespace+"/"+accfunc.Name] = accfunc.Spec.Mode
}
func (p *patcher) addRegion(region *fpgav1.FpgaRegion) {
defer p.Unlock()
p.Lock()
p.regionMap[region.Name] = region.Spec.InterfaceID
p.resourceModeMap[namespace+"/"+region.Name] = regiondevel
p.resourceMap[namespace+"/"+region.Name] = rfc6901Escaper.Replace(namespace + "/region-" + region.Spec.InterfaceID)
}
@ -113,149 +114,138 @@ func (p *patcher) removeAf(name string) {
defer p.Unlock()
p.Lock()
delete(p.afMap, name)
delete(p.afMap, namespace+"/"+name)
delete(p.resourceMap, namespace+"/"+name)
delete(p.resourceModeMap, namespace+"/"+name)
}
func (p *patcher) removeRegion(name string) {
defer p.Unlock()
p.Lock()
delete(p.regionMap, name)
delete(p.resourceMap, namespace+"/"+name)
delete(p.resourceModeMap, namespace+"/"+name)
}
// getRequestedResources validates the container's requirements first, then returns them as a map.
//
// Validation rules:
//   - the container must not define environment variables whose names start with
//     "FPGA_REGION" or "FPGA_AFU";
//   - every resource whose lower-cased name starts with the FPGA namespace must be
//     present in both 'limits' and 'requests' with the same numeric value;
//   - the quantity of every such resource must be representable as an int64.
//
// Resources outside the FPGA namespace are ignored. On success the returned map is
// keyed by the lower-cased resource name and holds the requested quantity; on any
// validation failure the map is nil and the error describes the offending item.
func getRequestedResources(container corev1.Container) (map[string]int64, error) {
	for _, v := range container.Env {
		if strings.HasPrefix(v.Name, "FPGA_REGION") || strings.HasPrefix(v.Name, "FPGA_AFU") {
			return nil, errors.Errorf("environment variable '%s' is not allowed", v.Name)
		}
	}

	// Container may happen to have Requests, but not Limits. Check Requests first,
	// then in the next loop iterate over Limits.
	for resourceName, resourceQuantity := range container.Resources.Requests {
		rname := strings.ToLower(string(resourceName))
		if !strings.HasPrefix(rname, namespace) {
			// Skip non-FPGA resources in Requests.
			continue
		}

		// Compare by numeric value (Cmp) rather than by struct equality: two
		// quantities with the same value may differ in their internal
		// representation. The explicit presence check keeps a missing entry
		// from being treated as a zero quantity.
		limit, found := container.Resources.Limits[resourceName]
		if !found || limit.Cmp(resourceQuantity) != 0 {
			return nil, errors.Errorf(
				"'limits' and 'requests' for %q must be equal as extended resources cannot be overcommitted",
				rname)
		}
	}

	resources := make(map[string]int64)
	for resourceName, resourceQuantity := range container.Resources.Limits {
		rname := strings.ToLower(string(resourceName))
		if !strings.HasPrefix(rname, namespace) {
			// Skip non-FPGA resources in Limits.
			continue
		}

		// Same value-based comparison as above, in the opposite direction, so
		// that FPGA resources present only in Limits are rejected as well.
		request, found := container.Resources.Requests[resourceName]
		if !found || request.Cmp(resourceQuantity) != 0 {
			return nil, errors.Errorf(
				"'limits' and 'requests' for %q must be equal as extended resources cannot be overcommitted",
				rname)
		}

		quantity, ok := resourceQuantity.AsInt64()
		if !ok {
			return nil, errors.Errorf("resource quantity isn't of integral type for %q", rname)
		}
		resources[rname] = quantity
	}

	return resources, nil
}
func (p *patcher) getPatchOps(containerIdx int, container corev1.Container) ([]string, error) {
switch p.mode {
case preprogrammed:
return p.getPatchOpsPreprogrammed(containerIdx, container)
case orchestrated:
return p.getPatchOpsOrchestrated(containerIdx, container)
}
return nil, errors.Errorf("Uknown mode: %s", p.mode)
}
func (p *patcher) getPatchOpsPreprogrammed(containerIdx int, container corev1.Container) ([]string, error) {
var ops []string
for resourceName, resourceQuantity := range container.Resources.Limits {
newName, err := p.translateFpgaResourceName(resourceName)
if err != nil {
return nil, err
}
if len(newName) > 0 {
op := fmt.Sprintf(resourceReplaceOp, containerIdx,
"limits", rfc6901Escaper.Replace(string(resourceName)),
containerIdx, "limits", newName, resourceQuantity.String())
ops = append(ops, op)
}
}
for resourceName, resourceQuantity := range container.Resources.Requests {
newName, err := p.translateFpgaResourceName(resourceName)
if err != nil {
return nil, err
}
if len(newName) > 0 {
op := fmt.Sprintf(resourceReplaceOp, containerIdx,
"requests", rfc6901Escaper.Replace(string(resourceName)),
containerIdx, "requests", newName, resourceQuantity.String())
ops = append(ops, op)
}
}
return ops, nil
}
func (p *patcher) translateFpgaResourceName(oldname corev1.ResourceName) (string, error) {
rname := strings.ToLower(string(oldname))
if !strings.HasPrefix(rname, namespace) {
return "", nil
requestedResources, err := getRequestedResources(container)
if err != nil {
return nil, err
}
defer p.Unlock()
p.Lock()
if newname, ok := p.resourceMap[rname]; ok {
return newname, nil
}
return "", errors.Errorf("Unknown FPGA resource: %s", rname)
}
func (p *patcher) checkResourceRequests(container corev1.Container) error {
for resourceName, resourceQuantity := range container.Resources.Requests {
interfaceID, _, err := p.parseResourceName(string(resourceName))
if err != nil {
return err
}
if interfaceID == "" {
// Skip non-FPGA resources
continue
}
if container.Resources.Limits[resourceName] != resourceQuantity {
return errors.Errorf("'limits' and 'requests' for %s must be equal", string(resourceName))
}
}
return nil
}
func (p *patcher) getPatchOpsOrchestrated(containerIdx int, container corev1.Container) ([]string, error) {
var ops []string
for _, v := range container.Env {
if strings.HasPrefix(v.Name, "FPGA_REGION") || strings.HasPrefix(v.Name, "FPGA_AFU") {
return nil, errors.Errorf("The environment variable '%s' is not allowed", v.Name)
}
}
if err := p.checkResourceRequests(container); err != nil {
return nil, err
}
regions := make(map[string]int64)
fpgaPluginMode := ""
resources := make(map[string]int64)
envVars := make(map[string]string)
counter := 0
for resourceName, resourceQuantity := range container.Resources.Limits {
interfaceID, afuID, err := p.parseResourceName(string(resourceName))
if err != nil {
return nil, err
for rname, quantity := range requestedResources {
mode, found := p.resourceModeMap[rname]
if !found {
return nil, errors.Errorf("no such resource: %q", rname)
}
if interfaceID == "" && afuID == "" {
// Skip non-FPGA resources
continue
switch mode {
case regiondevel:
// Do nothing.
// The requested resources are exposed by FPGA plugins working in "regiondevel" mode.
// In this mode the workload is supposed to program FPGA regions.
// A cluster admin has to add FpgaRegion CRDs to allow this.
case af:
// Do nothing.
// The requested resources are exposed by FPGA plugins working in "af" mode.
case region:
// Let fpga_crihook know how to program the regions by setting ENV variables.
// The requested resources are exposed by FPGA plugins working in "region" mode.
for i := int64(0); i < quantity; i++ {
counter++
envVars[fmt.Sprintf("FPGA_REGION_%d", counter)] = p.afMap[rname].Spec.InterfaceID
envVars[fmt.Sprintf("FPGA_AFU_%d", counter)] = p.afMap[rname].Spec.AfuID
}
default:
msg := fmt.Sprintf("%q is registered with unknown mode %q instead of %q or %q",
rname, p.resourceModeMap[rname], af, region)
// Let admin know about broken af CRD.
klog.Error(msg)
return nil, errors.New(msg)
}
if container.Resources.Requests[resourceName] != resourceQuantity {
return nil, errors.Errorf("'limits' and 'requests' for %s must be equal", string(resourceName))
if fpgaPluginMode == "" {
fpgaPluginMode = mode
} else if fpgaPluginMode != mode {
return nil, errors.New("container cannot be scheduled as it requires resources operated in different modes")
}
quantity, ok := resourceQuantity.AsInt64()
if !ok {
return nil, errors.New("Resource quantity isn't of integral type")
}
regions[interfaceID] = regions[interfaceID] + quantity
mappedName := p.resourceMap[rname]
resources[mappedName] = resources[mappedName] + quantity
for i := int64(0); i < quantity; i++ {
counter++
envVars[fmt.Sprintf("FPGA_REGION_%d", counter)] = interfaceID
envVars[fmt.Sprintf("FPGA_AFU_%d", counter)] = afuID
}
ops = append(ops, fmt.Sprintf(resourceRemoveOp, containerIdx, "limits", rfc6901Escaper.Replace(string(resourceName))))
ops = append(ops, fmt.Sprintf(resourceRemoveOp, containerIdx, "requests", rfc6901Escaper.Replace(string(resourceName))))
// Add operations to remove unresolved resources from the pod.
ops = append(ops, fmt.Sprintf(resourceRemoveOp, containerIdx, "limits", rfc6901Escaper.Replace(rname)))
ops = append(ops, fmt.Sprintf(resourceRemoveOp, containerIdx, "requests", rfc6901Escaper.Replace(rname)))
}
for interfaceID, quantity := range regions {
op := fmt.Sprintf(resourceAddOp, containerIdx, "limits", rfc6901Escaper.Replace(namespace+"/region-"+interfaceID), quantity)
// Add operations to add resolved resources to the pod.
for resource, quantity := range resources {
op := fmt.Sprintf(resourceAddOp, containerIdx, "limits", resource, quantity)
ops = append(ops, op)
op = fmt.Sprintf(resourceAddOp, containerIdx, "requests", rfc6901Escaper.Replace(namespace+"/region-"+interfaceID), quantity)
op = fmt.Sprintf(resourceAddOp, containerIdx, "requests", resource, quantity)
ops = append(ops, op)
}
// Add the ENV variables to the pod if needed.
if len(envVars) > 0 {
for _, envvar := range container.Env {
envVars[envvar.Name] = envvar.Value
@ -276,69 +266,21 @@ func (p *patcher) getPatchOpsOrchestrated(containerIdx int, container corev1.Con
return ops, nil
}
func (p *patcher) parseResourceName(input string) (string, string, error) {
var interfaceID, afuID string
var regionName, afName string
var ok bool
result := resourceRe.FindStringSubmatch(input)
if result == nil {
return "", "", nil
}
defer p.Unlock()
p.Lock()
for num, group := range resourceRe.SubexpNames() {
switch group {
case "Region":
regionName = result[num]
if interfaceID, ok = p.regionMap[result[num]]; !ok {
return "", "", errors.Errorf("Unknown region name: %s", result[num])
}
case "Af":
afName = result[num]
}
}
if afName != "" {
if afuID, ok = p.afMap[regionName+"-"+afName]; !ok {
return "", "", errors.Errorf("Unknown AF name: %s", regionName+"-"+afName)
}
}
return interfaceID, afuID, nil
}
// patcherManager keeps track of patchers registered for different Kubernetes namespaces.
type patcherManager struct {
defaultMode string
patchers map[string]*patcher
type patcherManager map[string]*patcher
// newPatcherManager returns an empty, ready-to-use patcherManager, i.e. a
// non-nil map that can be written to immediately.
func newPatcherManager() patcherManager {
	return patcherManager{}
}
func newPatcherManager(defaultMode string) (*patcherManager, error) {
if defaultMode != preprogrammed && defaultMode != orchestrated {
return nil, errors.Errorf("Unknown mode: %s", defaultMode)
func (pm patcherManager) getPatcher(namespace string) *patcher {
if p, ok := pm[namespace]; ok {
return p
}
return &patcherManager{
defaultMode: defaultMode,
patchers: make(map[string]*patcher),
}, nil
}
func (pm *patcherManager) getPatcher(namespace string) (*patcher, error) {
if p, ok := pm.patchers[namespace]; ok {
return p, nil
}
p, err := newPatcher(pm.defaultMode)
if err != nil {
return nil, err
}
pm.patchers[namespace] = p
p := newPatcher()
pm[namespace] = p
klog.V(4).Info("created new patcher for namespace", namespace)
return p, nil
return p
}

View File

@ -21,7 +21,6 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog"
fpgav1 "github.com/intel/intel-device-plugins-for-kubernetes/pkg/apis/fpga.intel.com/v1"
)
@ -48,186 +47,51 @@ func TestPatcherStorageFunctions(t *testing.T) {
},
}
p, err := newPatcher(preprogrammed)
if err != nil {
t.Fatal(err)
}
p := newPatcher()
p.addAf(af)
if len(p.afMap) != 1 || len(p.resourceMap) != 1 {
if len(p.resourceModeMap) != 1 || len(p.afMap) != 1 || len(p.resourceMap) != 1 {
t.Error("Failed to add AF to patcher")
}
p.removeAf(af.Name)
if len(p.afMap) != 0 || len(p.resourceMap) != 0 {
if len(p.resourceModeMap) != 0 || len(p.afMap) != 0 || len(p.resourceMap) != 0 {
t.Error("Failed to remove AF from patcher")
}
p.addRegion(region)
if len(p.regionMap) != 1 || len(p.resourceMap) != 1 {
if len(p.resourceModeMap) != 1 || len(p.resourceMap) != 1 {
t.Error("Failed to add fpga region to patcher")
}
p.removeRegion(region.Name)
if len(p.regionMap) != 0 || len(p.resourceMap) != 0 {
if len(p.resourceModeMap) != 0 || len(p.resourceMap) != 0 {
t.Error("Failed to remove fpga region from patcher")
}
}
func TestGetPatchOpsPreprogrammed(t *testing.T) {
func TestGetPatchOps(t *testing.T) {
tcases := []struct {
name string
resourceMap map[string]string
container corev1.Container
afs []*fpgav1.AcceleratorFunction
regions []*fpgav1.FpgaRegion
expectedErr bool
expectedOps int
}{
{
name: "Empty container",
},
{
name: "Unknown resource in limits",
name: "Successful handling for region mode",
container: corev1.Container{
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
"fpga.intel.com/arria10-unknown": resource.MustParse("1"),
"cpu": resource.MustParse("1"),
},
},
},
expectedErr: true,
},
{
name: "Unknown resource in requests",
container: corev1.Container{
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
"fpga.intel.com/arria10-unknown": resource.MustParse("1"),
"cpu": resource.MustParse("1"),
},
},
},
expectedErr: true,
},
{
name: "Successful case",
container: corev1.Container{
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
"cpu": resource.MustParse("1"),
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
"fpga.intel.com/arria10-nlb0-alias": resource.MustParse("2"),
"cpu": resource.MustParse("1"),
},
Requests: corev1.ResourceList{
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
"cpu": resource.MustParse("1"),
},
},
},
resourceMap: map[string]string{
"fpga.intel.com/arria10-nlb0": rfc6901Escaper.Replace("fpga.intel.com/af-d8424dc4a4a3c413f89e433683f9040b"),
},
expectedOps: 2,
},
}
for _, tt := range tcases {
p := &patcher{
resourceMap: tt.resourceMap,
}
ops, err := p.getPatchOpsPreprogrammed(0, tt.container)
if tt.expectedErr && err == nil {
t.Errorf("Test case '%s': no error returned", tt.name)
}
if !tt.expectedErr && err != nil {
t.Errorf("Test case '%s': unexpected error %v", tt.name, err)
}
if len(ops) != tt.expectedOps {
t.Errorf("test case '%s': expected %d ops, but got %d\n%v", tt.name, tt.expectedOps, len(ops), ops)
}
}
}
// TestParseResourceName feeds resource names to patcher.parseResourceName on a
// patcher populated with each case's afMap and regionMap, and verifies the
// returned interface ID, AFU ID and error against the case's expectations.
func TestParseResourceName(t *testing.T) {
	tcases := []struct {
		input       string
		interfaceID string
		afuID       string
		afMap       map[string]string
		regionMap   map[string]string
		expectedErr bool
	}{
		{
			// Region-only name: only the interface ID is expected.
			input: "fpga.intel.com/arria10",
			regionMap: map[string]string{
				"arria10": "ce48969398f05f33946d560708be108a",
			},
			interfaceID: "ce48969398f05f33946d560708be108a",
		},
		{
			// Known region, but the AF part has no mapping.
			input: "fpga.intel.com/arria10-unknown",
			regionMap: map[string]string{
				"arria10": "ce48969398f05f33946d560708be108a",
			},
			expectedErr: true,
		},
		{
			// No mappings at all for the requested name.
			input:       "fpga.intel.com/unknown",
			expectedErr: true,
		},
		{
			// Name outside fpga.intel.com: empty IDs and no error are expected.
			input: "fpga.example.com/something",
		},
		{
			// Both the region and the AF are mapped.
			input: "fpga.intel.com/arria10-nlb0",
			regionMap: map[string]string{
				"arria10": "ce48969398f05f33946d560708be108a",
			},
			afMap: map[string]string{
				"arria10-nlb0": "d8424dc4a4a3c413f89e433683f9040b",
			},
			interfaceID: "ce48969398f05f33946d560708be108a",
			afuID:       "d8424dc4a4a3c413f89e433683f9040b",
		},
	}
	for num, tt := range tcases {
		p := &patcher{
			afMap:     tt.afMap,
			regionMap: tt.regionMap,
		}
		interfaceID, afuID, err := p.parseResourceName(tt.input)
		if tt.expectedErr {
			if err == nil {
				t.Errorf("In case %d we didn't get error", num)
			}
			// The returned IDs carry no meaning when an error is expected,
			// so do not compare them.
			continue
		}
		// An error in a case that expects success must fail the test even if
		// the returned IDs happen to match the (possibly empty) expectations.
		if err != nil {
			t.Errorf("In case %d unexpected error: %+v", num, err)
			continue
		}
		if tt.interfaceID != interfaceID || tt.afuID != afuID {
			t.Errorf("In case %d expected (%s, %s), but got (%s, %s)", num, tt.interfaceID, tt.afuID, interfaceID, afuID)
		}
	}
}
func TestGetPatchOpsOrchestrated(t *testing.T) {
tcases := []struct {
name string
container corev1.Container
afMap map[string]string
regionMap map[string]string
expectedErr bool
expectedOps int
}{
{
name: "Successful handling",
container: corev1.Container{
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
"cpu": resource.MustParse("1"),
},
Requests: corev1.ResourceList{
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
"cpu": resource.MustParse("1"),
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
"fpga.intel.com/arria10-nlb0-alias": resource.MustParse("2"),
"cpu": resource.MustParse("3"),
},
},
Env: []corev1.EnvVar{
@ -237,13 +101,79 @@ func TestGetPatchOpsOrchestrated(t *testing.T) {
},
},
},
regionMap: map[string]string{
"arria10": "ce48969398f05f33946d560708be108a",
afs: []*fpgav1.AcceleratorFunction{
{
ObjectMeta: metav1.ObjectMeta{
Name: "arria10-nlb0",
},
Spec: fpgav1.AcceleratorFunctionSpec{
AfuID: "d8424dc4a4a3c413f89e433683f9040b",
InterfaceID: "ce48969398f05f33946d560708be108a",
Mode: region,
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "arria10-nlb0-alias",
},
Spec: fpgav1.AcceleratorFunctionSpec{
AfuID: "d8424dc4a4a3c413f89e433683f9040b",
InterfaceID: "ce48969398f05f33946d560708be108a",
Mode: region,
},
},
},
afMap: map[string]string{
"arria10-nlb0": "d8424dc4a4a3c413f89e433683f9040b",
expectedOps: 7,
},
{
name: "Successful handling for af mode",
container: corev1.Container{
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
},
Requests: corev1.ResourceList{
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
},
},
},
expectedOps: 5,
afs: []*fpgav1.AcceleratorFunction{
{
ObjectMeta: metav1.ObjectMeta{
Name: "arria10-nlb0",
},
Spec: fpgav1.AcceleratorFunctionSpec{
AfuID: "d8424dc4a4a3c413f89e433683f9040b",
InterfaceID: "ce48969398f05f33946d560708be108a",
Mode: af,
},
},
},
expectedOps: 4,
},
{
name: "Successful handling for regiondevel mode",
container: corev1.Container{
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
"fpga.intel.com/arria10": resource.MustParse("1"),
},
Requests: corev1.ResourceList{
"fpga.intel.com/arria10": resource.MustParse("1"),
},
},
},
regions: []*fpgav1.FpgaRegion{
{
ObjectMeta: metav1.ObjectMeta{
Name: "arria10",
},
Spec: fpgav1.FpgaRegionSpec{
InterfaceID: "ce48969398f05f33946d560708be108a",
},
},
},
expectedOps: 4,
},
{
name: "Unequal FPGA resources in Limits and Requests 1",
@ -255,13 +185,6 @@ func TestGetPatchOpsOrchestrated(t *testing.T) {
},
},
},
regionMap: map[string]string{
"arria10": "ce48969398f05f33946d560708be108a",
},
afMap: map[string]string{
"arria10-nlb0": "d8424dc4a4a3c413f89e433683f9040b",
"arria10-nlb3": "f7df405cbd7acf7222f144b0b93acd18",
},
expectedErr: true,
},
{
@ -270,45 +193,19 @@ func TestGetPatchOpsOrchestrated(t *testing.T) {
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
"fpga.intel.com/arria10-nlb3": resource.MustParse("1"),
"fpga.intel.com/arria10-nlb3": resource.MustParse("2"),
},
},
},
regionMap: map[string]string{
"arria10": "ce48969398f05f33946d560708be108a",
},
afMap: map[string]string{
"arria10-nlb0": "d8424dc4a4a3c413f89e433683f9040b",
"arria10-nlb3": "f7df405cbd7acf7222f144b0b93acd18",
},
expectedErr: true,
},
{
name: "Unknown FPGA model in Requests",
name: "Unknown FPGA resources in container requirements",
container: corev1.Container{
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
"fpga.intel.com/unknown-nlb0": resource.MustParse("1"),
},
},
},
expectedErr: true,
},
{
name: "Unknown AFU in Requests",
container: corev1.Container{
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
"fpga.intel.com/arria10-unknown": resource.MustParse("1"),
},
},
},
expectedErr: true,
},
{
name: "Unknown FPGA model in Limits",
container: corev1.Container{
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
"fpga.intel.com/unknown-nlb0": resource.MustParse("1"),
},
@ -316,23 +213,12 @@ func TestGetPatchOpsOrchestrated(t *testing.T) {
},
expectedErr: true,
},
{
name: "Unknown AFU in Limits",
container: corev1.Container{
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
"fpga.intel.com/arria10-unknown": resource.MustParse("1"),
},
},
},
expectedErr: true,
},
{
name: "Wrong ENV",
container: corev1.Container{
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
"cpu": resource.MustParse("1"),
},
},
Env: []corev1.EnvVar{
@ -342,12 +228,6 @@ func TestGetPatchOpsOrchestrated(t *testing.T) {
},
},
},
regionMap: map[string]string{
"arria10": "ce48969398f05f33946d560708be108a",
},
afMap: map[string]string{
"arria10-nlb0": "d8424dc4a4a3c413f89e433683f9040b",
},
expectedErr: true,
},
{
@ -362,59 +242,119 @@ func TestGetPatchOpsOrchestrated(t *testing.T) {
},
},
},
regionMap: map[string]string{
"arria10": "ce48969398f05f33946d560708be108a",
expectedErr: true,
},
{
name: "Require resources operated in af and region modes",
container: corev1.Container{
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
"fpga.intel.com/arria10-nlb3": resource.MustParse("2"),
"cpu": resource.MustParse("1"),
},
Requests: corev1.ResourceList{
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
"fpga.intel.com/arria10-nlb3": resource.MustParse("2"),
"cpu": resource.MustParse("3"),
},
},
},
afMap: map[string]string{
"arria10-nlb0": "d8424dc4a4a3c413f89e433683f9040b",
afs: []*fpgav1.AcceleratorFunction{
{
ObjectMeta: metav1.ObjectMeta{
Name: "arria10-nlb0",
},
Spec: fpgav1.AcceleratorFunctionSpec{
AfuID: "d8424dc4a4a3c413f89e433683f9040b",
InterfaceID: "ce48969398f05f33946d560708be108a",
Mode: region,
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "arria10-nlb3",
},
Spec: fpgav1.AcceleratorFunctionSpec{
AfuID: "d8424dc4a4a3c413f89e433683f9040b",
InterfaceID: "f7df405cbd7acf7222f144b0b93acd18",
Mode: af,
},
},
},
expectedErr: true,
},
{
name: "Unknown mode",
container: corev1.Container{
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
},
Requests: corev1.ResourceList{
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
},
},
},
afs: []*fpgav1.AcceleratorFunction{
{
ObjectMeta: metav1.ObjectMeta{
Name: "arria10-nlb0",
},
Spec: fpgav1.AcceleratorFunctionSpec{
AfuID: "d8424dc4a4a3c413f89e433683f9040b",
InterfaceID: "ce48969398f05f33946d560708be108a",
Mode: "unknown",
},
},
},
expectedErr: true,
},
}
for _, tt := range tcases {
p := &patcher{
afMap: tt.afMap,
regionMap: tt.regionMap,
}
klog.V(4).Info(tt.name)
ops, err := p.getPatchOpsOrchestrated(0, tt.container)
if tt.expectedErr && err == nil {
t.Errorf("Test case '%s': no error returned", tt.name)
}
if !tt.expectedErr && err != nil {
t.Errorf("Test case '%s': unexpected error %+v", tt.name, err)
}
if len(ops) != tt.expectedOps {
t.Errorf("test case '%s': expected %d ops, but got %d\n%v", tt.name, tt.expectedOps, len(ops), ops)
}
}
}
func TestNewPatcherManager(t *testing.T) {
tcases := []struct {
name string
defaultMode string
expectedErr bool
}{
{
name: "Everything is OK",
defaultMode: preprogrammed,
},
{
name: "Unknown default mode",
defaultMode: "unknownMode",
expectedErr: true,
},
}
for _, tt := range tcases {
t.Run(tt.name, func(t *testing.T) {
_, err := newPatcherManager(tt.defaultMode)
p := newPatcher()
for _, af := range tt.afs {
p.addAf(af)
}
for _, region := range tt.regions {
p.addRegion(region)
}
ops, err := p.getPatchOps(0, tt.container)
if tt.expectedErr && err == nil {
t.Errorf("Test case '%s': no error returned", tt.name)
}
if !tt.expectedErr && err != nil {
t.Errorf("Test case '%s': unexpected error %+v", tt.name, err)
t.Errorf("Test case '%s': unexpected error: %+v", tt.name, err)
}
if len(ops) != tt.expectedOps {
t.Errorf("test case '%s': expected %d ops, but got %d\n%v", tt.name, tt.expectedOps, len(ops), ops)
}
})
}
}
func TestGetPatcher(t *testing.T) {
namespace := "test"
tcases := []struct {
name string
pm patcherManager
}{
{
name: "Create new patcher",
pm: newPatcherManager(),
},
{
name: "Return existing patcher",
pm: map[string]*patcher{namespace: newPatcher()},
},
}
for _, tt := range tcases {
t.Run(tt.name, func(t *testing.T) {
p := tt.pm.getPatcher(namespace)
if p != tt.pm[namespace] {
t.Error("stored and received patchers are not equal")
}
})
}

View File

@ -12,7 +12,7 @@
* [Verify node kubelet config](#verify-node-kubelet-config)
* [Deploying as a DaemonSet](#deploying-as-a-daemonset)
* [Create a service account](#create-a-service-account)
* [Deploying `orchestrated` mode](#deploying-orchestrated-mode)
* [Deploying `region` mode](#deploying-region-mode)
* [Deploying `af` mode](#deploying-af-mode)
* [Deploy the DaemonSet](#deploy-the-daemonset)
* [Verify plugin registration](#verify-plugin-registration)
@ -75,26 +75,26 @@ development, initial deployment and debugging.
The FPGA plugin set can run in one of two modes:
- `region`/`orchestrated` mode, where the plugins locate and advertise
- `region` mode, where the plugins locate and advertise
regions of the FPGA, and facilitate programming of those regions with the
requested bitstreams.
- `af`/`preprogrammed` mode, where the FPGA bitstreams are already loaded
- `af` mode, where the FPGA bitstreams are already loaded
onto the FPGA, and the plugins discover and advertise the existing
Accelerator Functions (AF).
The example YAML deployments described in this document only currently support
`af`/`preprogrammed` mode. To utilise `region`/`orchestrated` mode, either modify
the existing YAML appropriately, or deploy 'by hand'.
`af` mode. To utilise `region` mode, either modify the existing YAML appropriately,
or deploy 'by hand'.
Overview diagrams of `preprogrammed` and `orchestrated` modes are below:
Overview diagrams of `af` and `region` modes are below:
Orchestrated/region mode:
region mode:
![Overview of `orchestrated` mode](pictures/FPGA-orchestrated.png)
![Overview of `region` mode](pictures/FPGA-region.png)
Preprogrammed/af mode:
af mode:
![Overview of `preprogrammed` mode](pictures/FPGA-preprogrammed.png)
![Overview of `af` mode](pictures/FPGA-af.png)
# Installation
@ -136,12 +136,11 @@ major components:
- [FPGA admission controller webhook](../fpga_admissionwebhook/README.md)
- [FPGA prestart CRI-O hook](../fpga_crihook/README.md)
The CRI-O hook is only *required* if `orchestrated` FPGA bitstream programming mode is
being used, but is installed by default by the
The CRI-O hook is only *required* if `region` mode is being used, but is installed by default by the
[FPGA plugin DaemonSet YAML](../../deployments/fpga_plugin/fpga_plugin.yaml), and is benign
in `preprogrammed` mode.
in `af` mode.
If using the `preprogrammed` mode, and therefore *not* using the
If using the `af` mode, and therefore *not* using the
CRI-O prestart hook, runtimes other than CRI-O can be used (that is, the CRI-O hook presently
*only* works with the CRI-O runtime).
@ -192,8 +191,8 @@ YAML deployment files to reference your required image.
### For beta testing: new deployment model
The FPGA plugin deployment is currently being rewritten to enable
straight-forward deployment of both `af/preprogrammed` and
`region/orchestrated` modes. The deployment has two steps:
straight-forward deployment of both `af` and
`region` modes. The deployment has two steps:
1. Run `scripts/fpga-plugin-prepare-for-kustomization.sh`. This will
create the necessary secrets: a key and a signed certificate for
@ -226,19 +225,16 @@ clusterrole.rbac.authorization.k8s.io/node-getter created
clusterrolebinding.rbac.authorization.k8s.io/get-nodes created
```
### Deploying `orchestrated` mode
### Deploying `region` mode
To deploy the FPGA plugin DaemonSet in `orchestrated` (`region`) mode, you need to set the plugin
To deploy the FPGA plugin DaemonSet in `region` mode, you need to set the plugin
mode annotation on all of your nodes, otherwise the FPGA plugin will run in its default
`af` (`preprogrammed`) mode.
`af` mode.
```bash
$ kubectl annotate node --all 'fpga.intel.com/device-plugin-mode=region'
```
Mixing of the two modes (`orchestrated` and `af`) across nodes in the same cluster is
*not currently supported*.
### Deploying `af` mode
To deploy the FPGA plugin DaemonSet in `af` mode, you do not need to set the mode annotation on
@ -260,7 +256,7 @@ daemonset.apps/intel-fpga-plugin created
### Verify plugin registration
Verify the FPGA plugin has been deployed on the nodes. The below shows the output
you can expect in `region` mode, but similar output should be expected for `preprogrammed`
you can expect in `region` mode, but similar output should be expected for `af`
mode:
```bash

View File

@ -116,7 +116,7 @@ func getDevicesDFL() []device {
},
{
id: "dfl-port.4",
afuID: "d8424dc4a4a3c413f89e433683f9040b",
afuID: unhealthyAfuID,
devNode: "/dev/dfl-port.4",
},
},
@ -240,7 +240,7 @@ func TestGetAfuTreeDFL(t *testing.T) {
Permissions: "rw",
},
}
expected.AddDevice(afMode+"-d8424dc4a4a3c413f89e433683f9040b", "dfl-port.0", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
expected.AddDevice("ce48969398f05f33946d560708be108d8424dc4a4a3c413f89e433683f9040b", "dfl-port.0", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
nodes = []pluginapi.DeviceSpec{
{
@ -250,7 +250,7 @@ func TestGetAfuTreeDFL(t *testing.T) {
},
}
expected.AddDevice(afMode+"-d8424dc4a4a3c413f89e433683f9040b", "dfl-port.1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
expected.AddDevice("ce48969398f05f33946d560708be108d8424dc4a4a3c413f89e433683f9040b", "dfl-port.1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
nodes = []pluginapi.DeviceSpec{
{
@ -259,7 +259,7 @@ func TestGetAfuTreeDFL(t *testing.T) {
Permissions: "rw",
},
}
expected.AddDevice(afMode+"-d8424dc4a4a3c413f89e433683f9040b", "dfl-port.2", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
expected.AddDevice("ce48969398f05f33946d560708be108d8424dc4a4a3c413f89e433683f9040b", "dfl-port.2", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
nodes = []pluginapi.DeviceSpec{
{
@ -268,7 +268,7 @@ func TestGetAfuTreeDFL(t *testing.T) {
Permissions: "rw",
},
}
expected.AddDevice(afMode+"-"+unhealthyAfuID, "dfl-port.3", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil))
expected.AddDevice(unhealthyInterfaceID[:interfaceIDPrefixLength]+unhealthyAfuID, "dfl-port.3", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil))
nodes = []pluginapi.DeviceSpec{
{
@ -277,11 +277,11 @@ func TestGetAfuTreeDFL(t *testing.T) {
Permissions: "rw",
},
}
expected.AddDevice(afMode+"-d8424dc4a4a3c413f89e433683f9040b", "dfl-port.4", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
expected.AddDevice(unhealthyInterfaceID[:interfaceIDPrefixLength]+unhealthyAfuID, "dfl-port.4", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil))
result := getAfuTree(getDevicesDFL())
if !reflect.DeepEqual(result, expected) {
t.Errorf("Got unexpected result: %v, expected: %v", result, expected)
t.Errorf("Got unexpected result:\n%v\nexpected:\n%v", result, expected)
}
}

View File

@ -51,6 +51,11 @@ const (
// Frequency of device scans
scanFrequency = 5 * time.Second
// Names of extended resources cannot be longer than 63 characters.
// Therefore for AF resources we have to cut the interface ID prefix
// to 31 characters only.
interfaceIDPrefixLength = 31
)
type getDevTreeFunc func(devices []device) dpapi.DeviceTree
@ -125,7 +130,7 @@ func getAfuTree(devices []device) dpapi.DeviceTree {
if afu.afuID == unhealthyAfuID {
health = pluginapi.Unhealthy
}
devType := fmt.Sprintf("%s-%s", afMode, afu.afuID)
devType := region.interfaceID[:interfaceIDPrefixLength] + afu.afuID
devNodes := []pluginapi.DeviceSpec{
{
HostPath: afu.devNode,

View File

@ -210,7 +210,7 @@ func TestGetAfuTreeOPAE(t *testing.T) {
Permissions: "rw",
},
}
expected.AddDevice(afMode+"-d8424dc4a4a3c413f89e433683f9040b", "intel-fpga-port.0", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
expected.AddDevice("ce48969398f05f33946d560708be108d8424dc4a4a3c413f89e433683f9040b", "intel-fpga-port.0", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
nodes = []pluginapi.DeviceSpec{
{
@ -219,7 +219,7 @@ func TestGetAfuTreeOPAE(t *testing.T) {
Permissions: "rw",
},
}
expected.AddDevice(afMode+"-d8424dc4a4a3c413f89e433683f9040b", "intel-fpga-port.1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
expected.AddDevice("ce48969398f05f33946d560708be108d8424dc4a4a3c413f89e433683f9040b", "intel-fpga-port.1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
nodes = []pluginapi.DeviceSpec{
{
@ -228,7 +228,7 @@ func TestGetAfuTreeOPAE(t *testing.T) {
Permissions: "rw",
},
}
expected.AddDevice(afMode+"-"+unhealthyAfuID, "intel-fpga-port.2", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil))
expected.AddDevice(unhealthyInterfaceID[:interfaceIDPrefixLength]+unhealthyAfuID, "intel-fpga-port.2", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil))
result := getAfuTree(getDevicesOPAE())
if !reflect.DeepEqual(result, expected) {

View File

Before

Width:  |  Height:  |  Size: 53 KiB

After

Width:  |  Height:  |  Size: 53 KiB

View File

Before

Width:  |  Height:  |  Size: 59 KiB

After

Width:  |  Height:  |  Size: 59 KiB

View File

@ -19,4 +19,10 @@ spec:
properties:
afuId:
type: string
pattern: '^[0-9a-f]{8,128}$'
pattern: '^[0-9a-f]{8,32}$'
interfaceId:
type: string
pattern: '^[0-9a-f]{8,32}$'
mode:
type: string
# The alternation must be grouped so that both anchors apply to each option;
# an ungrouped '^af|region$' accepts any value that merely starts with "af"
# or ends with "region".
pattern: '^(af|region)$'

View File

@ -1,13 +1,6 @@
# DCP 1.0
apiVersion: fpga.intel.com/v1
kind: AcceleratorFunction
metadata:
name: arria10.dcp1.0-compress
spec:
afuId: 946c21d1e49704a5e5daa0805bc6b0785e1765bf
---
apiVersion: fpga.intel.com/v1
kind: AcceleratorFunction
metadata:
name: arria10.dcp1.0-nlb0
spec:

View File

@ -29,7 +29,6 @@ spec:
args:
- -tls-cert-file=/etc/webhook/certs/cert.pem
- -tls-private-key-file=/etc/webhook/certs/key.pem
- -mode={MODE}
- -v=1
volumeMounts:
- name: webhook-certs

View File

@ -8,17 +8,12 @@ spec:
---
apiVersion: fpga.intel.com/v1
kind: AcceleratorFunction
metadata:
name: arria10.dcp1.0-compress
spec:
afuId: 946c21d1e49704a5e5daa0805bc6b0785e1765bf
---
apiVersion: fpga.intel.com/v1
kind: AcceleratorFunction
metadata:
name: arria10.dcp1.0-nlb0
spec:
afuId: d8424dc4a4a3c413f89e433683f9040b
interfaceId: ce48969398f05f33946d560708be108a
mode: region
---
apiVersion: fpga.intel.com/v1
kind: AcceleratorFunction
@ -26,6 +21,8 @@ metadata:
name: arria10.dcp1.0-nlb3
spec:
afuId: f7df405cbd7acf7222f144b0b93acd18
interfaceId: ce48969398f05f33946d560708be108a
mode: region
---
# DCP 1.1
apiVersion: fpga.intel.com/v1
@ -41,6 +38,8 @@ metadata:
name: arria10.dcp1.1-nlb0
spec:
afuId: d8424dc4a4a3c413f89e433683f9040b
interfaceId: 9926ab6d6c925a68aabca7d84c545738
mode: region
---
apiVersion: fpga.intel.com/v1
kind: AcceleratorFunction
@ -48,6 +47,8 @@ metadata:
name: arria10.dcp1.1-nlb3
spec:
afuId: f7df405cbd7acf7222f144b0b93acd18
interfaceId: 9926ab6d6c925a68aabca7d84c545738
mode: region
---
# DCP 1.2
apiVersion: fpga.intel.com/v1
@ -63,6 +64,8 @@ metadata:
name: arria10.dcp1.2-nlb0
spec:
afuId: d8424dc4a4a3c413f89e433683f9040b
interfaceId: 69528db6eb31577a8c3668f9faa081f6
mode: region
---
apiVersion: fpga.intel.com/v1
kind: AcceleratorFunction
@ -70,6 +73,8 @@ metadata:
name: arria10.dcp1.2-nlb3
spec:
afuId: f7df405cbd7acf7222f144b0b93acd18
interfaceId: 69528db6eb31577a8c3668f9faa081f6
mode: region
---
# D5005
apiVersion: fpga.intel.com/v1
@ -85,6 +90,8 @@ metadata:
name: d5005-nlb0
spec:
afuId: d8424dc4a4a3c413f89e433683f9040b
interfaceId: bfac4d851ee856fe8c95865ce1bbaa2d
mode: region
---
apiVersion: fpga.intel.com/v1
kind: AcceleratorFunction
@ -92,3 +99,14 @@ metadata:
name: d5005-nlb3
spec:
afuId: f7df405cbd7acf7222f144b0b93acd18
interfaceId: bfac4d851ee856fe8c95865ce1bbaa2d
mode: region
---
apiVersion: fpga.intel.com/v1
kind: AcceleratorFunction
metadata:
name: d5005-nlb3-preprogrammed
spec:
afuId: f7df405cbd7acf7222f144b0b93acd18
interfaceId: bfac4d851ee856fe8c95865ce1bbaa2d
mode: af

View File

@ -19,4 +19,4 @@ spec:
properties:
interfaceId:
type: string
pattern: '^[0-9a-f]{8,128}$'
pattern: '^[0-9a-f]{8,32}$'

View File

@ -18,7 +18,9 @@ type AcceleratorFunction struct {
// AcceleratorFunctionSpec contains actual specs for AcceleratorFunction
type AcceleratorFunctionSpec struct {
AfuID string `json:"afuId"`
AfuID string `json:"afuId"`
InterfaceID string `json:"interfaceId"`
Mode string `json:"mode"`
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

View File

@ -15,7 +15,6 @@ function help {
echo ''
echo ' Options:'
echo ' --kubectl <kubectl> - path to the kubectl utility'
echo ' --mode <mode> - "preprogrammed" (default) or "orchestrated" mode of operation'
echo ' --ca-bundle-path <path> - path to CA bundle used for signing cerificates in the cluster'
echo ' --namespace <name> - namespace to deploy the webhook in'
}
@ -30,10 +29,6 @@ while [[ $# -gt 0 ]]; do
cabundlepath="$2"
shift
;;
--mode)
mode="$2"
shift
;;
--namespace)
namespace="$2"
shift
@ -54,7 +49,6 @@ while [[ $# -gt 0 ]]; do
done
[ -z ${kubectl} ] && kubectl="kubectl"
[ -z ${mode} ] && mode="preprogrammed"
[ -z ${namespace} ] && namespace="default"
which ${kubectl} > /dev/null 2>&1 || { echo "ERROR: ${kubectl} not found"; exit 1; }
@ -75,11 +69,6 @@ if [ "x${command}" = "xcleanup" ]; then
exit 0
fi
if [ "x${mode}" != "xpreprogrammed" -a "x${mode}" != "xorchestrated" ]; then
echo "ERROR: supported modes are 'preprogrammed' and 'orchestrated'"
exit 1
fi
if [ -z ${cabundlepath} ]; then
CA_BUNDLE=$(${kubectl} get configmap -n kube-system extension-apiserver-authentication -o=jsonpath='{.data.client-ca-file}' | base64 -w 0)
else
@ -98,7 +87,7 @@ cat ${srcroot}/deployments/fpga_admissionwebhook/rbac-config-tpl.yaml | \
${kubectl} create -f -
echo "Create webhook deployment"
cat ${srcroot}/deployments/fpga_admissionwebhook/deployment-tpl.yaml | sed -e "s/{MODE}/${mode}/g" -e "s/{uid}/${uid}/g" -e "s/{gid}/${gid}/g" | ${kubectl} --namespace ${namespace} create -f -
cat ${srcroot}/deployments/fpga_admissionwebhook/deployment-tpl.yaml | sed -e "s/{uid}/${uid}/g" -e "s/{gid}/${gid}/g" | ${kubectl} --namespace ${namespace} create -f -
echo "Create webhook service"
${kubectl} --namespace ${namespace} create -f ${srcroot}/deployments/fpga_admissionwebhook/service.yaml

View File

@ -51,25 +51,37 @@ func describe() {
return append(os.Environ(), "KUBECONFIG="+framework.TestContext.KubeConfig)
}
ginkgo.It("mutates created pods to reference resolved AFs in preprogrammed mode", func() {
ginkgo.By("deploying webhook in preprogrammed mode")
ginkgo.It("mutates created pods to reference resolved AFs", func() {
ginkgo.By("deploying webhook")
_, _, err := framework.RunCmdEnv(getEnv(), webhookDeployPath, "--kubectl", framework.TestContext.KubectlPath, "--namespace", f.Namespace.Name)
framework.ExpectNoError(err)
checkPodMutation(f, "fpga.intel.com/af-d8424dc4a4a3c413f89e433683f9040b")
checkPodMutation(f, "fpga.intel.com/d5005-nlb3-preprogrammed",
"fpga.intel.com/bfac4d851ee856fe8c95865ce1bbaa2f7df405cbd7acf7222f144b0b93acd18")
})
ginkgo.It("mutates created pods to reference resolved Regions in orchestrated mode", func() {
ginkgo.By("deploying webhook in orchestrated mode")
_, _, err := framework.RunCmdEnv(getEnv(), webhookDeployPath, "--kubectl", framework.TestContext.KubectlPath, "--namespace", f.Namespace.Name, "--mode", "orchestrated")
ginkgo.It("mutates created pods to reference resolved Regions", func() {
ginkgo.By("deploying webhook")
_, _, err := framework.RunCmdEnv(getEnv(), webhookDeployPath, "--kubectl", framework.TestContext.KubectlPath, "--namespace", f.Namespace.Name)
framework.ExpectNoError(err)
checkPodMutation(f, "fpga.intel.com/region-ce48969398f05f33946d560708be108a")
checkPodMutation(f, "fpga.intel.com/arria10.dcp1.0-nlb0",
"fpga.intel.com/region-ce48969398f05f33946d560708be108a")
})
ginkgo.It("mutates created pods to reference resolved Regions in regiondevel mode", func() {
ginkgo.By("deploying webhook")
_, _, err := framework.RunCmdEnv(getEnv(), webhookDeployPath, "--kubectl", framework.TestContext.KubectlPath, "--namespace", f.Namespace.Name)
framework.ExpectNoError(err)
checkPodMutation(f, "fpga.intel.com/arria10.dcp1.0",
"fpga.intel.com/region-ce48969398f05f33946d560708be108a")
})
}
func checkPodMutation(f *framework.Framework, expectedMutation v1.ResourceName) {
func checkPodMutation(f *framework.Framework, source, expectedMutation v1.ResourceName) {
ginkgo.By("waiting for webhook's availability")
if _, err := e2epod.WaitForPodsWithLabelRunningReady(f.ClientSet, f.Namespace.Name,
labels.Set{"app": "intel-fpga-webhook"}.AsSelector(), 1 /* one replica */, 10*time.Second); err != nil {
@ -80,8 +92,8 @@ func checkPodMutation(f *framework.Framework, expectedMutation v1.ResourceName)
ginkgo.By("submitting a pod for addmission")
podSpec := f.NewTestPod("webhook-tester",
v1.ResourceList{"fpga.intel.com/arria10.dcp1.0-nlb0": resource.MustParse("1")},
v1.ResourceList{"fpga.intel.com/arria10.dcp1.0-nlb0": resource.MustParse("1")})
v1.ResourceList{source: resource.MustParse("1")},
v1.ResourceList{source: resource.MustParse("1")})
pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(context.TODO(),
podSpec, metav1.CreateOptions{})
framework.ExpectNoError(err, "pod Create API error")