mirror of
https://github.com/intel/intel-device-plugins-for-kubernetes.git
synced 2025-06-03 03:59:37 +00:00
webhook: remove mode of operation
fpga: make AFU resource name 63 char long webhook: drop mode from README webhook: extend mappings description webhook: tighten CRD definitions webhook: drop mapping to non-existing afuId explicitly state mappings names can be in any format use consistent terminology across fpga webhook and plugin
This commit is contained in:
parent
96d9e642e1
commit
6c2eacfae5
@ -30,16 +30,6 @@ The admission controller also keeps the user from bypassing namespaced mapping r
|
||||
by denying admission of any pods that are trying to use internal knowledge of InterfaceID or
|
||||
Bitstream ID environment variables used by the prestart hook.
|
||||
|
||||
The admission controller can operate in two separate modes - preprogrammed or orchestration programmed.
|
||||
The mode must be chosen to match that of the [FPGA plugin](../fpga_plugin/README.md) configuration, as
|
||||
shown in the following table:
|
||||
|
||||
| FPGA plugin mode | matching admission controller mode |
|
||||
|:---------------- |:---------------------------------- |
|
||||
| region | orchestrated |
|
||||
| af | preprogrammed |
|
||||
|
||||
|
||||
# Dependencies
|
||||
|
||||
This component is one of a set of components that work together. You may also want to
|
||||
@ -118,14 +108,6 @@ Register webhook
|
||||
mutatingwebhookconfiguration.admissionregistration.k8s.io/fpga-mutator-webhook-cfg created
|
||||
```
|
||||
|
||||
By default, the script deploys the webhook in a preprogrammed mode.
|
||||
|
||||
Use the `--mode` script option to deploy the webhook in orchestrated mode:
|
||||
|
||||
```bash
|
||||
$ ./scripts/webhook-deploy.sh --mode orchestrated
|
||||
```
|
||||
|
||||
The script needs the CA bundle used for signing certificate requests in your cluster.
|
||||
By default, the script fetches the bundle stored in the configmap
|
||||
`extension-apiserver-authentication`. However, your cluster may use a different signing
|
||||
@ -138,13 +120,38 @@ $ ./scripts/webhook-deploy.sh --ca-bundle-path /var/run/kubernetes/server-ca.crt
|
||||
|
||||
# Mappings
|
||||
|
||||
Requested FPGA resources are translated to AF resources. For example,
|
||||
`fpga.intel.com/arria10.dcp1.1-nlb0` is translated to `fpga.intel.com/af-d8424dc4a4a3c413f89e433683f9040b`.
|
||||
Mappings are an essential part of the setup that gives a flexible instrument to a cluster
|
||||
administrator to manage FPGA bitstreams and to control access to them. Being a set of
|
||||
custom resource definitions, they are used to configure the way FPGA resource requests get
|
||||
translated into actual resources provided by the cluster.
|
||||
|
||||
In orchestrated mode, `fpga.intel.com/arria10.dcp1.1-nlb0` gets translated to
|
||||
`fpga.intel.com/region-9926ab6d6c925a68aabca7d84c545738`, and, the corresponding AF IDs are set in
|
||||
environment variables for the container. The [FPGA CRI-O hook](../fpga_crihook/README.md)
|
||||
then loads the requested bitstream to a region before the container is started.
|
||||
For the following mapping
|
||||
|
||||
```yaml
|
||||
apiVersion: fpga.intel.com/v1
|
||||
kind: AcceleratorFunction
|
||||
metadata:
|
||||
name: arria10.dcp1.1-nlb0
|
||||
spec:
|
||||
afuId: d8424dc4a4a3c413f89e433683f9040b
|
||||
interfaceId: 9926ab6d6c925a68aabca7d84c545738
|
||||
mode: af
|
||||
```
|
||||
|
||||
requested FPGA resources are translated to AF resources. For example,
|
||||
`fpga.intel.com/arria10.dcp1.1-nlb0` is translated to
|
||||
`fpga.intel.com/9926ab6d6c925a68aabca7d84c54573d8424dc4a4a3c413f89e433683f9040b`.
|
||||
The first 31 characters of the resource name part (`9926ab6d6c925a68aabca7d84c54573`)
|
||||
are the first 31 characters of the region interface ID for Arria10 with DCP1.1
|
||||
firmware. The next 32 characters (`d8424dc4a4a3c413f89e433683f9040b`) are the accelerator function ID.
|
||||
The format of resource names (e.g. `arria10.dcp1.1-nlb0`) can be arbitrary and is up
|
||||
to a cluster administrator.
|
||||
|
||||
The same mapping, but with its mode field set to `region`, translates
|
||||
`fpga.intel.com/arria10.dcp1.1-nlb0` to `fpga.intel.com/region-9926ab6d6c925a68aabca7d84c545738`,
|
||||
and the corresponding AF IDs are set in environment variables for the container.
|
||||
The [FPGA CRI-O hook](../fpga_crihook/README.md) then loads the requested bitstream to a region
|
||||
before the container is started.
|
||||
|
||||
Mappings of resource names are configured with objects of `AcceleratorFunction` and
|
||||
`FpgaRegion` custom resource definitions found respectively in
|
||||
|
@ -42,7 +42,7 @@ type fpgaObjectKey struct {
|
||||
}
|
||||
|
||||
type controller struct {
|
||||
patcherManager *patcherManager
|
||||
patcherManager patcherManager
|
||||
informerFactory informers.SharedInformerFactory
|
||||
afsSynced cache.InformerSynced
|
||||
regionsSynced cache.InformerSynced
|
||||
@ -52,7 +52,7 @@ type controller struct {
|
||||
stopCh chan struct{}
|
||||
}
|
||||
|
||||
func newController(patcherManager *patcherManager, config *rest.Config) (*controller, error) {
|
||||
func newController(patcherManager patcherManager, config *rest.Config) (*controller, error) {
|
||||
clientset, err := clientset.NewForConfig(config)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "Failed to create REST clientset")
|
||||
@ -172,11 +172,7 @@ func (c *controller) syncAfHandler(key string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
patcher, err := c.patcherManager.getPatcher(namespace)
|
||||
if err != nil {
|
||||
runtime.HandleError(errors.Wrapf(err, "can't get patcher for namespace %s", namespace))
|
||||
return nil
|
||||
}
|
||||
patcher := c.patcherManager.getPatcher(namespace)
|
||||
|
||||
// Get the AcceleratorFunction resource with this namespace/name
|
||||
af, err := c.afLister.AcceleratorFunctions(namespace).Get(name)
|
||||
@ -206,11 +202,7 @@ func (c *controller) syncRegionHandler(key string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
patcher, err := c.patcherManager.getPatcher(namespace)
|
||||
if err != nil {
|
||||
runtime.HandleError(errors.Wrapf(err, "can't get patcher for namespace %s", namespace))
|
||||
return nil
|
||||
}
|
||||
patcher := c.patcherManager.getPatcher(namespace)
|
||||
|
||||
// Get the FpgaRegion resource with this namespace/name
|
||||
region, err := c.regionLister.FpgaRegions(namespace).Get(name)
|
||||
|
@ -63,11 +63,10 @@ func (l *fakeAfLister) List(selector labels.Selector) (ret []*v1.AcceleratorFunc
|
||||
|
||||
func TestSyncAfHandler(t *testing.T) {
|
||||
tcases := []struct {
|
||||
name string
|
||||
key string
|
||||
afLister *fakeAfLister
|
||||
patcherManagerIsBroken bool
|
||||
expectedErr bool
|
||||
name string
|
||||
key string
|
||||
afLister *fakeAfLister
|
||||
expectedErr bool
|
||||
}{
|
||||
{
|
||||
name: "Wrong key format",
|
||||
@ -87,11 +86,6 @@ func TestSyncAfHandler(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Broken patcher manager",
|
||||
key: "default/arria10-nlb0",
|
||||
patcherManagerIsBroken: true,
|
||||
},
|
||||
{
|
||||
name: "Unknown key",
|
||||
key: "default/unknown",
|
||||
@ -107,13 +101,7 @@ func TestSyncAfHandler(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, tt := range tcases {
|
||||
pm, err := newPatcherManager(preprogrammed)
|
||||
if err != nil {
|
||||
t.Fatalf("Test case '%s': %+v", tt.name, err)
|
||||
}
|
||||
if tt.patcherManagerIsBroken {
|
||||
pm.defaultMode = "broken"
|
||||
}
|
||||
pm := newPatcherManager()
|
||||
c, err := newController(pm, &rest.Config{})
|
||||
if err != nil {
|
||||
t.Fatalf("Test case '%s': %+v", tt.name, err)
|
||||
@ -162,11 +150,10 @@ func (l *fakeRegionLister) List(selector labels.Selector) (ret []*v1.FpgaRegion,
|
||||
|
||||
func TestSyncRegionHandler(t *testing.T) {
|
||||
tcases := []struct {
|
||||
name string
|
||||
key string
|
||||
patcherManagerIsBroken bool
|
||||
regionLister *fakeRegionLister
|
||||
expectedErr bool
|
||||
name string
|
||||
key string
|
||||
regionLister *fakeRegionLister
|
||||
expectedErr bool
|
||||
}{
|
||||
{
|
||||
name: "Wrong key format",
|
||||
@ -186,11 +173,6 @@ func TestSyncRegionHandler(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Broken patcher manager",
|
||||
key: "default/arria10",
|
||||
patcherManagerIsBroken: true,
|
||||
},
|
||||
{
|
||||
name: "Unknown key",
|
||||
key: "default/unknown",
|
||||
@ -206,13 +188,7 @@ func TestSyncRegionHandler(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, tt := range tcases {
|
||||
pm, err := newPatcherManager(preprogrammed)
|
||||
if err != nil {
|
||||
t.Fatalf("Test case '%s': %+v", tt.name, err)
|
||||
}
|
||||
if tt.patcherManagerIsBroken {
|
||||
pm.defaultMode = "broken"
|
||||
}
|
||||
pm := newPatcherManager()
|
||||
c, err := newController(pm, &rest.Config{})
|
||||
if err != nil {
|
||||
t.Fatalf("Test case '%s': %+v", tt.name, err)
|
||||
@ -328,7 +304,7 @@ func TestProcessNextWorkItem(t *testing.T) {
|
||||
},
|
||||
}
|
||||
for _, tt := range tcases {
|
||||
pm, _ := newPatcherManager(preprogrammed)
|
||||
pm := newPatcherManager()
|
||||
c, err := newController(pm, &rest.Config{})
|
||||
if err != nil {
|
||||
t.Fatalf("Test case '%s': %+v", tt.name, err)
|
||||
@ -369,7 +345,7 @@ func TestRun(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, tt := range tcases {
|
||||
pm := &patcherManager{}
|
||||
pm := newPatcherManager()
|
||||
c, err := newController(pm, &rest.Config{})
|
||||
if err != nil {
|
||||
t.Fatalf("Test case '%s': %+v", tt.name, err)
|
||||
@ -404,7 +380,7 @@ func TestNewController(t *testing.T) {
|
||||
config := &rest.Config{
|
||||
Host: tt.configHost,
|
||||
}
|
||||
pm := &patcherManager{}
|
||||
pm := newPatcherManager()
|
||||
c, err := newController(pm, config)
|
||||
if err != nil && !tt.expectedErr {
|
||||
t.Errorf("Test case '%s': unexpected error: %+v", tt.name, err)
|
||||
|
@ -18,7 +18,6 @@ import (
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"os"
|
||||
@ -39,8 +38,6 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
preprogrammed = "preprogrammed"
|
||||
orchestrated = "orchestrated"
|
||||
controllerThreadNum = 1
|
||||
)
|
||||
|
||||
@ -69,7 +66,7 @@ func getTLSConfig(certFile string, keyFile string) *tls.Config {
|
||||
}
|
||||
}
|
||||
|
||||
func mutatePods(ar v1beta1.AdmissionReview, pm *patcherManager) *v1beta1.AdmissionResponse {
|
||||
func mutatePods(ar v1beta1.AdmissionReview, pm patcherManager) *v1beta1.AdmissionResponse {
|
||||
var ops []string
|
||||
|
||||
klog.V(4).Info("mutating pods")
|
||||
@ -97,11 +94,7 @@ func mutatePods(ar v1beta1.AdmissionReview, pm *patcherManager) *v1beta1.Admissi
|
||||
name = pod.ObjectMeta.GenerateName
|
||||
}
|
||||
klog.V(4).Infof("Received pod '%s' in name space '%s'", name, namespace)
|
||||
patcher, err := pm.getPatcher(namespace)
|
||||
if err != nil {
|
||||
klog.Warningf("%+v", err)
|
||||
return toAdmissionResponse(err)
|
||||
}
|
||||
patcher := pm.getPatcher(namespace)
|
||||
|
||||
reviewResponse := v1beta1.AdmissionResponse{}
|
||||
reviewResponse.Allowed = true
|
||||
@ -198,7 +191,7 @@ func serve(w http.ResponseWriter, r *http.Request, admit admitFunc) {
|
||||
}
|
||||
}
|
||||
|
||||
func makePodsHandler(pm *patcherManager) func(w http.ResponseWriter, r *http.Request) {
|
||||
func makePodsHandler(pm patcherManager) func(w http.ResponseWriter, r *http.Request) {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
serve(w, r, func(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse {
|
||||
return mutatePods(ar, pm)
|
||||
@ -211,7 +204,6 @@ func main() {
|
||||
var master string
|
||||
var certFile string
|
||||
var keyFile string
|
||||
var mode string
|
||||
var config *rest.Config
|
||||
var err error
|
||||
|
||||
@ -220,7 +212,6 @@ func main() {
|
||||
flag.StringVar(&certFile, "tls-cert-file", certFile,
|
||||
"File containing the x509 Certificate for HTTPS. (CA cert, if any, concatenated after server cert).")
|
||||
flag.StringVar(&keyFile, "tls-private-key-file", keyFile, "File containing the x509 private key matching --tls-cert-file.")
|
||||
flag.StringVar(&mode, "mode", preprogrammed, fmt.Sprintf("webhook mode: '%s' (default) or '%s'", preprogrammed, orchestrated))
|
||||
flag.Parse()
|
||||
|
||||
if certFile == "" {
|
||||
@ -248,18 +239,15 @@ func main() {
|
||||
klog.Fatal("Failed to get cluster config ", err)
|
||||
}
|
||||
|
||||
patcherManager, err := newPatcherManager(mode)
|
||||
if err != nil {
|
||||
klog.Fatalf("%+v", err)
|
||||
}
|
||||
pm := newPatcherManager()
|
||||
|
||||
controller, err := newController(patcherManager, config)
|
||||
controller, err := newController(pm, config)
|
||||
if err != nil {
|
||||
klog.Fatalf("%+v", err)
|
||||
}
|
||||
go controller.run(controllerThreadNum)
|
||||
|
||||
http.HandleFunc("/pods", makePodsHandler(patcherManager))
|
||||
http.HandleFunc("/pods", makePodsHandler(pm))
|
||||
|
||||
klog.V(4).Info("Webhook started")
|
||||
|
||||
|
@ -24,6 +24,7 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
fpgav1 "github.com/intel/intel-device-plugins-for-kubernetes/pkg/apis/fpga.intel.com/v1"
|
||||
"k8s.io/api/admission/v1beta1"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
@ -144,16 +145,39 @@ func TestMutatePods(t *testing.T) {
|
||||
},
|
||||
},
|
||||
}
|
||||
brokenPod := corev1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Namespace: "default",
|
||||
},
|
||||
Spec: corev1.PodSpec{
|
||||
Containers: []corev1.Container{
|
||||
{
|
||||
Name: "test-container",
|
||||
Image: "test-image",
|
||||
Resources: corev1.ResourceRequirements{
|
||||
Limits: corev1.ResourceList{
|
||||
"cpu": resource.MustParse("1"),
|
||||
"fpga.intel.com/arria10": resource.MustParse("1"),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
podRaw, err := json.Marshal(pod)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
brokenPodRaw, err := json.Marshal(brokenPod)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
tcases := []struct {
|
||||
name string
|
||||
mode string
|
||||
ar v1beta1.AdmissionReview
|
||||
expectedResponse bool
|
||||
expectedAllowed bool
|
||||
expectedPatchOps int
|
||||
}{
|
||||
{
|
||||
@ -161,7 +185,6 @@ func TestMutatePods(t *testing.T) {
|
||||
ar: v1beta1.AdmissionReview{
|
||||
Request: &v1beta1.AdmissionRequest{},
|
||||
},
|
||||
mode: preprogrammed,
|
||||
},
|
||||
{
|
||||
name: "admission request without object",
|
||||
@ -170,8 +193,8 @@ func TestMutatePods(t *testing.T) {
|
||||
Resource: metav1.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"},
|
||||
},
|
||||
},
|
||||
mode: preprogrammed,
|
||||
expectedResponse: true,
|
||||
expectedAllowed: true,
|
||||
},
|
||||
{
|
||||
name: "admission request with corrupted object",
|
||||
@ -183,11 +206,10 @@ func TestMutatePods(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
mode: preprogrammed,
|
||||
expectedResponse: true,
|
||||
},
|
||||
{
|
||||
name: "non-empty admission request in preprogrammed mode",
|
||||
name: "successful non-empty admission request",
|
||||
ar: v1beta1.AdmissionReview{
|
||||
Request: &v1beta1.AdmissionRequest{
|
||||
Resource: metav1.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"},
|
||||
@ -196,23 +218,9 @@ func TestMutatePods(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
mode: preprogrammed,
|
||||
expectedResponse: true,
|
||||
expectedPatchOps: 4,
|
||||
},
|
||||
{
|
||||
name: "non-empty admission request in orchestrated mode",
|
||||
ar: v1beta1.AdmissionReview{
|
||||
Request: &v1beta1.AdmissionRequest{
|
||||
Resource: metav1.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"},
|
||||
Object: runtime.RawExtension{
|
||||
Raw: podRaw,
|
||||
},
|
||||
},
|
||||
},
|
||||
mode: orchestrated,
|
||||
expectedResponse: true,
|
||||
expectedPatchOps: 5,
|
||||
expectedAllowed: true,
|
||||
},
|
||||
{
|
||||
name: "handle error after wrong getPatchOps()",
|
||||
@ -220,48 +228,54 @@ func TestMutatePods(t *testing.T) {
|
||||
Request: &v1beta1.AdmissionRequest{
|
||||
Resource: metav1.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"},
|
||||
Object: runtime.RawExtension{
|
||||
Raw: podRaw,
|
||||
Raw: brokenPodRaw,
|
||||
},
|
||||
},
|
||||
},
|
||||
mode: "unknown mode",
|
||||
expectedResponse: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tcase := range tcases {
|
||||
p := &patcher{
|
||||
mode: tcase.mode,
|
||||
regionMap: map[string]string{
|
||||
"arria10": "ce48969398f05f33946d560708be108a",
|
||||
},
|
||||
resourceMap: map[string]string{
|
||||
"fpga.intel.com/arria10": "ce48969398f05f33946d560708be108a",
|
||||
},
|
||||
}
|
||||
pm := &patcherManager{
|
||||
defaultMode: tcase.mode,
|
||||
patchers: map[string]*patcher{
|
||||
"default": p,
|
||||
},
|
||||
}
|
||||
resp := mutatePods(tcase.ar, pm)
|
||||
t.Run(tcase.name, func(t *testing.T) {
|
||||
p := newPatcher()
|
||||
p.addRegion(&fpgav1.FpgaRegion{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "arria10",
|
||||
},
|
||||
Spec: fpgav1.FpgaRegionSpec{
|
||||
InterfaceID: "ce48969398f05f33946d560708be108a",
|
||||
},
|
||||
})
|
||||
pm := newPatcherManager()
|
||||
pm["default"] = p
|
||||
resp := mutatePods(tcase.ar, pm)
|
||||
|
||||
if !tcase.expectedResponse && resp != nil {
|
||||
t.Errorf("Test case '%s': got unexpected response", tcase.name)
|
||||
} else if tcase.expectedResponse && resp == nil {
|
||||
t.Errorf("Test case '%s': got no response", tcase.name)
|
||||
} else if tcase.expectedResponse && tcase.expectedPatchOps > 0 {
|
||||
var ops interface{}
|
||||
actualPatchOps := 0
|
||||
if !tcase.expectedResponse && resp != nil {
|
||||
t.Errorf("Test case '%s': got unexpected response", tcase.name)
|
||||
} else if tcase.expectedResponse && resp == nil {
|
||||
t.Errorf("Test case '%s': got no response", tcase.name)
|
||||
} else if tcase.expectedResponse {
|
||||
if tcase.expectedAllowed != resp.Allowed {
|
||||
t.Errorf("Allowed expected to be %t but got %t", tcase.expectedAllowed, resp.Allowed)
|
||||
} else if resp.Allowed && resp.Patch != nil {
|
||||
var ops interface{}
|
||||
|
||||
err := json.Unmarshal(resp.Patch, &ops)
|
||||
if err != nil {
|
||||
t.Errorf("Test case '%s': got unparsable patch '%s'", tcase.name, resp.Patch)
|
||||
} else if len(ops.([]interface{})) != tcase.expectedPatchOps {
|
||||
t.Errorf("Test case '%s': got wrong number of operations in the patch. Expected %d, but got %d\n%s",
|
||||
tcase.name, tcase.expectedPatchOps, len(ops.([]interface{})), string(resp.Patch))
|
||||
err := json.Unmarshal(resp.Patch, &ops)
|
||||
if err != nil {
|
||||
t.Errorf("Test case '%s': got unparsable patch '%s'", tcase.name, resp.Patch)
|
||||
} else {
|
||||
actualPatchOps = len(ops.([]interface{}))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if actualPatchOps != tcase.expectedPatchOps {
|
||||
t.Errorf("Test case '%s': got wrong number of operations in the patch. Expected %d, but got %d\n%s",
|
||||
tcase.name, tcase.expectedPatchOps, actualPatchOps, string(resp.Patch))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -280,6 +294,6 @@ func (*fakeResponseWriter) WriteHeader(int) {
|
||||
}
|
||||
|
||||
func TestMakePodsHandler(t *testing.T) {
|
||||
serveFunc := makePodsHandler(&patcherManager{})
|
||||
serveFunc := makePodsHandler(newPatcherManager())
|
||||
serveFunc(&fakeResponseWriter{}, &http.Request{})
|
||||
}
|
||||
|
@ -17,7 +17,6 @@ package main
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
"text/template"
|
||||
@ -33,14 +32,12 @@ import (
|
||||
const (
|
||||
namespace = "fpga.intel.com"
|
||||
|
||||
resourceReplaceOp = `{
|
||||
"op": "remove",
|
||||
"path": "/spec/containers/%d/resources/%s/%s"
|
||||
}, {
|
||||
"op": "add",
|
||||
"path": "/spec/containers/%d/resources/%s/%s",
|
||||
"value": %s
|
||||
}`
|
||||
af = "af"
|
||||
region = "region"
|
||||
// "regiondevel" corresponds to the FPGA plugin's regiondevel mode. It requires
|
||||
// FpgaRegion CRDs to be added to the cluster.
|
||||
regiondevel = "regiondevel"
|
||||
|
||||
resourceRemoveOp = `{
|
||||
"op": "remove",
|
||||
"path": "/spec/containers/%d/resources/%s/%s"
|
||||
@ -64,48 +61,52 @@ const (
|
||||
{{- end -}}
|
||||
]
|
||||
}`
|
||||
|
||||
// Names of extended resources cannot be longer than 63 characters.
|
||||
// Therefore for AF resources we have to cut the interface ID prefix
|
||||
// to 31 characters only.
|
||||
interfaceIDPrefixLength = 31
|
||||
)
|
||||
|
||||
var (
|
||||
rfc6901Escaper = strings.NewReplacer("~", "~0", "/", "~1")
|
||||
resourceRe = regexp.MustCompile(namespace + `/(?P<Region>[[:alnum:].]+)(-(?P<Af>[[:alnum:]]+))?`)
|
||||
)
|
||||
|
||||
type patcher struct {
|
||||
sync.Mutex
|
||||
|
||||
mode string
|
||||
regionMap map[string]string
|
||||
afMap map[string]string
|
||||
resourceMap map[string]string
|
||||
afMap map[string]*fpgav1.AcceleratorFunction
|
||||
resourceMap map[string]string
|
||||
resourceModeMap map[string]string
|
||||
}
|
||||
|
||||
func newPatcher(mode string) (*patcher, error) {
|
||||
if mode != preprogrammed && mode != orchestrated {
|
||||
return nil, errors.Errorf("Unknown mode: %s", mode)
|
||||
}
|
||||
|
||||
func newPatcher() *patcher {
|
||||
return &patcher{
|
||||
mode: mode,
|
||||
regionMap: make(map[string]string),
|
||||
afMap: make(map[string]string),
|
||||
resourceMap: make(map[string]string),
|
||||
}, nil
|
||||
afMap: make(map[string]*fpgav1.AcceleratorFunction),
|
||||
resourceMap: make(map[string]string),
|
||||
resourceModeMap: make(map[string]string),
|
||||
}
|
||||
}
|
||||
|
||||
func (p *patcher) addAf(af *fpgav1.AcceleratorFunction) {
|
||||
func (p *patcher) addAf(accfunc *fpgav1.AcceleratorFunction) {
|
||||
defer p.Unlock()
|
||||
p.Lock()
|
||||
|
||||
p.afMap[af.Name] = af.Spec.AfuID
|
||||
p.resourceMap[namespace+"/"+af.Name] = rfc6901Escaper.Replace(namespace + "/af-" + af.Spec.AfuID)
|
||||
p.afMap[namespace+"/"+accfunc.Name] = accfunc
|
||||
if accfunc.Spec.Mode == af {
|
||||
p.resourceMap[namespace+"/"+accfunc.Name] = rfc6901Escaper.Replace(namespace + "/" +
|
||||
accfunc.Spec.InterfaceID[:interfaceIDPrefixLength] + accfunc.Spec.AfuID)
|
||||
} else {
|
||||
p.resourceMap[namespace+"/"+accfunc.Name] = rfc6901Escaper.Replace(namespace + "/region-" + accfunc.Spec.InterfaceID)
|
||||
}
|
||||
p.resourceModeMap[namespace+"/"+accfunc.Name] = accfunc.Spec.Mode
|
||||
}
|
||||
|
||||
func (p *patcher) addRegion(region *fpgav1.FpgaRegion) {
|
||||
defer p.Unlock()
|
||||
p.Lock()
|
||||
|
||||
p.regionMap[region.Name] = region.Spec.InterfaceID
|
||||
p.resourceModeMap[namespace+"/"+region.Name] = regiondevel
|
||||
p.resourceMap[namespace+"/"+region.Name] = rfc6901Escaper.Replace(namespace + "/region-" + region.Spec.InterfaceID)
|
||||
}
|
||||
|
||||
@ -113,149 +114,138 @@ func (p *patcher) removeAf(name string) {
|
||||
defer p.Unlock()
|
||||
p.Lock()
|
||||
|
||||
delete(p.afMap, name)
|
||||
delete(p.afMap, namespace+"/"+name)
|
||||
delete(p.resourceMap, namespace+"/"+name)
|
||||
delete(p.resourceModeMap, namespace+"/"+name)
|
||||
}
|
||||
|
||||
func (p *patcher) removeRegion(name string) {
|
||||
defer p.Unlock()
|
||||
p.Lock()
|
||||
|
||||
delete(p.regionMap, name)
|
||||
delete(p.resourceMap, namespace+"/"+name)
|
||||
delete(p.resourceModeMap, namespace+"/"+name)
|
||||
}
|
||||
|
||||
// getRequestedResources validates the container's requirements first, then returns them as a map.
|
||||
func getRequestedResources(container corev1.Container) (map[string]int64, error) {
|
||||
for _, v := range container.Env {
|
||||
if strings.HasPrefix(v.Name, "FPGA_REGION") || strings.HasPrefix(v.Name, "FPGA_AFU") {
|
||||
return nil, errors.Errorf("environment variable '%s' is not allowed", v.Name)
|
||||
}
|
||||
}
|
||||
|
||||
// Container may happen to have Requests, but not Limits. Check Requests first,
|
||||
// then in the next loop iterate over Limits.
|
||||
for resourceName, resourceQuantity := range container.Resources.Requests {
|
||||
rname := strings.ToLower(string(resourceName))
|
||||
if !strings.HasPrefix(rname, namespace) {
|
||||
// Skip non-FPGA resources in Requests.
|
||||
continue
|
||||
}
|
||||
|
||||
if container.Resources.Limits[resourceName] != resourceQuantity {
|
||||
return nil, errors.Errorf(
|
||||
"'limits' and 'requests' for %q must be equal as extended resources cannot be overcommitted",
|
||||
rname)
|
||||
}
|
||||
}
|
||||
|
||||
resources := make(map[string]int64)
|
||||
for resourceName, resourceQuantity := range container.Resources.Limits {
|
||||
rname := strings.ToLower(string(resourceName))
|
||||
if !strings.HasPrefix(rname, namespace) {
|
||||
// Skip non-FPGA resources in Limits.
|
||||
continue
|
||||
}
|
||||
|
||||
if container.Resources.Requests[resourceName] != resourceQuantity {
|
||||
return nil, errors.Errorf(
|
||||
"'limits' and 'requests' for %q must be equal as extended resources cannot be overcommitted",
|
||||
rname)
|
||||
}
|
||||
|
||||
quantity, ok := resourceQuantity.AsInt64()
|
||||
if !ok {
|
||||
return nil, errors.Errorf("resource quantity isn't of integral type for %q", rname)
|
||||
}
|
||||
|
||||
resources[rname] = quantity
|
||||
}
|
||||
|
||||
return resources, nil
|
||||
}
|
||||
|
||||
func (p *patcher) getPatchOps(containerIdx int, container corev1.Container) ([]string, error) {
|
||||
switch p.mode {
|
||||
case preprogrammed:
|
||||
return p.getPatchOpsPreprogrammed(containerIdx, container)
|
||||
case orchestrated:
|
||||
return p.getPatchOpsOrchestrated(containerIdx, container)
|
||||
}
|
||||
|
||||
return nil, errors.Errorf("Uknown mode: %s", p.mode)
|
||||
}
|
||||
|
||||
func (p *patcher) getPatchOpsPreprogrammed(containerIdx int, container corev1.Container) ([]string, error) {
|
||||
var ops []string
|
||||
|
||||
for resourceName, resourceQuantity := range container.Resources.Limits {
|
||||
newName, err := p.translateFpgaResourceName(resourceName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(newName) > 0 {
|
||||
op := fmt.Sprintf(resourceReplaceOp, containerIdx,
|
||||
"limits", rfc6901Escaper.Replace(string(resourceName)),
|
||||
containerIdx, "limits", newName, resourceQuantity.String())
|
||||
ops = append(ops, op)
|
||||
}
|
||||
}
|
||||
for resourceName, resourceQuantity := range container.Resources.Requests {
|
||||
newName, err := p.translateFpgaResourceName(resourceName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(newName) > 0 {
|
||||
op := fmt.Sprintf(resourceReplaceOp, containerIdx,
|
||||
"requests", rfc6901Escaper.Replace(string(resourceName)),
|
||||
containerIdx, "requests", newName, resourceQuantity.String())
|
||||
ops = append(ops, op)
|
||||
}
|
||||
}
|
||||
|
||||
return ops, nil
|
||||
}
|
||||
|
||||
func (p *patcher) translateFpgaResourceName(oldname corev1.ResourceName) (string, error) {
|
||||
rname := strings.ToLower(string(oldname))
|
||||
if !strings.HasPrefix(rname, namespace) {
|
||||
return "", nil
|
||||
requestedResources, err := getRequestedResources(container)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
defer p.Unlock()
|
||||
p.Lock()
|
||||
|
||||
if newname, ok := p.resourceMap[rname]; ok {
|
||||
return newname, nil
|
||||
}
|
||||
|
||||
return "", errors.Errorf("Unknown FPGA resource: %s", rname)
|
||||
}
|
||||
|
||||
func (p *patcher) checkResourceRequests(container corev1.Container) error {
|
||||
for resourceName, resourceQuantity := range container.Resources.Requests {
|
||||
interfaceID, _, err := p.parseResourceName(string(resourceName))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if interfaceID == "" {
|
||||
// Skip non-FPGA resources
|
||||
continue
|
||||
}
|
||||
if container.Resources.Limits[resourceName] != resourceQuantity {
|
||||
return errors.Errorf("'limits' and 'requests' for %s must be equal", string(resourceName))
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *patcher) getPatchOpsOrchestrated(containerIdx int, container corev1.Container) ([]string, error) {
|
||||
var ops []string
|
||||
|
||||
for _, v := range container.Env {
|
||||
if strings.HasPrefix(v.Name, "FPGA_REGION") || strings.HasPrefix(v.Name, "FPGA_AFU") {
|
||||
return nil, errors.Errorf("The environment variable '%s' is not allowed", v.Name)
|
||||
}
|
||||
}
|
||||
|
||||
if err := p.checkResourceRequests(container); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
regions := make(map[string]int64)
|
||||
fpgaPluginMode := ""
|
||||
resources := make(map[string]int64)
|
||||
envVars := make(map[string]string)
|
||||
counter := 0
|
||||
for resourceName, resourceQuantity := range container.Resources.Limits {
|
||||
interfaceID, afuID, err := p.parseResourceName(string(resourceName))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
for rname, quantity := range requestedResources {
|
||||
|
||||
mode, found := p.resourceModeMap[rname]
|
||||
if !found {
|
||||
return nil, errors.Errorf("no such resource: %q", rname)
|
||||
}
|
||||
|
||||
if interfaceID == "" && afuID == "" {
|
||||
// Skip non-FPGA resources
|
||||
continue
|
||||
switch mode {
|
||||
case regiondevel:
|
||||
// Do nothing.
|
||||
// The requested resources are exposed by FPGA plugins working in "regiondevel" mode.
|
||||
// In this mode the workload is supposed to program FPGA regions.
|
||||
// A cluster admin has to add FpgaRegion CRDs to allow this.
|
||||
case af:
|
||||
// Do nothing.
|
||||
// The requested resources are exposed by FPGA plugins working in "af" mode.
|
||||
case region:
|
||||
// Let fpga_crihook know how to program the regions by setting ENV variables.
|
||||
// The requested resources are exposed by FPGA plugins working in "region" mode.
|
||||
for i := int64(0); i < quantity; i++ {
|
||||
counter++
|
||||
envVars[fmt.Sprintf("FPGA_REGION_%d", counter)] = p.afMap[rname].Spec.InterfaceID
|
||||
envVars[fmt.Sprintf("FPGA_AFU_%d", counter)] = p.afMap[rname].Spec.AfuID
|
||||
}
|
||||
default:
|
||||
msg := fmt.Sprintf("%q is registered with unknown mode %q instead of %q or %q",
|
||||
rname, p.resourceModeMap[rname], af, region)
|
||||
// Let admin know about broken af CRD.
|
||||
klog.Error(msg)
|
||||
return nil, errors.New(msg)
|
||||
}
|
||||
|
||||
if container.Resources.Requests[resourceName] != resourceQuantity {
|
||||
return nil, errors.Errorf("'limits' and 'requests' for %s must be equal", string(resourceName))
|
||||
if fpgaPluginMode == "" {
|
||||
fpgaPluginMode = mode
|
||||
} else if fpgaPluginMode != mode {
|
||||
return nil, errors.New("container cannot be scheduled as it requires resources operated in different modes")
|
||||
}
|
||||
|
||||
quantity, ok := resourceQuantity.AsInt64()
|
||||
if !ok {
|
||||
return nil, errors.New("Resource quantity isn't of integral type")
|
||||
}
|
||||
regions[interfaceID] = regions[interfaceID] + quantity
|
||||
mappedName := p.resourceMap[rname]
|
||||
resources[mappedName] = resources[mappedName] + quantity
|
||||
|
||||
for i := int64(0); i < quantity; i++ {
|
||||
counter++
|
||||
envVars[fmt.Sprintf("FPGA_REGION_%d", counter)] = interfaceID
|
||||
envVars[fmt.Sprintf("FPGA_AFU_%d", counter)] = afuID
|
||||
}
|
||||
|
||||
ops = append(ops, fmt.Sprintf(resourceRemoveOp, containerIdx, "limits", rfc6901Escaper.Replace(string(resourceName))))
|
||||
ops = append(ops, fmt.Sprintf(resourceRemoveOp, containerIdx, "requests", rfc6901Escaper.Replace(string(resourceName))))
|
||||
// Add operations to remove unresolved resources from the pod.
|
||||
ops = append(ops, fmt.Sprintf(resourceRemoveOp, containerIdx, "limits", rfc6901Escaper.Replace(rname)))
|
||||
ops = append(ops, fmt.Sprintf(resourceRemoveOp, containerIdx, "requests", rfc6901Escaper.Replace(rname)))
|
||||
}
|
||||
|
||||
for interfaceID, quantity := range regions {
|
||||
op := fmt.Sprintf(resourceAddOp, containerIdx, "limits", rfc6901Escaper.Replace(namespace+"/region-"+interfaceID), quantity)
|
||||
// Add operations to add resolved resources to the pod.
|
||||
for resource, quantity := range resources {
|
||||
op := fmt.Sprintf(resourceAddOp, containerIdx, "limits", resource, quantity)
|
||||
ops = append(ops, op)
|
||||
op = fmt.Sprintf(resourceAddOp, containerIdx, "requests", rfc6901Escaper.Replace(namespace+"/region-"+interfaceID), quantity)
|
||||
op = fmt.Sprintf(resourceAddOp, containerIdx, "requests", resource, quantity)
|
||||
ops = append(ops, op)
|
||||
}
|
||||
|
||||
// Add the ENV variables to the pod if needed.
|
||||
if len(envVars) > 0 {
|
||||
for _, envvar := range container.Env {
|
||||
envVars[envvar.Name] = envvar.Value
|
||||
@ -276,69 +266,21 @@ func (p *patcher) getPatchOpsOrchestrated(containerIdx int, container corev1.Con
|
||||
return ops, nil
|
||||
}
|
||||
|
||||
func (p *patcher) parseResourceName(input string) (string, string, error) {
|
||||
var interfaceID, afuID string
|
||||
var regionName, afName string
|
||||
var ok bool
|
||||
|
||||
result := resourceRe.FindStringSubmatch(input)
|
||||
if result == nil {
|
||||
return "", "", nil
|
||||
}
|
||||
|
||||
defer p.Unlock()
|
||||
p.Lock()
|
||||
|
||||
for num, group := range resourceRe.SubexpNames() {
|
||||
switch group {
|
||||
case "Region":
|
||||
regionName = result[num]
|
||||
if interfaceID, ok = p.regionMap[result[num]]; !ok {
|
||||
return "", "", errors.Errorf("Unknown region name: %s", result[num])
|
||||
}
|
||||
case "Af":
|
||||
afName = result[num]
|
||||
}
|
||||
}
|
||||
|
||||
if afName != "" {
|
||||
if afuID, ok = p.afMap[regionName+"-"+afName]; !ok {
|
||||
return "", "", errors.Errorf("Unknown AF name: %s", regionName+"-"+afName)
|
||||
}
|
||||
}
|
||||
|
||||
return interfaceID, afuID, nil
|
||||
}
|
||||
|
||||
// patcherManager keeps track of patchers registered for different Kubernetes namespaces.
|
||||
type patcherManager struct {
|
||||
defaultMode string
|
||||
patchers map[string]*patcher
|
||||
type patcherManager map[string]*patcher
|
||||
|
||||
func newPatcherManager() patcherManager {
|
||||
return make(map[string]*patcher)
|
||||
}
|
||||
|
||||
func newPatcherManager(defaultMode string) (*patcherManager, error) {
|
||||
if defaultMode != preprogrammed && defaultMode != orchestrated {
|
||||
return nil, errors.Errorf("Unknown mode: %s", defaultMode)
|
||||
func (pm patcherManager) getPatcher(namespace string) *patcher {
|
||||
if p, ok := pm[namespace]; ok {
|
||||
return p
|
||||
}
|
||||
|
||||
return &patcherManager{
|
||||
defaultMode: defaultMode,
|
||||
patchers: make(map[string]*patcher),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (pm *patcherManager) getPatcher(namespace string) (*patcher, error) {
|
||||
if p, ok := pm.patchers[namespace]; ok {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
p, err := newPatcher(pm.defaultMode)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
pm.patchers[namespace] = p
|
||||
p := newPatcher()
|
||||
pm[namespace] = p
|
||||
klog.V(4).Info("created new patcher for namespace", namespace)
|
||||
|
||||
return p, nil
|
||||
return p
|
||||
}
|
||||
|
@ -21,7 +21,6 @@ import (
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/klog"
|
||||
|
||||
fpgav1 "github.com/intel/intel-device-plugins-for-kubernetes/pkg/apis/fpga.intel.com/v1"
|
||||
)
|
||||
@ -48,186 +47,51 @@ func TestPatcherStorageFunctions(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
p, err := newPatcher(preprogrammed)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
p := newPatcher()
|
||||
|
||||
p.addAf(af)
|
||||
if len(p.afMap) != 1 || len(p.resourceMap) != 1 {
|
||||
if len(p.resourceModeMap) != 1 || len(p.afMap) != 1 || len(p.resourceMap) != 1 {
|
||||
t.Error("Failed to add AF to patcher")
|
||||
}
|
||||
|
||||
p.removeAf(af.Name)
|
||||
if len(p.afMap) != 0 || len(p.resourceMap) != 0 {
|
||||
if len(p.resourceModeMap) != 0 || len(p.afMap) != 0 || len(p.resourceMap) != 0 {
|
||||
t.Error("Failed to remove AF from patcher")
|
||||
}
|
||||
|
||||
p.addRegion(region)
|
||||
if len(p.regionMap) != 1 || len(p.resourceMap) != 1 {
|
||||
if len(p.resourceModeMap) != 1 || len(p.resourceMap) != 1 {
|
||||
t.Error("Failed to add fpga region to patcher")
|
||||
}
|
||||
|
||||
p.removeRegion(region.Name)
|
||||
if len(p.regionMap) != 0 || len(p.resourceMap) != 0 {
|
||||
if len(p.resourceModeMap) != 0 || len(p.resourceMap) != 0 {
|
||||
t.Error("Failed to remove fpga region from patcher")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetPatchOpsPreprogrammed(t *testing.T) {
|
||||
func TestGetPatchOps(t *testing.T) {
|
||||
tcases := []struct {
|
||||
name string
|
||||
resourceMap map[string]string
|
||||
container corev1.Container
|
||||
afs []*fpgav1.AcceleratorFunction
|
||||
regions []*fpgav1.FpgaRegion
|
||||
expectedErr bool
|
||||
expectedOps int
|
||||
}{
|
||||
{
|
||||
name: "Empty container",
|
||||
},
|
||||
{
|
||||
name: "Unknown resource in limits",
|
||||
name: "Successful handling for region mode",
|
||||
container: corev1.Container{
|
||||
Resources: corev1.ResourceRequirements{
|
||||
Limits: corev1.ResourceList{
|
||||
"fpga.intel.com/arria10-unknown": resource.MustParse("1"),
|
||||
"cpu": resource.MustParse("1"),
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedErr: true,
|
||||
},
|
||||
{
|
||||
name: "Unknown resource in requests",
|
||||
container: corev1.Container{
|
||||
Resources: corev1.ResourceRequirements{
|
||||
Requests: corev1.ResourceList{
|
||||
"fpga.intel.com/arria10-unknown": resource.MustParse("1"),
|
||||
"cpu": resource.MustParse("1"),
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedErr: true,
|
||||
},
|
||||
{
|
||||
name: "Successful case",
|
||||
container: corev1.Container{
|
||||
Resources: corev1.ResourceRequirements{
|
||||
Limits: corev1.ResourceList{
|
||||
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
|
||||
"cpu": resource.MustParse("1"),
|
||||
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
|
||||
"fpga.intel.com/arria10-nlb0-alias": resource.MustParse("2"),
|
||||
"cpu": resource.MustParse("1"),
|
||||
},
|
||||
Requests: corev1.ResourceList{
|
||||
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
|
||||
"cpu": resource.MustParse("1"),
|
||||
},
|
||||
},
|
||||
},
|
||||
resourceMap: map[string]string{
|
||||
"fpga.intel.com/arria10-nlb0": rfc6901Escaper.Replace("fpga.intel.com/af-d8424dc4a4a3c413f89e433683f9040b"),
|
||||
},
|
||||
expectedOps: 2,
|
||||
},
|
||||
}
|
||||
for _, tt := range tcases {
|
||||
p := &patcher{
|
||||
resourceMap: tt.resourceMap,
|
||||
}
|
||||
ops, err := p.getPatchOpsPreprogrammed(0, tt.container)
|
||||
if tt.expectedErr && err == nil {
|
||||
t.Errorf("Test case '%s': no error returned", tt.name)
|
||||
}
|
||||
if !tt.expectedErr && err != nil {
|
||||
t.Errorf("Test case '%s': unexpected error %v", tt.name, err)
|
||||
}
|
||||
if len(ops) != tt.expectedOps {
|
||||
t.Errorf("test case '%s': expected %d ops, but got %d\n%v", tt.name, tt.expectedOps, len(ops), ops)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseResourceName(t *testing.T) {
|
||||
tcases := []struct {
|
||||
input string
|
||||
interfaceID string
|
||||
afuID string
|
||||
afMap map[string]string
|
||||
regionMap map[string]string
|
||||
expectedErr bool
|
||||
}{
|
||||
{
|
||||
input: "fpga.intel.com/arria10",
|
||||
regionMap: map[string]string{
|
||||
"arria10": "ce48969398f05f33946d560708be108a",
|
||||
},
|
||||
interfaceID: "ce48969398f05f33946d560708be108a",
|
||||
},
|
||||
{
|
||||
input: "fpga.intel.com/arria10-unknown",
|
||||
regionMap: map[string]string{
|
||||
"arria10": "ce48969398f05f33946d560708be108a",
|
||||
},
|
||||
expectedErr: true,
|
||||
},
|
||||
{
|
||||
input: "fpga.intel.com/unknown",
|
||||
expectedErr: true,
|
||||
},
|
||||
{
|
||||
input: "fpga.example.com/something",
|
||||
},
|
||||
{
|
||||
input: "fpga.intel.com/arria10-nlb0",
|
||||
regionMap: map[string]string{
|
||||
"arria10": "ce48969398f05f33946d560708be108a",
|
||||
},
|
||||
afMap: map[string]string{
|
||||
"arria10-nlb0": "d8424dc4a4a3c413f89e433683f9040b",
|
||||
},
|
||||
interfaceID: "ce48969398f05f33946d560708be108a",
|
||||
afuID: "d8424dc4a4a3c413f89e433683f9040b",
|
||||
},
|
||||
}
|
||||
|
||||
for num, tt := range tcases {
|
||||
p := &patcher{
|
||||
afMap: tt.afMap,
|
||||
regionMap: tt.regionMap,
|
||||
}
|
||||
interfaceID, afuID, err := p.parseResourceName(tt.input)
|
||||
if tt.expectedErr {
|
||||
if err != nil {
|
||||
continue
|
||||
} else {
|
||||
t.Errorf("In case %d we didn't get error", num)
|
||||
}
|
||||
}
|
||||
if tt.interfaceID != interfaceID || tt.afuID != afuID {
|
||||
t.Errorf("In case %d expected (%s, %s), but got (%s, %s)", num, tt.interfaceID, tt.afuID, interfaceID, afuID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetPatchOpsOrchestrated(t *testing.T) {
|
||||
tcases := []struct {
|
||||
name string
|
||||
container corev1.Container
|
||||
afMap map[string]string
|
||||
regionMap map[string]string
|
||||
expectedErr bool
|
||||
expectedOps int
|
||||
}{
|
||||
{
|
||||
name: "Successful handling",
|
||||
container: corev1.Container{
|
||||
Resources: corev1.ResourceRequirements{
|
||||
Limits: corev1.ResourceList{
|
||||
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
|
||||
"cpu": resource.MustParse("1"),
|
||||
},
|
||||
Requests: corev1.ResourceList{
|
||||
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
|
||||
"cpu": resource.MustParse("1"),
|
||||
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
|
||||
"fpga.intel.com/arria10-nlb0-alias": resource.MustParse("2"),
|
||||
"cpu": resource.MustParse("3"),
|
||||
},
|
||||
},
|
||||
Env: []corev1.EnvVar{
|
||||
@ -237,13 +101,79 @@ func TestGetPatchOpsOrchestrated(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
regionMap: map[string]string{
|
||||
"arria10": "ce48969398f05f33946d560708be108a",
|
||||
afs: []*fpgav1.AcceleratorFunction{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "arria10-nlb0",
|
||||
},
|
||||
Spec: fpgav1.AcceleratorFunctionSpec{
|
||||
AfuID: "d8424dc4a4a3c413f89e433683f9040b",
|
||||
InterfaceID: "ce48969398f05f33946d560708be108a",
|
||||
Mode: region,
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "arria10-nlb0-alias",
|
||||
},
|
||||
Spec: fpgav1.AcceleratorFunctionSpec{
|
||||
AfuID: "d8424dc4a4a3c413f89e433683f9040b",
|
||||
InterfaceID: "ce48969398f05f33946d560708be108a",
|
||||
Mode: region,
|
||||
},
|
||||
},
|
||||
},
|
||||
afMap: map[string]string{
|
||||
"arria10-nlb0": "d8424dc4a4a3c413f89e433683f9040b",
|
||||
expectedOps: 7,
|
||||
},
|
||||
{
|
||||
name: "Successful handling for af mode",
|
||||
container: corev1.Container{
|
||||
Resources: corev1.ResourceRequirements{
|
||||
Limits: corev1.ResourceList{
|
||||
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
|
||||
},
|
||||
Requests: corev1.ResourceList{
|
||||
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedOps: 5,
|
||||
afs: []*fpgav1.AcceleratorFunction{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "arria10-nlb0",
|
||||
},
|
||||
Spec: fpgav1.AcceleratorFunctionSpec{
|
||||
AfuID: "d8424dc4a4a3c413f89e433683f9040b",
|
||||
InterfaceID: "ce48969398f05f33946d560708be108a",
|
||||
Mode: af,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedOps: 4,
|
||||
},
|
||||
{
|
||||
name: "Successful handling for regiondevel mode",
|
||||
container: corev1.Container{
|
||||
Resources: corev1.ResourceRequirements{
|
||||
Limits: corev1.ResourceList{
|
||||
"fpga.intel.com/arria10": resource.MustParse("1"),
|
||||
},
|
||||
Requests: corev1.ResourceList{
|
||||
"fpga.intel.com/arria10": resource.MustParse("1"),
|
||||
},
|
||||
},
|
||||
},
|
||||
regions: []*fpgav1.FpgaRegion{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "arria10",
|
||||
},
|
||||
Spec: fpgav1.FpgaRegionSpec{
|
||||
InterfaceID: "ce48969398f05f33946d560708be108a",
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedOps: 4,
|
||||
},
|
||||
{
|
||||
name: "Unequal FPGA resources in Limits and Requests 1",
|
||||
@ -255,13 +185,6 @@ func TestGetPatchOpsOrchestrated(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
regionMap: map[string]string{
|
||||
"arria10": "ce48969398f05f33946d560708be108a",
|
||||
},
|
||||
afMap: map[string]string{
|
||||
"arria10-nlb0": "d8424dc4a4a3c413f89e433683f9040b",
|
||||
"arria10-nlb3": "f7df405cbd7acf7222f144b0b93acd18",
|
||||
},
|
||||
expectedErr: true,
|
||||
},
|
||||
{
|
||||
@ -270,45 +193,19 @@ func TestGetPatchOpsOrchestrated(t *testing.T) {
|
||||
Resources: corev1.ResourceRequirements{
|
||||
Requests: corev1.ResourceList{
|
||||
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
|
||||
"fpga.intel.com/arria10-nlb3": resource.MustParse("1"),
|
||||
"fpga.intel.com/arria10-nlb3": resource.MustParse("2"),
|
||||
},
|
||||
},
|
||||
},
|
||||
regionMap: map[string]string{
|
||||
"arria10": "ce48969398f05f33946d560708be108a",
|
||||
},
|
||||
afMap: map[string]string{
|
||||
"arria10-nlb0": "d8424dc4a4a3c413f89e433683f9040b",
|
||||
"arria10-nlb3": "f7df405cbd7acf7222f144b0b93acd18",
|
||||
},
|
||||
expectedErr: true,
|
||||
},
|
||||
{
|
||||
name: "Unknown FPGA model in Requests",
|
||||
name: "Unknown FPGA resources in container requirements",
|
||||
container: corev1.Container{
|
||||
Resources: corev1.ResourceRequirements{
|
||||
Requests: corev1.ResourceList{
|
||||
"fpga.intel.com/unknown-nlb0": resource.MustParse("1"),
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedErr: true,
|
||||
},
|
||||
{
|
||||
name: "Unknown AFU in Requests",
|
||||
container: corev1.Container{
|
||||
Resources: corev1.ResourceRequirements{
|
||||
Requests: corev1.ResourceList{
|
||||
"fpga.intel.com/arria10-unknown": resource.MustParse("1"),
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedErr: true,
|
||||
},
|
||||
{
|
||||
name: "Unknown FPGA model in Limits",
|
||||
container: corev1.Container{
|
||||
Resources: corev1.ResourceRequirements{
|
||||
Limits: corev1.ResourceList{
|
||||
"fpga.intel.com/unknown-nlb0": resource.MustParse("1"),
|
||||
},
|
||||
@ -316,23 +213,12 @@ func TestGetPatchOpsOrchestrated(t *testing.T) {
|
||||
},
|
||||
expectedErr: true,
|
||||
},
|
||||
{
|
||||
name: "Unknown AFU in Limits",
|
||||
container: corev1.Container{
|
||||
Resources: corev1.ResourceRequirements{
|
||||
Limits: corev1.ResourceList{
|
||||
"fpga.intel.com/arria10-unknown": resource.MustParse("1"),
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedErr: true,
|
||||
},
|
||||
{
|
||||
name: "Wrong ENV",
|
||||
container: corev1.Container{
|
||||
Resources: corev1.ResourceRequirements{
|
||||
Limits: corev1.ResourceList{
|
||||
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
|
||||
"cpu": resource.MustParse("1"),
|
||||
},
|
||||
},
|
||||
Env: []corev1.EnvVar{
|
||||
@ -342,12 +228,6 @@ func TestGetPatchOpsOrchestrated(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
regionMap: map[string]string{
|
||||
"arria10": "ce48969398f05f33946d560708be108a",
|
||||
},
|
||||
afMap: map[string]string{
|
||||
"arria10-nlb0": "d8424dc4a4a3c413f89e433683f9040b",
|
||||
},
|
||||
expectedErr: true,
|
||||
},
|
||||
{
|
||||
@ -362,59 +242,119 @@ func TestGetPatchOpsOrchestrated(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
regionMap: map[string]string{
|
||||
"arria10": "ce48969398f05f33946d560708be108a",
|
||||
expectedErr: true,
|
||||
},
|
||||
{
|
||||
name: "Require resources operated in af and region modes",
|
||||
container: corev1.Container{
|
||||
Resources: corev1.ResourceRequirements{
|
||||
Limits: corev1.ResourceList{
|
||||
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
|
||||
"fpga.intel.com/arria10-nlb3": resource.MustParse("2"),
|
||||
"cpu": resource.MustParse("1"),
|
||||
},
|
||||
Requests: corev1.ResourceList{
|
||||
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
|
||||
"fpga.intel.com/arria10-nlb3": resource.MustParse("2"),
|
||||
"cpu": resource.MustParse("3"),
|
||||
},
|
||||
},
|
||||
},
|
||||
afMap: map[string]string{
|
||||
"arria10-nlb0": "d8424dc4a4a3c413f89e433683f9040b",
|
||||
afs: []*fpgav1.AcceleratorFunction{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "arria10-nlb0",
|
||||
},
|
||||
Spec: fpgav1.AcceleratorFunctionSpec{
|
||||
AfuID: "d8424dc4a4a3c413f89e433683f9040b",
|
||||
InterfaceID: "ce48969398f05f33946d560708be108a",
|
||||
Mode: region,
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "arria10-nlb3",
|
||||
},
|
||||
Spec: fpgav1.AcceleratorFunctionSpec{
|
||||
AfuID: "d8424dc4a4a3c413f89e433683f9040b",
|
||||
InterfaceID: "f7df405cbd7acf7222f144b0b93acd18",
|
||||
Mode: af,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedErr: true,
|
||||
},
|
||||
{
|
||||
name: "Unknown mode",
|
||||
container: corev1.Container{
|
||||
Resources: corev1.ResourceRequirements{
|
||||
Limits: corev1.ResourceList{
|
||||
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
|
||||
},
|
||||
Requests: corev1.ResourceList{
|
||||
"fpga.intel.com/arria10-nlb0": resource.MustParse("1"),
|
||||
},
|
||||
},
|
||||
},
|
||||
afs: []*fpgav1.AcceleratorFunction{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "arria10-nlb0",
|
||||
},
|
||||
Spec: fpgav1.AcceleratorFunctionSpec{
|
||||
AfuID: "d8424dc4a4a3c413f89e433683f9040b",
|
||||
InterfaceID: "ce48969398f05f33946d560708be108a",
|
||||
Mode: "unknown",
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedErr: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tcases {
|
||||
p := &patcher{
|
||||
afMap: tt.afMap,
|
||||
regionMap: tt.regionMap,
|
||||
}
|
||||
klog.V(4).Info(tt.name)
|
||||
ops, err := p.getPatchOpsOrchestrated(0, tt.container)
|
||||
if tt.expectedErr && err == nil {
|
||||
t.Errorf("Test case '%s': no error returned", tt.name)
|
||||
}
|
||||
if !tt.expectedErr && err != nil {
|
||||
t.Errorf("Test case '%s': unexpected error %+v", tt.name, err)
|
||||
}
|
||||
if len(ops) != tt.expectedOps {
|
||||
t.Errorf("test case '%s': expected %d ops, but got %d\n%v", tt.name, tt.expectedOps, len(ops), ops)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewPatcherManager(t *testing.T) {
|
||||
tcases := []struct {
|
||||
name string
|
||||
defaultMode string
|
||||
expectedErr bool
|
||||
}{
|
||||
{
|
||||
name: "Everything is OK",
|
||||
defaultMode: preprogrammed,
|
||||
},
|
||||
{
|
||||
name: "Unknown default mode",
|
||||
defaultMode: "unknownMode",
|
||||
expectedErr: true,
|
||||
},
|
||||
}
|
||||
for _, tt := range tcases {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
_, err := newPatcherManager(tt.defaultMode)
|
||||
p := newPatcher()
|
||||
for _, af := range tt.afs {
|
||||
p.addAf(af)
|
||||
}
|
||||
for _, region := range tt.regions {
|
||||
p.addRegion(region)
|
||||
}
|
||||
ops, err := p.getPatchOps(0, tt.container)
|
||||
if tt.expectedErr && err == nil {
|
||||
t.Errorf("Test case '%s': no error returned", tt.name)
|
||||
}
|
||||
if !tt.expectedErr && err != nil {
|
||||
t.Errorf("Test case '%s': unexpected error %+v", tt.name, err)
|
||||
t.Errorf("Test case '%s': unexpected error: %+v", tt.name, err)
|
||||
}
|
||||
if len(ops) != tt.expectedOps {
|
||||
t.Errorf("test case '%s': expected %d ops, but got %d\n%v", tt.name, tt.expectedOps, len(ops), ops)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetPatcher(t *testing.T) {
|
||||
namespace := "test"
|
||||
tcases := []struct {
|
||||
name string
|
||||
pm patcherManager
|
||||
}{
|
||||
{
|
||||
name: "Create new patcher",
|
||||
pm: newPatcherManager(),
|
||||
},
|
||||
{
|
||||
name: "Return existing patcher",
|
||||
pm: map[string]*patcher{namespace: newPatcher()},
|
||||
},
|
||||
}
|
||||
for _, tt := range tcases {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
p := tt.pm.getPatcher(namespace)
|
||||
if p != tt.pm[namespace] {
|
||||
t.Error("stored and received patchers are not equal")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
@ -12,7 +12,7 @@
|
||||
* [Verify node kubelet config](#verify-node-kubelet-config)
|
||||
* [Deploying as a DaemonSet](#deploying-as-a-daemonset)
|
||||
* [Create a service account](#create-a-service-account)
|
||||
* [Deploying `orchestrated` mode](#deploying-orchestrated-mode)
|
||||
* [Deploying `region` mode](#deploying-region-mode)
|
||||
* [Deploying `af` mode](#deploying-af-mode)
|
||||
* [Deploy the DaemonSet](#deploy-the-daemonset)
|
||||
* [Verify plugin registration](#verify-plugin-registration)
|
||||
@ -75,26 +75,26 @@ development, initial deployment and debugging.
|
||||
|
||||
The FPGA plugin set can run in one of two modes:
|
||||
|
||||
- `region`/`orchestrated` mode, where the plugins locate and advertise
|
||||
- `region` mode, where the plugins locate and advertise
|
||||
regions of the FPGA, and facilitate programming of those regions with the
|
||||
requested bitstreams.
|
||||
- `af`/`preprogrammed` mode, where the FPGA bitstreams are already loaded
|
||||
- `af` mode, where the FPGA bitstreams are already loaded
|
||||
onto the FPGA, and the plugins discover and advertise the existing
|
||||
Accelerator Functions (AF).
|
||||
|
||||
The example YAML deployments described in this document only currently support
|
||||
`af`/`preprogrammed` mode. To utilise `region`/`orchestrated` mode, either modify
|
||||
the existing YAML appropriately, or deploy 'by hand'.
|
||||
`af` mode. To utilise `region` mode, either modify the existing YAML appropriately,
|
||||
or deploy 'by hand'.
|
||||
|
||||
Overview diagrams of `preprogrammed` and `orchestrated` modes are below:
|
||||
Overview diagrams of `af` and `region` modes are below:
|
||||
|
||||
Orchestrated/region mode:
|
||||
region mode:
|
||||
|
||||

|
||||

|
||||
|
||||
Preprogrammed/af mode:
|
||||
af mode:
|
||||
|
||||

|
||||

|
||||
|
||||
# Installation
|
||||
|
||||
@ -136,12 +136,11 @@ major components:
|
||||
- [FPGA admission controller webhook](../fpga_admissionwebhook/README.md)
|
||||
- [FPGA prestart CRI-O hook](../fpga_crihook/README.md)
|
||||
|
||||
The CRI-O hook is only *required* if `orchestrated` FPGA bitstream programming mode is
|
||||
being used, but is installed by default by the
|
||||
The CRI-O hook is only *required* if `region` mode is being used, but is installed by default by the
|
||||
[FPGA plugin DaemonSet YAML](../../deployments/fpga_plugin/fpga_plugin.yaml), and is benign
|
||||
in `preprogrammed` mode.
|
||||
in `af` mode.
|
||||
|
||||
If using the `preprogrammed` mode, and therefore *not* using the
|
||||
If using the `af` mode, and therefore *not* using the
|
||||
CRI-O prestart hook, runtimes other than CRI-O can be used (that is, the CRI-O hook presently
|
||||
*only* works with the CRI-O runtime).
|
||||
|
||||
@ -192,8 +191,8 @@ YAML deployment files to reference your required image.
|
||||
### For beta testing: new deployment model
|
||||
|
||||
The FPGA plugin deployment is currently being rewritten to enable
|
||||
straight-forward deployment of both `af/preprogrammed` and
|
||||
`region/orchestrated` modes. The deployment has two steps:
|
||||
straight-forward deployment of both `af` and
|
||||
`region` modes. The deployment has two steps:
|
||||
|
||||
1. Run `scripts/fpga-plugin-prepare-for-kustomization.sh`. This will
|
||||
create the necessary secrets: a key and a signed certificate for
|
||||
@ -226,19 +225,16 @@ clusterrole.rbac.authorization.k8s.io/node-getter created
|
||||
clusterrolebinding.rbac.authorization.k8s.io/get-nodes created
|
||||
```
|
||||
|
||||
### Deploying `orchestrated` mode
|
||||
### Deploying `region` mode
|
||||
|
||||
To deploy the FPGA plugin DaemonSet in `orchestrated` (`region`) mode, you need to set the plugin
|
||||
To deploy the FPGA plugin DaemonSet in `region` mode, you need to set the plugin
|
||||
mode annotation on all of your nodes, otherwise the FPGA plugin will run in its default
|
||||
`af` (`preprogrammed`) mode.
|
||||
`af` mode.
|
||||
|
||||
```bash
|
||||
$ kubectl annotate node --all 'fpga.intel.com/device-plugin-mode=region'
|
||||
```
|
||||
|
||||
Mixing of the two modes (`orchestrated` and `af`) across nodes in the same cluster is
|
||||
*not currently supported*.
|
||||
|
||||
### Deploying `af` mode
|
||||
|
||||
To deploy the FPGA plugin DaemonSet in `af` mode, you do not need to set the mode annotation on
|
||||
@ -260,7 +256,7 @@ daemonset.apps/intel-fpga-plugin created
|
||||
### Verify plugin registration
|
||||
|
||||
Verify the FPGA plugin has been deployed on the nodes. The following shows the output
|
||||
you can expect in `region` mode, but similar output should be expected for `preprogrammed`
|
||||
you can expect in `region` mode, but similar output should be expected for `af`
|
||||
mode:
|
||||
|
||||
```bash
|
||||
|
@ -116,7 +116,7 @@ func getDevicesDFL() []device {
|
||||
},
|
||||
{
|
||||
id: "dfl-port.4",
|
||||
afuID: "d8424dc4a4a3c413f89e433683f9040b",
|
||||
afuID: unhealthyAfuID,
|
||||
devNode: "/dev/dfl-port.4",
|
||||
},
|
||||
},
|
||||
@ -240,7 +240,7 @@ func TestGetAfuTreeDFL(t *testing.T) {
|
||||
Permissions: "rw",
|
||||
},
|
||||
}
|
||||
expected.AddDevice(afMode+"-d8424dc4a4a3c413f89e433683f9040b", "dfl-port.0", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
|
||||
expected.AddDevice("ce48969398f05f33946d560708be108d8424dc4a4a3c413f89e433683f9040b", "dfl-port.0", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
|
||||
|
||||
nodes = []pluginapi.DeviceSpec{
|
||||
{
|
||||
@ -250,7 +250,7 @@ func TestGetAfuTreeDFL(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
expected.AddDevice(afMode+"-d8424dc4a4a3c413f89e433683f9040b", "dfl-port.1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
|
||||
expected.AddDevice("ce48969398f05f33946d560708be108d8424dc4a4a3c413f89e433683f9040b", "dfl-port.1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
|
||||
|
||||
nodes = []pluginapi.DeviceSpec{
|
||||
{
|
||||
@ -259,7 +259,7 @@ func TestGetAfuTreeDFL(t *testing.T) {
|
||||
Permissions: "rw",
|
||||
},
|
||||
}
|
||||
expected.AddDevice(afMode+"-d8424dc4a4a3c413f89e433683f9040b", "dfl-port.2", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
|
||||
expected.AddDevice("ce48969398f05f33946d560708be108d8424dc4a4a3c413f89e433683f9040b", "dfl-port.2", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
|
||||
|
||||
nodes = []pluginapi.DeviceSpec{
|
||||
{
|
||||
@ -268,7 +268,7 @@ func TestGetAfuTreeDFL(t *testing.T) {
|
||||
Permissions: "rw",
|
||||
},
|
||||
}
|
||||
expected.AddDevice(afMode+"-"+unhealthyAfuID, "dfl-port.3", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil))
|
||||
expected.AddDevice(unhealthyInterfaceID[:interfaceIDPrefixLength]+unhealthyAfuID, "dfl-port.3", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil))
|
||||
|
||||
nodes = []pluginapi.DeviceSpec{
|
||||
{
|
||||
@ -277,11 +277,11 @@ func TestGetAfuTreeDFL(t *testing.T) {
|
||||
Permissions: "rw",
|
||||
},
|
||||
}
|
||||
expected.AddDevice(afMode+"-d8424dc4a4a3c413f89e433683f9040b", "dfl-port.4", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
|
||||
expected.AddDevice(unhealthyInterfaceID[:interfaceIDPrefixLength]+unhealthyAfuID, "dfl-port.4", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil))
|
||||
|
||||
result := getAfuTree(getDevicesDFL())
|
||||
if !reflect.DeepEqual(result, expected) {
|
||||
t.Errorf("Got unexpected result: %v, expected: %v", result, expected)
|
||||
t.Errorf("Got unexpected result:\n%v\nexpected:\n%v", result, expected)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -51,6 +51,11 @@ const (
|
||||
|
||||
// Frequency of device scans
|
||||
scanFrequency = 5 * time.Second
|
||||
|
||||
// Names of extended resources cannot be longer than 63 characters.
|
||||
// Therefore for AF resources we have to cut the interface ID prefix
|
||||
// to 31 characters only.
|
||||
interfaceIDPrefixLength = 31
|
||||
)
|
||||
|
||||
type getDevTreeFunc func(devices []device) dpapi.DeviceTree
|
||||
@ -125,7 +130,7 @@ func getAfuTree(devices []device) dpapi.DeviceTree {
|
||||
if afu.afuID == unhealthyAfuID {
|
||||
health = pluginapi.Unhealthy
|
||||
}
|
||||
devType := fmt.Sprintf("%s-%s", afMode, afu.afuID)
|
||||
devType := region.interfaceID[:interfaceIDPrefixLength] + afu.afuID
|
||||
devNodes := []pluginapi.DeviceSpec{
|
||||
{
|
||||
HostPath: afu.devNode,
|
||||
|
@ -210,7 +210,7 @@ func TestGetAfuTreeOPAE(t *testing.T) {
|
||||
Permissions: "rw",
|
||||
},
|
||||
}
|
||||
expected.AddDevice(afMode+"-d8424dc4a4a3c413f89e433683f9040b", "intel-fpga-port.0", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
|
||||
expected.AddDevice("ce48969398f05f33946d560708be108d8424dc4a4a3c413f89e433683f9040b", "intel-fpga-port.0", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
|
||||
|
||||
nodes = []pluginapi.DeviceSpec{
|
||||
{
|
||||
@ -219,7 +219,7 @@ func TestGetAfuTreeOPAE(t *testing.T) {
|
||||
Permissions: "rw",
|
||||
},
|
||||
}
|
||||
expected.AddDevice(afMode+"-d8424dc4a4a3c413f89e433683f9040b", "intel-fpga-port.1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
|
||||
expected.AddDevice("ce48969398f05f33946d560708be108d8424dc4a4a3c413f89e433683f9040b", "intel-fpga-port.1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil))
|
||||
|
||||
nodes = []pluginapi.DeviceSpec{
|
||||
{
|
||||
@ -228,7 +228,7 @@ func TestGetAfuTreeOPAE(t *testing.T) {
|
||||
Permissions: "rw",
|
||||
},
|
||||
}
|
||||
expected.AddDevice(afMode+"-"+unhealthyAfuID, "intel-fpga-port.2", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil))
|
||||
expected.AddDevice(unhealthyInterfaceID[:interfaceIDPrefixLength]+unhealthyAfuID, "intel-fpga-port.2", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil))
|
||||
|
||||
result := getAfuTree(getDevicesOPAE())
|
||||
if !reflect.DeepEqual(result, expected) {
|
||||
|
Before Width: | Height: | Size: 53 KiB After Width: | Height: | Size: 53 KiB |
Before Width: | Height: | Size: 59 KiB After Width: | Height: | Size: 59 KiB |
@ -19,4 +19,10 @@ spec:
|
||||
properties:
|
||||
afuId:
|
||||
type: string
|
||||
pattern: '^[0-9a-f]{8,128}$'
|
||||
pattern: '^[0-9a-f]{8,32}$'
|
||||
interfaceId:
|
||||
type: string
|
||||
pattern: '^[0-9a-f]{8,32}$'
|
||||
mode:
|
||||
type: string
|
||||
pattern: '^af|region$'
|
||||
|
@ -1,13 +1,6 @@
|
||||
# DCP 1.0
|
||||
apiVersion: fpga.intel.com/v1
|
||||
kind: AcceleratorFunction
|
||||
metadata:
|
||||
name: arria10.dcp1.0-compress
|
||||
spec:
|
||||
afuId: 946c21d1e49704a5e5daa0805bc6b0785e1765bf
|
||||
---
|
||||
apiVersion: fpga.intel.com/v1
|
||||
kind: AcceleratorFunction
|
||||
metadata:
|
||||
name: arria10.dcp1.0-nlb0
|
||||
spec:
|
||||
|
@ -29,7 +29,6 @@ spec:
|
||||
args:
|
||||
- -tls-cert-file=/etc/webhook/certs/cert.pem
|
||||
- -tls-private-key-file=/etc/webhook/certs/key.pem
|
||||
- -mode={MODE}
|
||||
- -v=1
|
||||
volumeMounts:
|
||||
- name: webhook-certs
|
||||
|
@ -8,17 +8,12 @@ spec:
|
||||
---
|
||||
apiVersion: fpga.intel.com/v1
|
||||
kind: AcceleratorFunction
|
||||
metadata:
|
||||
name: arria10.dcp1.0-compress
|
||||
spec:
|
||||
afuId: 946c21d1e49704a5e5daa0805bc6b0785e1765bf
|
||||
---
|
||||
apiVersion: fpga.intel.com/v1
|
||||
kind: AcceleratorFunction
|
||||
metadata:
|
||||
name: arria10.dcp1.0-nlb0
|
||||
spec:
|
||||
afuId: d8424dc4a4a3c413f89e433683f9040b
|
||||
interfaceId: ce48969398f05f33946d560708be108a
|
||||
mode: region
|
||||
---
|
||||
apiVersion: fpga.intel.com/v1
|
||||
kind: AcceleratorFunction
|
||||
@ -26,6 +21,8 @@ metadata:
|
||||
name: arria10.dcp1.0-nlb3
|
||||
spec:
|
||||
afuId: f7df405cbd7acf7222f144b0b93acd18
|
||||
interfaceId: ce48969398f05f33946d560708be108a
|
||||
mode: region
|
||||
---
|
||||
# DCP 1.1
|
||||
apiVersion: fpga.intel.com/v1
|
||||
@ -41,6 +38,8 @@ metadata:
|
||||
name: arria10.dcp1.1-nlb0
|
||||
spec:
|
||||
afuId: d8424dc4a4a3c413f89e433683f9040b
|
||||
interfaceId: 9926ab6d6c925a68aabca7d84c545738
|
||||
mode: region
|
||||
---
|
||||
apiVersion: fpga.intel.com/v1
|
||||
kind: AcceleratorFunction
|
||||
@ -48,6 +47,8 @@ metadata:
|
||||
name: arria10.dcp1.1-nlb3
|
||||
spec:
|
||||
afuId: f7df405cbd7acf7222f144b0b93acd18
|
||||
interfaceId: 9926ab6d6c925a68aabca7d84c545738
|
||||
mode: region
|
||||
---
|
||||
# DCP 1.2
|
||||
apiVersion: fpga.intel.com/v1
|
||||
@ -63,6 +64,8 @@ metadata:
|
||||
name: arria10.dcp1.2-nlb0
|
||||
spec:
|
||||
afuId: d8424dc4a4a3c413f89e433683f9040b
|
||||
interfaceId: 69528db6eb31577a8c3668f9faa081f6
|
||||
mode: region
|
||||
---
|
||||
apiVersion: fpga.intel.com/v1
|
||||
kind: AcceleratorFunction
|
||||
@ -70,6 +73,8 @@ metadata:
|
||||
name: arria10.dcp1.2-nlb3
|
||||
spec:
|
||||
afuId: f7df405cbd7acf7222f144b0b93acd18
|
||||
interfaceId: 69528db6eb31577a8c3668f9faa081f6
|
||||
mode: region
|
||||
---
|
||||
# D5005
|
||||
apiVersion: fpga.intel.com/v1
|
||||
@ -85,6 +90,8 @@ metadata:
|
||||
name: d5005-nlb0
|
||||
spec:
|
||||
afuId: d8424dc4a4a3c413f89e433683f9040b
|
||||
interfaceId: bfac4d851ee856fe8c95865ce1bbaa2d
|
||||
mode: region
|
||||
---
|
||||
apiVersion: fpga.intel.com/v1
|
||||
kind: AcceleratorFunction
|
||||
@ -92,3 +99,14 @@ metadata:
|
||||
name: d5005-nlb3
|
||||
spec:
|
||||
afuId: f7df405cbd7acf7222f144b0b93acd18
|
||||
interfaceId: bfac4d851ee856fe8c95865ce1bbaa2d
|
||||
mode: region
|
||||
---
|
||||
apiVersion: fpga.intel.com/v1
|
||||
kind: AcceleratorFunction
|
||||
metadata:
|
||||
name: d5005-nlb3-preprogrammed
|
||||
spec:
|
||||
afuId: f7df405cbd7acf7222f144b0b93acd18
|
||||
interfaceId: bfac4d851ee856fe8c95865ce1bbaa2d
|
||||
mode: af
|
||||
|
@ -19,4 +19,4 @@ spec:
|
||||
properties:
|
||||
interfaceId:
|
||||
type: string
|
||||
pattern: '^[0-9a-f]{8,128}$'
|
||||
pattern: '^[0-9a-f]{8,32}$'
|
||||
|
@ -18,7 +18,9 @@ type AcceleratorFunction struct {
|
||||
|
||||
// AcceleratorFunctionSpec contains actual specs for AcceleratorFunction
|
||||
type AcceleratorFunctionSpec struct {
|
||||
// AfuID is encoded under the JSON key "afuId".
AfuID string `json:"afuId"`
|
||||
AfuID string `json:"afuId"`
|
||||
// InterfaceID is encoded under the JSON key "interfaceId".
InterfaceID string `json:"interfaceId"`
|
||||
// Mode is encoded under the JSON key "mode".
// NOTE(review): presumably "af" or "region", the values used by the sample mappings; confirm.
Mode string `json:"mode"`
|
||||
}
|
||||
|
||||
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
|
||||
|
@ -15,7 +15,6 @@ function help {
|
||||
echo ''
|
||||
echo ' Options:'
|
||||
echo ' --kubectl <kubectl> - path to the kubectl utility'
|
||||
echo ' --mode <mode> - "preprogrammed" (default) or "orchestrated" mode of operation'
|
||||
echo ' --ca-bundle-path <path> - path to CA bundle used for signing cerificates in the cluster'
|
||||
echo ' --namespace <name> - namespace to deploy the webhook in'
|
||||
}
|
||||
@ -30,10 +29,6 @@ while [[ $# -gt 0 ]]; do
|
||||
cabundlepath="$2"
|
||||
shift
|
||||
;;
|
||||
--mode)
|
||||
mode="$2"
|
||||
shift
|
||||
;;
|
||||
--namespace)
|
||||
namespace="$2"
|
||||
shift
|
||||
@ -54,7 +49,6 @@ while [[ $# -gt 0 ]]; do
|
||||
done
|
||||
|
||||
[ -z ${kubectl} ] && kubectl="kubectl"
|
||||
[ -z ${mode} ] && mode="preprogrammed"
|
||||
[ -z ${namespace} ] && namespace="default"
|
||||
|
||||
which ${kubectl} > /dev/null 2>&1 || { echo "ERROR: ${kubectl} not found"; exit 1; }
|
||||
@ -75,11 +69,6 @@ if [ "x${command}" = "xcleanup" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ "x${mode}" != "xpreprogrammed" -a "x${mode}" != "xorchestrated" ]; then
|
||||
echo "ERROR: supported modes are 'preprogrammed' and 'orchestrated'"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z ${cabundlepath} ]; then
|
||||
CA_BUNDLE=$(${kubectl} get configmap -n kube-system extension-apiserver-authentication -o=jsonpath='{.data.client-ca-file}' | base64 -w 0)
|
||||
else
|
||||
@ -98,7 +87,7 @@ cat ${srcroot}/deployments/fpga_admissionwebhook/rbac-config-tpl.yaml | \
|
||||
${kubectl} create -f -
|
||||
|
||||
echo "Create webhook deployment"
|
||||
cat ${srcroot}/deployments/fpga_admissionwebhook/deployment-tpl.yaml | sed -e "s/{MODE}/${mode}/g" -e "s/{uid}/${uid}/g" -e "s/{gid}/${gid}/g" | ${kubectl} --namespace ${namespace} create -f -
|
||||
cat ${srcroot}/deployments/fpga_admissionwebhook/deployment-tpl.yaml | sed -e "s/{uid}/${uid}/g" -e "s/{gid}/${gid}/g" | ${kubectl} --namespace ${namespace} create -f -
|
||||
|
||||
echo "Create webhook service"
|
||||
${kubectl} --namespace ${namespace} create -f ${srcroot}/deployments/fpga_admissionwebhook/service.yaml
|
||||
|
@ -51,25 +51,37 @@ func describe() {
|
||||
return append(os.Environ(), "KUBECONFIG="+framework.TestContext.KubeConfig)
|
||||
}
|
||||
|
||||
ginkgo.It("mutates created pods to reference resolved AFs in preprogrammed mode", func() {
|
||||
ginkgo.By("deploying webhook in preprogrammed mode")
|
||||
ginkgo.It("mutates created pods to reference resolved AFs", func() {
|
||||
ginkgo.By("deploying webhook")
|
||||
_, _, err := framework.RunCmdEnv(getEnv(), webhookDeployPath, "--kubectl", framework.TestContext.KubectlPath, "--namespace", f.Namespace.Name)
|
||||
framework.ExpectNoError(err)
|
||||
|
||||
checkPodMutation(f, "fpga.intel.com/af-d8424dc4a4a3c413f89e433683f9040b")
|
||||
checkPodMutation(f, "fpga.intel.com/d5005-nlb3-preprogrammed",
|
||||
"fpga.intel.com/bfac4d851ee856fe8c95865ce1bbaa2f7df405cbd7acf7222f144b0b93acd18")
|
||||
})
|
||||
|
||||
ginkgo.It("mutates created pods to reference resolved Regions in orchestrated mode", func() {
|
||||
ginkgo.By("deploying webhook in orchestrated mode")
|
||||
_, _, err := framework.RunCmdEnv(getEnv(), webhookDeployPath, "--kubectl", framework.TestContext.KubectlPath, "--namespace", f.Namespace.Name, "--mode", "orchestrated")
|
||||
ginkgo.It("mutates created pods to reference resolved Regions", func() {
|
||||
ginkgo.By("deploying webhook")
|
||||
_, _, err := framework.RunCmdEnv(getEnv(), webhookDeployPath, "--kubectl", framework.TestContext.KubectlPath, "--namespace", f.Namespace.Name)
|
||||
framework.ExpectNoError(err)
|
||||
|
||||
checkPodMutation(f, "fpga.intel.com/region-ce48969398f05f33946d560708be108a")
|
||||
checkPodMutation(f, "fpga.intel.com/arria10.dcp1.0-nlb0",
|
||||
"fpga.intel.com/region-ce48969398f05f33946d560708be108a")
|
||||
|
||||
})
|
||||
|
||||
ginkgo.It("mutates created pods to reference resolved Regions in regiondevel mode", func() {
|
||||
ginkgo.By("deploying webhook")
|
||||
_, _, err := framework.RunCmdEnv(getEnv(), webhookDeployPath, "--kubectl", framework.TestContext.KubectlPath, "--namespace", f.Namespace.Name)
|
||||
framework.ExpectNoError(err)
|
||||
|
||||
checkPodMutation(f, "fpga.intel.com/arria10.dcp1.0",
|
||||
"fpga.intel.com/region-ce48969398f05f33946d560708be108a")
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
func checkPodMutation(f *framework.Framework, expectedMutation v1.ResourceName) {
|
||||
func checkPodMutation(f *framework.Framework, source, expectedMutation v1.ResourceName) {
|
||||
ginkgo.By("waiting for webhook's availability")
|
||||
if _, err := e2epod.WaitForPodsWithLabelRunningReady(f.ClientSet, f.Namespace.Name,
|
||||
labels.Set{"app": "intel-fpga-webhook"}.AsSelector(), 1 /* one replica */, 10*time.Second); err != nil {
|
||||
@ -80,8 +92,8 @@ func checkPodMutation(f *framework.Framework, expectedMutation v1.ResourceName)
|
||||
|
||||
ginkgo.By("submitting a pod for addmission")
|
||||
podSpec := f.NewTestPod("webhook-tester",
|
||||
v1.ResourceList{"fpga.intel.com/arria10.dcp1.0-nlb0": resource.MustParse("1")},
|
||||
v1.ResourceList{"fpga.intel.com/arria10.dcp1.0-nlb0": resource.MustParse("1")})
|
||||
v1.ResourceList{source: resource.MustParse("1")},
|
||||
v1.ResourceList{source: resource.MustParse("1")})
|
||||
pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(context.TODO(),
|
||||
podSpec, metav1.CreateOptions{})
|
||||
framework.ExpectNoError(err, "pod Create API error")
|
||||
|
Loading…
Reference in New Issue
Block a user