diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 01e573e3..b4f46657 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -71,6 +71,7 @@ jobs: - intel-fpga-plugin - intel-qat-plugin - intel-vpu-plugin + - intel-deviceplugin-operator # Demo images - crypto-perf diff --git a/.gitignore b/.gitignore index 549f01d5..a1c9d71e 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ cmd/fpga_plugin/fpga_plugin cmd/fpga_tool/fpga_tool cmd/gpu_plugin/gpu_plugin cmd/qat_plugin/qat_plugin +cmd/operator/operator deployments/fpga_admissionwebhook/base/intel-fpga-webhook-certs-secret diff --git a/Makefile b/Makefile index 361b49e1..28454223 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,4 @@ +CONTROLLER_GEN ?= controller-gen GO := go GOFMT := gofmt KUBECTL ?= kubectl @@ -10,7 +11,7 @@ EXTRA_BUILD_ARGS ?= "" WEBHOOK_IMAGE_FILE = intel-fpga-admissionwebhook-devel.tgz -pkgs = $(shell $(GO) list ./... | grep -v vendor | grep -v e2e) +pkgs = $(shell $(GO) list ./... | grep -v vendor | grep -v e2e | grep -v envtest) cmds = $(shell ls cmd) e2e_tmp_dir := $(shell mktemp -u -t e2e-tests.XXXXXXXXXX) @@ -50,16 +51,38 @@ test-with-kind: rm -rf $(e2e_tmp_dir); \ exit $$rc +envtest: + @$(GO) test ./test/envtest + lint: - @golangci-lint run --timeout 5m + @golangci-lint run --timeout 15m checks: lint go-mod-tidy +generate: + $(CONTROLLER_GEN) object:headerFile="build/boilerplate/boilerplate.go.txt" paths="./pkg/apis/deviceplugin/..." + $(CONTROLLER_GEN) crd:trivialVersions=true \ + paths="./pkg/apis/deviceplugin/..." \ + output:crd:artifacts:config=deployments/operator/crd/bases + $(CONTROLLER_GEN) webhook \ + paths="./pkg/apis/deviceplugin/..." \ + output:webhook:artifacts:config=deployments/operator/webhook + $(CONTROLLER_GEN) rbac:roleName=manager-role paths="./pkg/controllers/..." 
output:dir=deployments/operator/rbac + $(cmds): cd cmd/$@; $(GO) build -tags $(BUILDTAGS) build: $(cmds) +deploy-operator: operator generate + $(KUBECTL) apply -k deployments/operator/default + +undeploy-operator: + $(KUBECTL) delete -k deployments/operator/default + +run-operator: deploy-operator + ./cmd/operator/operator + clean: @for cmd in $(cmds) ; do pwd=$(shell pwd) ; cd cmd/$$cmd ; $(GO) clean ; cd $$pwd ; done @@ -111,4 +134,4 @@ check-github-actions: jq -e '$(images_json) - .jobs.image.strategy.matrix.image == []' > /dev/null || \ (echo "Make sure all images are listed in .github/workflows/ci.yaml"; exit 1) -.PHONY: all format test lint build images $(cmds) $(images) lock-images vendor pre-pull set-version check-github-actions +.PHONY: all format test lint build images $(cmds) $(images) lock-images vendor pre-pull set-version check-github-actions generate run-operator envtest deploy-operator undeploy-operator diff --git a/README.md b/README.md index c066b6dd..2db715db 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ * [CRI-O prestart hook](#cri-o-prestart-hook) * [QAT device plugin](#qat-device-plugin) * [VPU device plugin](#vpu-device-plugin) +* [Device Plugins Operator](#device-plugins-operator) * [Demos](#demos) * [Developers](#developers) * [Running e2e Tests](#running-e2e-tests) @@ -116,6 +117,33 @@ the card has: The demo subdirectory includes details of a OpenVINO deployment and use of the VPU plugin. Sources can be found in [openvino-demo](demo/ubuntu-demo-openvino) +## Device Plugins Operator + +Currently the operator has limited support for the QAT and GPU device plugins: +it validates container image references and extends reported statuses. + +To run an operator instance in a container, run + +```bash +$ kubectl apply --validate=false -f https://github.com/jetstack/cert-manager/releases/download/v0.15.0/cert-manager.yaml +$ make deploy-operator +``` + +Then deploy your device plugin by applying its custom resource, e.g. 
+`GpuDevicePlugin` with + +```bash +$ kubectl apply -f ./deployments/operator/samples/deviceplugin_v1_gpudeviceplugin.yaml +``` + +Observe it is up and running: + +```bash +$ kubectl get GpuDevicePlugin +NAME DESIRED READY NODE SELECTOR AGE +gpudeviceplugin-sample 1 1 5s +``` + ## Demos The [demo subdirectory](demo/readme.md) contains a number of demonstrations for a variety of the @@ -165,6 +193,23 @@ without a pre-configured Kubernetes cluster. Just make sure you have $ make test-with-kind ``` +## Running controller tests with a local control plane + +The controller-runtime library provides a package for integration testing by +starting a local control plane. The package is called +[envtest](https://pkg.go.dev/sigs.k8s.io/controller-runtime/pkg/envtest). The +operator uses this package for its integration testing. +Please have a look at `envtest`'s documentation to set it up properly. But basically +you just need to have `etcd` and `kube-apiserver` binaries available on your +host. By default they are expected to be located at `/usr/local/kubebuilder/bin`. +But you can store them anywhere by setting the `KUBEBUILDER_ASSETS` +environment variable. So, given you have the binaries copied to +`$HOME/work/kubebuilder-assets`, to run the tests just enter + +```bash +$ KUBEBUILDER_ASSETS=$HOME/work/kubebuilder-assets make envtest +``` + ## Supported Kubernetes versions Releases are made under the github [releases area](../../releases). 
Supported releases and diff --git a/build/docker/intel-deviceplugin-operator.Dockerfile b/build/docker/intel-deviceplugin-operator.Dockerfile new file mode 100644 index 00000000..e6110d93 --- /dev/null +++ b/build/docker/intel-deviceplugin-operator.Dockerfile @@ -0,0 +1,38 @@ +# CLEAR_LINUX_BASE and CLEAR_LINUX_VERSION can be used to make the build +# reproducible by choosing an image by its hash and installing an OS version +# with --version=: +# CLEAR_LINUX_BASE=clearlinux@sha256:b8e5d3b2576eb6d868f8d52e401f678c873264d349e469637f98ee2adf7b33d4 +# CLEAR_LINUX_VERSION="--version=29970" +# +# This is used on release branches before tagging a stable version. +# The master branch defaults to using the latest Clear Linux. +ARG CLEAR_LINUX_BASE=clearlinux/golang:latest + +FROM ${CLEAR_LINUX_BASE} as builder + +ARG CLEAR_LINUX_VERSION= + +RUN swupd update --no-boot-update ${CLEAR_LINUX_VERSION} + +ARG DIR=/intel-device-plugins-for-kubernetes +ARG GO111MODULE=on +WORKDIR $DIR +COPY . . + +RUN mkdir /install_root \ + && swupd os-install \ + ${CLEAR_LINUX_VERSION} \ + --path /install_root \ + --statedir /swupd-state \ + --no-boot-update \ + && rm -rf /install_root/var/lib/swupd/* + +RUN cd cmd/operator; GO111MODULE=${GO111MODULE} go install; cd - +RUN chmod a+x /go/bin/operator \ + && install -D /go/bin/operator /install_root/usr/local/bin/intel_deviceplugin_operator \ + && install -D ${DIR}/LICENSE /install_root/usr/local/share/package-licenses/intel-device-plugins-for-kubernetes/LICENSE \ + && scripts/copy-modules-licenses.sh ./cmd/operator /install_root/usr/local/share/package-licenses/ + +FROM scratch as final +COPY --from=builder /install_root / +ENTRYPOINT ["/usr/local/bin/intel_deviceplugin_operator"] diff --git a/build/docker/intel-gpu-plugin.Dockerfile b/build/docker/intel-gpu-plugin.Dockerfile index 0020d5af..d980e1b8 100644 --- a/build/docker/intel-gpu-plugin.Dockerfile +++ b/build/docker/intel-gpu-plugin.Dockerfile @@ -35,4 +35,4 @@ RUN chmod a+x 
/go/bin/gpu_plugin \ FROM scratch as final COPY --from=builder /install_root / -CMD ["/usr/local/bin/intel_gpu_device_plugin"] +ENTRYPOINT ["/usr/local/bin/intel_gpu_device_plugin"] diff --git a/cmd/operator/main.go b/cmd/operator/main.go new file mode 100644 index 00000000..eed9b645 --- /dev/null +++ b/cmd/operator/main.go @@ -0,0 +1,100 @@ +// Copyright 2020 Intel Corporation. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "flag" + "os" + + "k8s.io/apimachinery/pkg/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + devicepluginv1 "github.com/intel/intel-device-plugins-for-kubernetes/pkg/apis/deviceplugin/v1" + "github.com/intel/intel-device-plugins-for-kubernetes/pkg/controllers/gpu" + "github.com/intel/intel-device-plugins-for-kubernetes/pkg/controllers/qat" +) + +var ( + scheme = runtime.NewScheme() + setupLog = ctrl.Log.WithName("setup") +) + +// init registers the client-go built-in types and the deviceplugin/v1 API +// types with the scheme that main hands to the controller manager. +func init() { + // A failed registration leaves the scheme incomplete, so fail fast at + // start-up instead of silently discarding the error. + if err := clientgoscheme.AddToScheme(scheme); err != nil { + panic(err) + } + + if err := devicepluginv1.AddToScheme(scheme); err != nil { + panic(err) + } + // +kubebuilder:scaffold:scheme +} + +// main parses the metrics-addr and enable-leader-election flags, creates the +// controller manager, registers the GPU and QAT reconcilers and webhooks with +// it and starts it. Any failure is logged and ends the process with exit code 1. +func main() { + var metricsAddr string + var enableLeaderElection bool + flag.StringVar(&metricsAddr, "metrics-addr", ":8080", "The address the metric endpoint binds to.") + flag.BoolVar(&enableLeaderElection, "enable-leader-election", false, + "Enable leader election for controller manager. 
"+ + "Enabling this will ensure there is only one active controller manager.") + flag.Parse() + + ctrl.SetLogger(zap.New(zap.UseDevMode(true))) + + mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ + Scheme: scheme, + MetricsBindAddress: metricsAddr, + Port: 9443, + LeaderElection: enableLeaderElection, + LeaderElectionID: "d1c7b6d5.intel.com", + }) + if err != nil { + setupLog.Error(err, "unable to start manager") + os.Exit(1) + } + + if err = gpu.SetupReconciler(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "GpuDevicePlugin") + os.Exit(1) + } + if err = (&devicepluginv1.GpuDevicePlugin{}).SetupWebhookWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create webhook", "webhook", "GpuDevicePlugin") + os.Exit(1) + } + + if err = qat.SetupReconciler(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "QatDevicePlugin") + os.Exit(1) + } + if err = (&devicepluginv1.QatDevicePlugin{}).SetupWebhookWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create webhook", "webhook", "QatDevicePlugin") + os.Exit(1) + } + + setupLog.Info("starting manager") + if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { + setupLog.Error(err, "problem running manager") + os.Exit(1) + } +} diff --git a/deployments/operator/certmanager/certificate.yaml b/deployments/operator/certmanager/certificate.yaml new file mode 100644 index 00000000..3d46cff8 --- /dev/null +++ b/deployments/operator/certmanager/certificate.yaml @@ -0,0 +1,26 @@ +# The following manifests contain a self-signed issuer CR and a certificate CR. 
+# More documentation can be found at https://docs.cert-manager.io +# WARNING: Targets CertManager 0.11; check https://docs.cert-manager.io/en/latest/tasks/upgrading/index.html for +# breaking changes +apiVersion: cert-manager.io/v1alpha2 +kind: Issuer +metadata: + name: selfsigned-issuer + namespace: system +spec: + selfSigned: {} +--- +apiVersion: cert-manager.io/v1alpha2 +kind: Certificate +metadata: + name: serving-cert # this name should match the one that appears in kustomizeconfig.yaml + namespace: system +spec: + # $(SERVICE_NAME) and $(SERVICE_NAMESPACE) will be substituted by kustomize + dnsNames: + - $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc + - $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc.cluster.local + issuerRef: + kind: Issuer + name: selfsigned-issuer + secretName: webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize diff --git a/deployments/operator/certmanager/kustomization.yaml b/deployments/operator/certmanager/kustomization.yaml new file mode 100644 index 00000000..bebea5a5 --- /dev/null +++ b/deployments/operator/certmanager/kustomization.yaml @@ -0,0 +1,5 @@ +resources: +- certificate.yaml + +configurations: +- kustomizeconfig.yaml diff --git a/deployments/operator/certmanager/kustomizeconfig.yaml b/deployments/operator/certmanager/kustomizeconfig.yaml new file mode 100644 index 00000000..e631f777 --- /dev/null +++ b/deployments/operator/certmanager/kustomizeconfig.yaml @@ -0,0 +1,16 @@ +# This configuration is for teaching kustomize how to update name ref and var substitution +nameReference: +- kind: Issuer + group: cert-manager.io + fieldSpecs: + - kind: Certificate + group: cert-manager.io + path: spec/issuerRef/name + +varReference: +- kind: Certificate + group: cert-manager.io + path: spec/commonName +- kind: Certificate + group: cert-manager.io + path: spec/dnsNames diff --git a/deployments/operator/crd/bases/deviceplugin.intel.com_gpudeviceplugins.yaml 
b/deployments/operator/crd/bases/deviceplugin.intel.com_gpudeviceplugins.yaml new file mode 100644 index 00000000..1b7dcdbd --- /dev/null +++ b/deployments/operator/crd/bases/deviceplugin.intel.com_gpudeviceplugins.yaml @@ -0,0 +1,143 @@ + +--- +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.3.0 + creationTimestamp: null + name: gpudeviceplugins.deviceplugin.intel.com +spec: + additionalPrinterColumns: + - JSONPath: .status.desiredNumberScheduled + name: Desired + type: integer + - JSONPath: .status.numberReady + name: Ready + type: integer + - JSONPath: .spec.nodeSelector + name: Node Selector + type: string + - JSONPath: .metadata.creationTimestamp + name: Age + type: date + group: deviceplugin.intel.com + names: + kind: GpuDevicePlugin + listKind: GpuDevicePluginList + plural: gpudeviceplugins + singular: gpudeviceplugin + scope: Namespaced + subresources: + status: {} + validation: + openAPIV3Schema: + description: GpuDevicePlugin is the Schema for the gpudeviceplugins API. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: GpuDevicePluginSpec defines the desired state of GpuDevicePlugin. + properties: + image: + description: Image is a container image with GPU device plugin executable. 
+ type: string + logLevel: + description: LogLevel sets the plugin's log level. + minimum: 0 + type: integer + nodeSelector: + additionalProperties: + type: string + description: NodeSelector provides a simple way to constrain device + plugin pods to nodes with particular labels. + type: object + sharedDevNum: + description: SharedDevNum is a number of containers that can share the + same GPU device. + minimum: 1 + type: integer + type: object + status: + description: 'GpuDevicePluginStatus defines the observed state of GpuDevicePlugin. + TODO(rojkov): consider code deduplication with QatDevicePluginStatus.' + properties: + controlledDaemonSet: + description: ControlledDaemoSet references the DaemonSet controlled + by the operator. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: 'If referring to a piece of an object instead of an + entire object, this string should contain a valid JSON/Go field + access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container within + a pod, this would take on a value like: "spec.containers{name}" + (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" + (container with index 2 in this pod). This syntax is chosen only + to have some well-defined way of referencing a part of an object. + TODO: this design is not final and this field is subject to change + in the future.' + type: string + kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + namespace: + description: 'Namespace of the referent. 
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/' + type: string + resourceVersion: + description: 'Specific resourceVersion to which this reference is + made, if any. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency' + type: string + uid: + description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids' + type: string + type: object + desiredNumberScheduled: + description: The total number of nodes that should be running the device + plugin pod (including nodes correctly running the device plugin pod). + format: int32 + type: integer + nodeNames: + description: The list of Node names where the device plugin pods are + running. + items: + type: string + type: array + numberReady: + description: The number of nodes that should be running the device plugin + pod and have one or more of the device plugin pod running and ready. 
+ format: int32 + type: integer + required: + - desiredNumberScheduled + - numberReady + type: object + type: object + version: v1 + versions: + - name: v1 + served: true + storage: true +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] diff --git a/deployments/operator/crd/bases/deviceplugin.intel.com_qatdeviceplugins.yaml b/deployments/operator/crd/bases/deviceplugin.intel.com_qatdeviceplugins.yaml new file mode 100644 index 00000000..c80946d8 --- /dev/null +++ b/deployments/operator/crd/bases/deviceplugin.intel.com_qatdeviceplugins.yaml @@ -0,0 +1,163 @@ + +--- +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.3.0 + creationTimestamp: null + name: qatdeviceplugins.deviceplugin.intel.com +spec: + additionalPrinterColumns: + - JSONPath: .status.desiredNumberScheduled + name: Desired + type: integer + - JSONPath: .status.numberReady + name: Ready + type: integer + - JSONPath: .spec.nodeSelector + name: Node Selector + type: string + - JSONPath: .metadata.creationTimestamp + name: Age + type: date + group: deviceplugin.intel.com + names: + kind: QatDevicePlugin + listKind: QatDevicePluginList + plural: qatdeviceplugins + singular: qatdeviceplugin + scope: Namespaced + subresources: + status: {} + validation: + openAPIV3Schema: + description: QatDevicePlugin is the Schema for the qatdeviceplugins API. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. 
Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: QatDevicePluginSpec defines the desired state of QatDevicePlugin. + properties: + dpdkDriver: + description: DpdkDriver is a DPDK device driver for configuring the + QAT device. + enum: + - igb_uio + - vfio-pci + type: string + image: + description: Image is a container image with QAT device plugin executable. + type: string + kernelVfDrivers: + description: KernelVfDrivers is a list of VF device drivers for the + QuickAssist devices in the system. + items: + description: KernelVfDriver is a VF device driver for QuickAssist + devices. + enum: + - dh895xccvf + - c6xxvf + - c3xxxvf + - d15xxvf + type: string + type: array + logLevel: + description: LogLevel sets the plugin's log level. + minimum: 0 + type: integer + maxNumDevices: + description: MaxNumDevices is a maximum number of QAT devices to be + provided to the QuickAssist device plugin + minimum: 1 + type: integer + nodeSelector: + additionalProperties: + type: string + description: NodeSelector provides a simple way to constrain device + plugin pods to nodes with particular labels. + type: object + type: object + status: + description: 'QatDevicePluginStatus defines the observed state of QatDevicePlugin. + TODO(rojkov): consider code deduplication with GpuDevicePluginStatus.' + properties: + controlledDaemonSet: + description: ControlledDaemoSet references the DaemonSet controlled + by the operator. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: 'If referring to a piece of an object instead of an + entire object, this string should contain a valid JSON/Go field + access statement, such as desiredState.manifest.containers[2]. 
+ For example, if the object reference is to a container within + a pod, this would take on a value like: "spec.containers{name}" + (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" + (container with index 2 in this pod). This syntax is chosen only + to have some well-defined way of referencing a part of an object. + TODO: this design is not final and this field is subject to change + in the future.' + type: string + kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + namespace: + description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/' + type: string + resourceVersion: + description: 'Specific resourceVersion to which this reference is + made, if any. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency' + type: string + uid: + description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids' + type: string + type: object + desiredNumberScheduled: + description: The total number of nodes that should be running the device + plugin pod (including nodes correctly running the device plugin pod). + format: int32 + type: integer + nodeNames: + description: The list of Node names where the device plugin pods are + running. + items: + type: string + type: array + numberReady: + description: The number of nodes that should be running the device plugin + pod and have one or more of the device plugin pod running and ready. 
+ format: int32 + type: integer + required: + - desiredNumberScheduled + - numberReady + type: object + type: object + version: v1 + versions: + - name: v1 + served: true + storage: true +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] diff --git a/deployments/operator/crd/kustomization.yaml b/deployments/operator/crd/kustomization.yaml new file mode 100644 index 00000000..71653bbd --- /dev/null +++ b/deployments/operator/crd/kustomization.yaml @@ -0,0 +1,26 @@ +# This kustomization.yaml is not intended to be run by itself, +# since it depends on service name and namespace that are out of this kustomize package. +# It should be run by deployment/operator/default +resources: +- bases/deviceplugin.intel.com_gpudeviceplugins.yaml +- bases/deviceplugin.intel.com_qatdeviceplugins.yaml +# +kubebuilder:scaffold:crdkustomizeresource + +patchesStrategicMerge: +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix. +# patches here are for enabling the conversion webhook for each CRD +- patches/webhook_in_gpudeviceplugins.yaml +- patches/webhook_in_qatdeviceplugins.yaml +# +kubebuilder:scaffold:crdkustomizewebhookpatch + +# [CERTMANAGER] To enable webhook, uncomment all the sections with [CERTMANAGER] prefix. +# patches here are for enabling the CA injection for each CRD +- patches/cainjection_in_gpudeviceplugins.yaml +- patches/cainjection_in_qatdeviceplugins.yaml +# +kubebuilder:scaffold:crdkustomizecainjectionpatch + +# TODO(rojkov): enable Prometheus + +# the following config is for teaching kustomize how to do kustomization for CRDs. 
+configurations: +- kustomizeconfig.yaml diff --git a/deployments/operator/crd/kustomizeconfig.yaml b/deployments/operator/crd/kustomizeconfig.yaml new file mode 100644 index 00000000..6f83d9a9 --- /dev/null +++ b/deployments/operator/crd/kustomizeconfig.yaml @@ -0,0 +1,17 @@ +# This file is for teaching kustomize how to substitute name and namespace reference in CRD +nameReference: +- kind: Service + version: v1 + fieldSpecs: + - kind: CustomResourceDefinition + group: apiextensions.k8s.io + path: spec/conversion/webhookClientConfig/service/name + +namespace: +- kind: CustomResourceDefinition + group: apiextensions.k8s.io + path: spec/conversion/webhookClientConfig/service/namespace + create: false + +varReference: +- path: metadata/annotations diff --git a/deployments/operator/crd/patches/cainjection_in_gpudeviceplugins.yaml b/deployments/operator/crd/patches/cainjection_in_gpudeviceplugins.yaml new file mode 100644 index 00000000..12a9031d --- /dev/null +++ b/deployments/operator/crd/patches/cainjection_in_gpudeviceplugins.yaml @@ -0,0 +1,8 @@ +# The following patch adds a directive for certmanager to inject CA into the CRD +# CRD conversion requires k8s 1.13 or later. +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + annotations: + cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) + name: gpudeviceplugins.deviceplugin.intel.com diff --git a/deployments/operator/crd/patches/cainjection_in_qatdeviceplugins.yaml b/deployments/operator/crd/patches/cainjection_in_qatdeviceplugins.yaml new file mode 100644 index 00000000..db8ad86f --- /dev/null +++ b/deployments/operator/crd/patches/cainjection_in_qatdeviceplugins.yaml @@ -0,0 +1,8 @@ +# The following patch adds a directive for certmanager to inject CA into the CRD +# CRD conversion requires k8s 1.13 or later. 
+apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + annotations: + cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) + name: qatdeviceplugins.deviceplugin.intel.com diff --git a/deployments/operator/crd/patches/webhook_in_gpudeviceplugins.yaml b/deployments/operator/crd/patches/webhook_in_gpudeviceplugins.yaml new file mode 100644 index 00000000..960d2ba0 --- /dev/null +++ b/deployments/operator/crd/patches/webhook_in_gpudeviceplugins.yaml @@ -0,0 +1,19 @@ +# The following patch enables conversion webhook for CRD +# CRD conversion requires k8s 1.13 or later. +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + name: gpudeviceplugins.deviceplugin.intel.com +spec: + # prunes object fields that are not specified in OpenAPI schemas below. + preserveUnknownFields: false + conversion: + strategy: Webhook + webhookClientConfig: + # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank, + # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager) + caBundle: Cg== + service: + namespace: system + name: webhook-service + path: /convert diff --git a/deployments/operator/crd/patches/webhook_in_qatdeviceplugins.yaml b/deployments/operator/crd/patches/webhook_in_qatdeviceplugins.yaml new file mode 100644 index 00000000..362582de --- /dev/null +++ b/deployments/operator/crd/patches/webhook_in_qatdeviceplugins.yaml @@ -0,0 +1,19 @@ +# The following patch enables conversion webhook for CRD +# CRD conversion requires k8s 1.13 or later. +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + name: qatdeviceplugins.deviceplugin.intel.com +spec: + # prunes object fields that are not specified in OpenAPI schemas below. 
+ preserveUnknownFields: false + conversion: + strategy: Webhook + webhookClientConfig: + # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank, + # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager) + caBundle: Cg== + service: + namespace: system + name: webhook-service + path: /convert diff --git a/deployments/operator/default/kustomization.yaml b/deployments/operator/default/kustomization.yaml new file mode 100644 index 00000000..e7d137e9 --- /dev/null +++ b/deployments/operator/default/kustomization.yaml @@ -0,0 +1,58 @@ +# Adds namespace to all resources. +namespace: inteldeviceplugins-system + +# Value of this field is prepended to the +# names of all resources, e.g. a deployment named +# "wordpress" becomes "alices-wordpress". +# Note that it should also match with the prefix (text before '-') of the namespace +# field above. +namePrefix: inteldeviceplugins- + +# Labels to add to all resources and selectors. +#commonLabels: +# someName: someValue + +bases: +- ../crd +- ../rbac +- ../manager +- ../webhook +- ../certmanager + +patchesStrategicMerge: + # Protect the /metrics endpoint by putting it behind auth. + # If you want your controller-manager to expose the /metrics + # endpoint w/o any authn/z, please comment the following line. 
+- manager_auth_proxy_patch.yaml + # Enable webhook +- manager_webhook_patch.yaml + # Enable certmanager integration +- webhookcainjection_patch.yaml + +vars: +- name: CERTIFICATE_NAMESPACE # namespace of the certificate CR + objref: + kind: Certificate + group: cert-manager.io + version: v1alpha2 + name: serving-cert # this name should match the one in certificate.yaml + fieldref: + fieldpath: metadata.namespace +- name: CERTIFICATE_NAME + objref: + kind: Certificate + group: cert-manager.io + version: v1alpha2 + name: serving-cert # this name should match the one in certificate.yaml +- name: SERVICE_NAMESPACE # namespace of the service + objref: + kind: Service + version: v1 + name: webhook-service + fieldref: + fieldpath: metadata.namespace +- name: SERVICE_NAME + objref: + kind: Service + version: v1 + name: webhook-service diff --git a/deployments/operator/default/manager_auth_proxy_patch.yaml b/deployments/operator/default/manager_auth_proxy_patch.yaml new file mode 100644 index 00000000..a945ed17 --- /dev/null +++ b/deployments/operator/default/manager_auth_proxy_patch.yaml @@ -0,0 +1,25 @@ +# This patch injects a sidecar container which is an HTTP proxy for the +# controller manager; it performs RBAC authorization against the Kubernetes API using SubjectAccessReviews. 
+apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system +spec: + template: + spec: + containers: + - name: kube-rbac-proxy + image: gcr.io/kubebuilder/kube-rbac-proxy:v0.5.0 + args: + - "--secure-listen-address=0.0.0.0:8443" + - "--upstream=http://127.0.0.1:8080/" + - "--logtostderr=true" + - "--v=10" + ports: + - containerPort: 8443 + name: https + - name: manager + args: + - "--metrics-addr=127.0.0.1:8080" + - "--enable-leader-election" diff --git a/deployments/operator/default/manager_webhook_patch.yaml b/deployments/operator/default/manager_webhook_patch.yaml new file mode 100644 index 00000000..738de350 --- /dev/null +++ b/deployments/operator/default/manager_webhook_patch.yaml @@ -0,0 +1,23 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system +spec: + template: + spec: + containers: + - name: manager + ports: + - containerPort: 9443 + name: webhook-server + protocol: TCP + volumeMounts: + - mountPath: /tmp/k8s-webhook-server/serving-certs + name: cert + readOnly: true + volumes: + - name: cert + secret: + defaultMode: 420 + secretName: webhook-server-cert diff --git a/deployments/operator/default/webhookcainjection_patch.yaml b/deployments/operator/default/webhookcainjection_patch.yaml new file mode 100644 index 00000000..7e79bf99 --- /dev/null +++ b/deployments/operator/default/webhookcainjection_patch.yaml @@ -0,0 +1,15 @@ +# This patch adds an annotation to the admission webhook configs; +# the variables $(CERTIFICATE_NAMESPACE) and $(CERTIFICATE_NAME) will be substituted by kustomize. 
+apiVersion: admissionregistration.k8s.io/v1beta1 +kind: MutatingWebhookConfiguration +metadata: + name: mutating-webhook-configuration + annotations: + cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) +--- +apiVersion: admissionregistration.k8s.io/v1beta1 +kind: ValidatingWebhookConfiguration +metadata: + name: validating-webhook-configuration + annotations: + cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) diff --git a/deployments/operator/manager/kustomization.yaml b/deployments/operator/manager/kustomization.yaml new file mode 100644 index 00000000..5c5f0b84 --- /dev/null +++ b/deployments/operator/manager/kustomization.yaml @@ -0,0 +1,2 @@ +resources: +- manager.yaml diff --git a/deployments/operator/manager/manager.yaml b/deployments/operator/manager/manager.yaml new file mode 100644 index 00000000..4f1d9ab7 --- /dev/null +++ b/deployments/operator/manager/manager.yaml @@ -0,0 +1,35 @@ +apiVersion: v1 +kind: Namespace +metadata: + labels: + control-plane: controller-manager + name: system +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system + labels: + control-plane: controller-manager +spec: + selector: + matchLabels: + control-plane: controller-manager + replicas: 1 + template: + metadata: + labels: + control-plane: controller-manager + spec: + containers: + - image: intel/intel-deviceplugin-operator:devel + name: manager + resources: + limits: + cpu: 100m + memory: 30Mi + requests: + cpu: 100m + memory: 20Mi + terminationGracePeriodSeconds: 10 diff --git a/deployments/operator/rbac/auth_proxy_client_clusterrole.yaml b/deployments/operator/rbac/auth_proxy_client_clusterrole.yaml new file mode 100644 index 00000000..7d62534c --- /dev/null +++ b/deployments/operator/rbac/auth_proxy_client_clusterrole.yaml @@ -0,0 +1,7 @@ +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRole +metadata: + name: metrics-reader +rules: +- nonResourceURLs: 
["/metrics"] + verbs: ["get"] diff --git a/deployments/operator/rbac/auth_proxy_role.yaml b/deployments/operator/rbac/auth_proxy_role.yaml new file mode 100644 index 00000000..618f5e41 --- /dev/null +++ b/deployments/operator/rbac/auth_proxy_role.yaml @@ -0,0 +1,13 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: proxy-role +rules: +- apiGroups: ["authentication.k8s.io"] + resources: + - tokenreviews + verbs: ["create"] +- apiGroups: ["authorization.k8s.io"] + resources: + - subjectaccessreviews + verbs: ["create"] diff --git a/deployments/operator/rbac/auth_proxy_role_binding.yaml b/deployments/operator/rbac/auth_proxy_role_binding.yaml new file mode 100644 index 00000000..48ed1e4b --- /dev/null +++ b/deployments/operator/rbac/auth_proxy_role_binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: proxy-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: proxy-role +subjects: +- kind: ServiceAccount + name: default + namespace: system diff --git a/deployments/operator/rbac/auth_proxy_service.yaml b/deployments/operator/rbac/auth_proxy_service.yaml new file mode 100644 index 00000000..6cf656be --- /dev/null +++ b/deployments/operator/rbac/auth_proxy_service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + control-plane: controller-manager + name: controller-manager-metrics-service + namespace: system +spec: + ports: + - name: https + port: 8443 + targetPort: https + selector: + control-plane: controller-manager diff --git a/deployments/operator/rbac/gpudeviceplugin_editor_role.yaml b/deployments/operator/rbac/gpudeviceplugin_editor_role.yaml new file mode 100644 index 00000000..3a188116 --- /dev/null +++ b/deployments/operator/rbac/gpudeviceplugin_editor_role.yaml @@ -0,0 +1,24 @@ +# permissions for end users to edit gpudeviceplugins. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: gpudeviceplugin-editor-role +rules: +- apiGroups: + - deviceplugin.intel.com + resources: + - gpudeviceplugins + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - deviceplugin.intel.com + resources: + - gpudeviceplugins/status + verbs: + - get diff --git a/deployments/operator/rbac/gpudeviceplugin_viewer_role.yaml b/deployments/operator/rbac/gpudeviceplugin_viewer_role.yaml new file mode 100644 index 00000000..6d719503 --- /dev/null +++ b/deployments/operator/rbac/gpudeviceplugin_viewer_role.yaml @@ -0,0 +1,20 @@ +# permissions for end users to view gpudeviceplugins. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: gpudeviceplugin-viewer-role +rules: +- apiGroups: + - deviceplugin.intel.com + resources: + - gpudeviceplugins + verbs: + - get + - list + - watch +- apiGroups: + - deviceplugin.intel.com + resources: + - gpudeviceplugins/status + verbs: + - get diff --git a/deployments/operator/rbac/kustomization.yaml b/deployments/operator/rbac/kustomization.yaml new file mode 100644 index 00000000..66c28338 --- /dev/null +++ b/deployments/operator/rbac/kustomization.yaml @@ -0,0 +1,12 @@ +resources: +- role.yaml +- role_binding.yaml +- leader_election_role.yaml +- leader_election_role_binding.yaml +# Comment the following 4 lines if you want to disable +# the auth proxy (https://github.com/brancz/kube-rbac-proxy) +# which protects your /metrics endpoint. +- auth_proxy_service.yaml +- auth_proxy_role.yaml +- auth_proxy_role_binding.yaml +- auth_proxy_client_clusterrole.yaml diff --git a/deployments/operator/rbac/leader_election_role.yaml b/deployments/operator/rbac/leader_election_role.yaml new file mode 100644 index 00000000..eaa79158 --- /dev/null +++ b/deployments/operator/rbac/leader_election_role.yaml @@ -0,0 +1,32 @@ +# permissions to do leader election. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: leader-election-role +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - "" + resources: + - configmaps/status + verbs: + - get + - update + - patch +- apiGroups: + - "" + resources: + - events + verbs: + - create diff --git a/deployments/operator/rbac/leader_election_role_binding.yaml b/deployments/operator/rbac/leader_election_role_binding.yaml new file mode 100644 index 00000000..eed16906 --- /dev/null +++ b/deployments/operator/rbac/leader_election_role_binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: leader-election-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: leader-election-role +subjects: +- kind: ServiceAccount + name: default + namespace: system diff --git a/deployments/operator/rbac/qatdeviceplugin_editor_role.yaml b/deployments/operator/rbac/qatdeviceplugin_editor_role.yaml new file mode 100644 index 00000000..3c01ae1c --- /dev/null +++ b/deployments/operator/rbac/qatdeviceplugin_editor_role.yaml @@ -0,0 +1,24 @@ +# permissions for end users to edit qatdeviceplugins. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: qatdeviceplugin-editor-role +rules: +- apiGroups: + - deviceplugin.intel.com + resources: + - qatdeviceplugins + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - deviceplugin.intel.com + resources: + - qatdeviceplugins/status + verbs: + - get diff --git a/deployments/operator/rbac/qatdeviceplugin_viewer_role.yaml b/deployments/operator/rbac/qatdeviceplugin_viewer_role.yaml new file mode 100644 index 00000000..1b73c40b --- /dev/null +++ b/deployments/operator/rbac/qatdeviceplugin_viewer_role.yaml @@ -0,0 +1,20 @@ +# permissions for end users to view qatdeviceplugins. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: qatdeviceplugin-viewer-role +rules: +- apiGroups: + - deviceplugin.intel.com + resources: + - qatdeviceplugins + verbs: + - get + - list + - watch +- apiGroups: + - deviceplugin.intel.com + resources: + - qatdeviceplugins/status + verbs: + - get diff --git a/deployments/operator/rbac/role.yaml b/deployments/operator/rbac/role.yaml new file mode 100644 index 00000000..847e3090 --- /dev/null +++ b/deployments/operator/rbac/role.yaml @@ -0,0 +1,68 @@ + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + creationTimestamp: null + name: manager-role +rules: +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - daemonsets + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - deviceplugin.intel.com + resources: + - gpudeviceplugins + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - deviceplugin.intel.com + resources: + - gpudeviceplugins/status + verbs: + - get + - patch + - update +- apiGroups: + - deviceplugin.intel.com + resources: + - qatdeviceplugins + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - deviceplugin.intel.com + resources: + - qatdeviceplugins/status + verbs: + - get + - patch + - update diff --git a/deployments/operator/rbac/role_binding.yaml b/deployments/operator/rbac/role_binding.yaml new file mode 100644 index 00000000..8f265870 --- /dev/null +++ b/deployments/operator/rbac/role_binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: manager-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: manager-role +subjects: +- kind: ServiceAccount + name: default + namespace: system diff --git a/deployments/operator/samples/deviceplugin_v1_gpudeviceplugin.yaml 
b/deployments/operator/samples/deviceplugin_v1_gpudeviceplugin.yaml new file mode 100644 index 00000000..8a464e39 --- /dev/null +++ b/deployments/operator/samples/deviceplugin_v1_gpudeviceplugin.yaml @@ -0,0 +1,6 @@ +apiVersion: deviceplugin.intel.com/v1 +kind: GpuDevicePlugin +metadata: + name: gpudeviceplugin-sample +spec: + image: intel/intel-gpu-plugin:0.18.0 diff --git a/deployments/operator/samples/deviceplugin_v1_qatdeviceplugin.yaml b/deployments/operator/samples/deviceplugin_v1_qatdeviceplugin.yaml new file mode 100644 index 00000000..2ba85480 --- /dev/null +++ b/deployments/operator/samples/deviceplugin_v1_qatdeviceplugin.yaml @@ -0,0 +1,6 @@ +apiVersion: deviceplugin.intel.com/v1 +kind: QatDevicePlugin +metadata: + name: qatdeviceplugin-sample +spec: + image: intel/intel-qat-plugin:0.18.0 diff --git a/deployments/operator/webhook/kustomization.yaml b/deployments/operator/webhook/kustomization.yaml new file mode 100644 index 00000000..9cf26134 --- /dev/null +++ b/deployments/operator/webhook/kustomization.yaml @@ -0,0 +1,6 @@ +resources: +- manifests.yaml +- service.yaml + +configurations: +- kustomizeconfig.yaml diff --git a/deployments/operator/webhook/kustomizeconfig.yaml b/deployments/operator/webhook/kustomizeconfig.yaml new file mode 100644 index 00000000..25e21e3c --- /dev/null +++ b/deployments/operator/webhook/kustomizeconfig.yaml @@ -0,0 +1,25 @@ +# the following config is for teaching kustomize where to look at when substituting vars. +# It requires kustomize v2.1.0 or newer to work properly. 
+nameReference: +- kind: Service + version: v1 + fieldSpecs: + - kind: MutatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/name + - kind: ValidatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/name + +namespace: +- kind: MutatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/namespace + create: true +- kind: ValidatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/namespace + create: true + +varReference: +- path: metadata/annotations diff --git a/deployments/operator/webhook/manifests.yaml b/deployments/operator/webhook/manifests.yaml new file mode 100644 index 00000000..97820a6e --- /dev/null +++ b/deployments/operator/webhook/manifests.yaml @@ -0,0 +1,88 @@ + +--- +apiVersion: admissionregistration.k8s.io/v1beta1 +kind: MutatingWebhookConfiguration +metadata: + creationTimestamp: null + name: mutating-webhook-configuration +webhooks: +- clientConfig: + caBundle: Cg== + service: + name: webhook-service + namespace: system + path: /mutate-deviceplugin-intel-com-v1-gpudeviceplugin + failurePolicy: Fail + name: mgpudeviceplugin.kb.io + rules: + - apiGroups: + - deviceplugin.intel.com + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - gpudeviceplugins +- clientConfig: + caBundle: Cg== + service: + name: webhook-service + namespace: system + path: /mutate-deviceplugin-intel-com-v1-qatdeviceplugin + failurePolicy: Fail + name: mqatdeviceplugin.kb.io + rules: + - apiGroups: + - deviceplugin.intel.com + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - qatdeviceplugins + +--- +apiVersion: admissionregistration.k8s.io/v1beta1 +kind: ValidatingWebhookConfiguration +metadata: + creationTimestamp: null + name: validating-webhook-configuration +webhooks: +- clientConfig: + caBundle: Cg== + service: + name: webhook-service + 
namespace: system + path: /validate-deviceplugin-intel-com-v1-gpudeviceplugin + failurePolicy: Fail + name: vgpudeviceplugin.kb.io + rules: + - apiGroups: + - deviceplugin.intel.com + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - gpudeviceplugins +- clientConfig: + caBundle: Cg== + service: + name: webhook-service + namespace: system + path: /validate-deviceplugin-intel-com-v1-qatdeviceplugin + failurePolicy: Fail + name: vqatdeviceplugin.kb.io + rules: + - apiGroups: + - deviceplugin.intel.com + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - qatdeviceplugins diff --git a/deployments/operator/webhook/service.yaml b/deployments/operator/webhook/service.yaml new file mode 100644 index 00000000..31e0f829 --- /dev/null +++ b/deployments/operator/webhook/service.yaml @@ -0,0 +1,12 @@ + +apiVersion: v1 +kind: Service +metadata: + name: webhook-service + namespace: system +spec: + ports: + - port: 443 + targetPort: 9443 + selector: + control-plane: controller-manager diff --git a/go.mod b/go.mod index ef764c70..20ad3617 100644 --- a/go.mod +++ b/go.mod @@ -5,9 +5,10 @@ go 1.13 require ( github.com/fsnotify/fsnotify v1.4.7 github.com/go-ini/ini v1.46.0 + github.com/go-logr/logr v0.1.0 github.com/google/gousb v0.0.0-20190812193832-18f4c1d8a750 github.com/onsi/ginkgo v1.11.0 - github.com/onsi/gomega v1.7.0 + github.com/onsi/gomega v1.8.1 github.com/pkg/errors v0.8.1 golang.org/x/sys v0.0.0-20191022100944-742c48ecaeb7 google.golang.org/grpc v1.26.0 @@ -20,6 +21,7 @@ require ( k8s.io/kubelet v0.17.3 k8s.io/kubernetes v1.18.2 k8s.io/utils v0.0.0-20200324210504-a9aa75ae1b89 + sigs.k8s.io/controller-runtime v0.6.0 ) replace ( diff --git a/go.sum b/go.sum index d3770039..4d0df063 100644 --- a/go.sum +++ b/go.sum @@ -107,6 +107,7 @@ github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDD github.com/docker/libnetwork v0.8.0-dev.2.0.20190925143933-c8a5fca4a652/go.mod 
h1:93m0aTqz6z+g32wla4l4WxTrdtvBRmVzYRkYvasA5Z8= github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96 h1:cenwrSVm+Z7QLSV/BsnenAOcDXdX4cMv4wP0B/5QbPg= github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM= +github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= @@ -118,8 +119,9 @@ github.com/emicklei/go-restful v2.9.5+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/euank/go-kmsg-parser v2.0.0+incompatible/go.mod h1:MhmAMZ8V4CYH4ybgdRwPr2TU5ThnS43puaKEMpja1uw= -github.com/evanphx/json-patch v4.2.0+incompatible h1:fUDGZCv/7iAN7u0puUVhvKCcsR6vRfwrJatElLBEf0I= github.com/evanphx/json-patch v4.2.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch v4.5.0+incompatible h1:ouOWdg56aJriqS0huScTkVXPC5IcNrDCXZ6OoTAWu7M= +github.com/evanphx/json-patch v4.5.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d/go.mod h1:ZZMPRZwes7CROmyNKgQzC3XPs6L/G2EJLHddWejkmf4= github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc= github.com/fatih/color v1.6.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= @@ -141,7 +143,10 @@ github.com/go-ini/ini v1.46.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3I github.com/go-kit/kit v0.8.0/go.mod 
h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-lintpack/lintpack v0.5.2/go.mod h1:NwZuYi2nUHho8XEIZ6SIxihrnPoqBTDqfpXvXAN0sXM= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= +github.com/go-logr/logr v0.1.0 h1:M1Tv3VzNlEHg6uyACnRdtrploV2P7wZqH8BoQMtz0cg= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= +github.com/go-logr/zapr v0.1.0 h1:h+WVe9j6HAA01niTJPA/kKH0i7e0rLZBCwauQFcRE54= +github.com/go-logr/zapr v0.1.0/go.mod h1:tabnROwaDl0UNxkVeFRbY8bwB37GwRv0P8lg6aAiEnk= github.com/go-ole/go-ole v1.2.1/go.mod h1:7FAglXiTm7HKlQRDeOQ6ZNUHidzCWXuZWq/1dTyBNF8= github.com/go-openapi/analysis v0.0.0-20180825180245-b006789cd277/go.mod h1:k70tL6pCuVxPJOHXQ+wIac1FUrvNkHolPie/cLEU6hI= github.com/go-openapi/analysis v0.17.0/go.mod h1:IowGgpVeD0vNm45So8nr+IcQ3pxVtpRoBWb8PVZO0ik= @@ -210,8 +215,9 @@ github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXP github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903 h1:LbsanbbD6LieFkXbj9YNNBupiGHJgFeLpO0j0Fza1h8= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef h1:veQD95Isof8w9/WXiA+pa3tz3fJXkt5B7QaRBrM62gk= +github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.0.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.2.0/go.mod 
h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= @@ -264,8 +270,9 @@ github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gnostic v0.0.0-20170729233727-0c5108395e2d/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY= -github.com/googleapis/gnostic v0.1.0 h1:rVsPeBmXbYv4If/cumu1AzZPwV58q433hvONV1UEZoI= github.com/googleapis/gnostic v0.1.0/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY= +github.com/googleapis/gnostic v0.3.1 h1:WeAefnSUHlBb0iJKwxFDZdbfGwkd7xRNuV+IpXMJhYk= +github.com/googleapis/gnostic v0.3.1/go.mod h1:on+2t9HRStVgn95RSsFWFz+6Q0Snyqv1awfrALZdbtU= github.com/gophercloud/gophercloud v0.1.0/go.mod h1:vxM41WHh5uqHVBMZHzuwNOHh8XEoIEcSTewFxm1c5g8= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= @@ -294,8 +301,9 @@ github.com/heketi/heketi v9.0.1-0.20190917153846-c2e2a4ab7ab9+incompatible/go.mo github.com/heketi/tests v0.0.0-20151005000721-f3775cbcefd6/go.mod h1:xGMAM8JLi7UkZt1i4FQeQy0R2T8GLUwQhOP5M1gBhy4= github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= -github.com/imdario/mergo v0.3.5 h1:JboBksRwiiAJWvIYJVo46AfV+IAIKZpfrSzVKj42R4Q= github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= +github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28= +github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jellevandenhooff/dkim 
v0.0.0-20150330215556-f50fe3d243e1/go.mod h1:E0B/fFc00Y+Rasa88328GlI/XbtyysCtTHZS8h7IrBU= github.com/jimstudt/http-authentication v0.0.0-20140401203705-3eca13d6893a/go.mod h1:wK6yTYYcgjHE1Z1QtXACPDjcFJyBskHEdagmnq3vsP8= @@ -400,8 +408,9 @@ github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGV github.com/onsi/gomega v1.4.2/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= -github.com/onsi/gomega v1.7.0 h1:XPnZz8VVBHjVsy1vzJmRwIcSwiUO+JFfrv/xGiigmME= github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= +github.com/onsi/gomega v1.8.1 h1:C5Dqfs/LeauYDX0jJXIe2SWmwCbGzx9yF8C8xy3Lh34= +github.com/onsi/gomega v1.8.1/go.mod h1:Ho0h+IUsWyvy1OpqCwxlQ/21gkhVunqlU8fDGcoTdcA= github.com/opencontainers/go-digest v1.0.0-rc1 h1:WzifXhOVOEOuFYOJAW6aQqW0TooG2iki3E3Ii+WN7gQ= github.com/opencontainers/go-digest v1.0.0-rc1/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= github.com/opencontainers/image-spec v1.0.1/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= @@ -523,8 +532,9 @@ go.mongodb.org/mongo-driver v1.0.3/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qL go.mongodb.org/mongo-driver v1.1.1/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM= go.mongodb.org/mongo-driver v1.1.2/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= -go.uber.org/atomic v1.3.2 h1:2Oa65PReHzfn29GpvgsYwloV9AVFHPDk8tYxt2c2tr4= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= +go.uber.org/atomic v1.4.0 h1:cxzIVoETapQEqDhQu3QfnvXAV4AlzcvUCxkVUFw3+EU= +go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/multierr v1.1.0 h1:HoEmRHQPVSqub6w2z2d2EOVs2fjyFRGyofhKuyDq0QI= go.uber.org/multierr v1.1.0/go.mod 
h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/zap v1.10.0 h1:ORx85nbTijNz8ljznvCMR1ZBIPKFn3jQrag10X2AsuM= @@ -660,7 +670,10 @@ golang.org/x/tools v0.0.0-20190617190820-da514acc4774/go.mod h1:/rFqwRUd4F7ZHNgw golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190909030654-5b82db07426d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190920225731-5eefd052ad72/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7 h1:9zdDQZ7Thm29KFXgAX/+yaf3eVbP7djjWp/dXAppNCc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gomodules.xyz/jsonpatch/v2 v2.0.1 h1:xyiBuvkD2g5n7cYzx6u2sxQvsAy4QJsZFCzGVdzOXZ0= +gomodules.xyz/jsonpatch/v2 v2.0.1/go.mod h1:IhYNNY4jnS53ZnfE4PAmpKtDpTCj1JFXc+3mwe7XcUU= gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= gonum.org/v1/gonum v0.0.0-20190331200053-3d26580ed485/go.mod h1:2ltnJ7xHfj0zHS40VVPYEAAMTa3ZGguvHGBSJeRWqE0= gonum.org/v1/gonum v0.6.2/go.mod h1:9mxDZsDKxgMAuccQkewq682L+0eCu4dCN2yonUJTCLU= @@ -688,8 +701,9 @@ google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8 gopkg.in/airbrake/gobrake.v2 v2.0.9/go.mod h1:/h5ZAUhDkGaJfjzjKLSjv6zCL6O0LLBxU4K+aSYdM/U= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= +gopkg.in/check.v1 
v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= @@ -725,6 +739,7 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.2/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= k8s.io/api v0.18.2 h1:wG5g5ZmSVgm5B+eHMIbI9EGATS2L8Z72rda19RIEgY8= k8s.io/api v0.18.2/go.mod h1:SJCWI7OLzhZSvbY7U8zwNl9UA4o1fizoug34OV/2r78= +k8s.io/apiextensions-apiserver v0.18.2 h1:I4v3/jAuQC+89L3Z7dDgAiN4EOjN6sbm6iBqQwHTah8= k8s.io/apiextensions-apiserver v0.18.2/go.mod h1:q3faSnRGmYimiocj6cHQ1I3WpLqmDgJFlKL37fC4ZvY= k8s.io/apimachinery v0.18.3-beta.0 h1:jfczeoY//Qd3OhgpCnJjNF+C/ezpsQf3VPIT19n0szM= k8s.io/apimachinery v0.18.3-beta.0/go.mod h1:9SnR/e11v5IbyPCGbvJViimtJ0SwHG4nfZFjU77ftcA= @@ -781,6 +796,8 @@ mvdan.cc/unparam v0.0.0-20190209190245-fbb59629db34/go.mod h1:H6SUd1XjIs+qQCyskX rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.7 h1:uuHDyjllyzRyCIvvn0OBjiRB0SgBZGqHNYAmjR7fO50= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.7/go.mod h1:PHgbrJT7lCHcxMU+mDHEm+nx46H4zuuHZkDP6icnhu0= +sigs.k8s.io/controller-runtime v0.6.0 h1:Fzna3DY7c4BIP6KwfSlrfnj20DJ+SeMBK8HSFvOk9NM= +sigs.k8s.io/controller-runtime v0.6.0/go.mod h1:CpYf5pdNY/B352A1TFLAS2JVSlnGQ5O2cftPHndTroo= sigs.k8s.io/kustomize v2.0.3+incompatible/go.mod h1:MkjgH3RdOWrievjo6c9T245dYlB5QeXV4WCbnt/PEpU= sigs.k8s.io/structured-merge-diff/v3 v3.0.0-20200116222232-67a7b8c61874/go.mod h1:PlARxl6Hbt/+BC80dRLi1qAmnMqwqDg62YvvVkZjemw= sigs.k8s.io/structured-merge-diff/v3 v3.0.0 h1:dOmIZBMfhcHS09XZkMyUgkq5trg3/jRyJYFZUiaOp8E= diff --git a/pkg/apis/deviceplugin/v1/gpudeviceplugin_types.go 
b/pkg/apis/deviceplugin/v1/gpudeviceplugin_types.go new file mode 100644 index 00000000..2530d56d --- /dev/null +++ b/pkg/apis/deviceplugin/v1/gpudeviceplugin_types.go @@ -0,0 +1,93 @@ +// Copyright 2020 Intel Corporation. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +import ( + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. + +// GpuDevicePluginSpec defines the desired state of GpuDevicePlugin. +type GpuDevicePluginSpec struct { + // Important: Run "make generate" to regenerate code after modifying this file + + // Image is a container image with GPU device plugin executable. + // When empty, the defaulting webhook fills in a default image; the validating + // webhook requires the image name intel-gpu-plugin and a semantic-version tag + // that is not lower than the minimum supported version. + Image string `json:"image,omitempty"` + + // SharedDevNum is the number of containers that can share the same GPU device. + // +kubebuilder:validation:Minimum=1 + SharedDevNum int `json:"sharedDevNum,omitempty"` + + // LogLevel sets the plugin's log level. + // +kubebuilder:validation:Minimum=0 + LogLevel int `json:"logLevel,omitempty"` + + // NodeSelector provides a simple way to constrain device plugin pods to nodes with particular labels. + NodeSelector map[string]string `json:"nodeSelector,omitempty"` +} + +// GpuDevicePluginStatus defines the observed state of GpuDevicePlugin. +// TODO(rojkov): consider code deduplication with QatDevicePluginStatus.
+type GpuDevicePluginStatus struct { + // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster + // Important: Run "make generate" to regenerate code after modifying this file + + // ControlledDaemonSet references the DaemonSet controlled by the operator. + // +optional + ControlledDaemonSet v1.ObjectReference `json:"controlledDaemonSet,omitempty"` + + // DesiredNumberScheduled is the total number of nodes that should be running the device plugin + // pod (including nodes correctly running the device plugin pod). + DesiredNumberScheduled int32 `json:"desiredNumberScheduled"` + + // NumberReady is the number of nodes that should be running the device plugin pod and have one + // or more of the device plugin pods running and ready. + NumberReady int32 `json:"numberReady"` + + // NodeNames is the list of Node names where the device plugin pods are running. + // +optional + NodeNames []string `json:"nodeNames,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Desired",type=integer,JSONPath=`.status.desiredNumberScheduled` +// +kubebuilder:printcolumn:name="Ready",type=integer,JSONPath=`.status.numberReady` +// +kubebuilder:printcolumn:name="Node Selector",type=string,JSONPath=`.spec.nodeSelector` +// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` + +// GpuDevicePlugin is the Schema for the gpudeviceplugins API. +// It pairs the user-provided Spec with the Status exposed through the status subresource. +type GpuDevicePlugin struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec GpuDevicePluginSpec `json:"spec,omitempty"` + Status GpuDevicePluginStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// GpuDevicePluginList contains a list of GpuDevicePlugin.
+type GpuDevicePluginList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []GpuDevicePlugin `json:"items"` +} + +func init() { + SchemeBuilder.Register(&GpuDevicePlugin{}, &GpuDevicePluginList{}) +} diff --git a/pkg/apis/deviceplugin/v1/gpudeviceplugin_webhook.go b/pkg/apis/deviceplugin/v1/gpudeviceplugin_webhook.go new file mode 100644 index 00000000..6064b486 --- /dev/null +++ b/pkg/apis/deviceplugin/v1/gpudeviceplugin_webhook.go @@ -0,0 +1,114 @@ +// Copyright 2020 Intel Corporation. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +import ( + "strings" + + "github.com/pkg/errors" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/version" + ctrl "sigs.k8s.io/controller-runtime" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/webhook" + + "github.com/intel/intel-device-plugins-for-kubernetes/pkg/controllers" +) + +const ( + // gpuPluginKind is the kind name passed to controllers.GetDevicePluginCount + // and quoted in the "already exists" error returned by ValidateCreate. + gpuPluginKind = "GpuDevicePlugin" +) + +var ( + // gpudevicepluginlog is for logging in this package. + gpudevicepluginlog = logf.Log.WithName("gpudeviceplugin-resource") + + // gpuMinVersion is the lowest plugin image version accepted by validatePlugin. + gpuMinVersion = version.MustParseSemantic("0.18.0") +) + +// SetupWebhookWithManager sets up a webhook for GpuDevicePlugin custom resources. +// It returns the error, if any, from completing the webhook builder for mgr. +func (r *GpuDevicePlugin) SetupWebhookWithManager(mgr ctrl.Manager) error { + return ctrl.NewWebhookManagedBy(mgr). + For(r).
+ Complete() +} + +// +kubebuilder:webhook:path=/mutate-deviceplugin-intel-com-v1-gpudeviceplugin,mutating=true,failurePolicy=fail,groups=deviceplugin.intel.com,resources=gpudeviceplugins,verbs=create;update,versions=v1,name=mgpudeviceplugin.kb.io + +var _ webhook.Defaulter = &GpuDevicePlugin{} + +// Default implements webhook.Defaulter so a webhook will be registered for the type. +func (r *GpuDevicePlugin) Default() { + gpudevicepluginlog.Info("default", "name", r.Name) + + if len(r.Spec.Image) == 0 { + r.Spec.Image = "intel/intel-gpu-plugin:0.18.0" + } +} + +// +kubebuilder:webhook:verbs=create;update,path=/validate-deviceplugin-intel-com-v1-gpudeviceplugin,mutating=false,failurePolicy=fail,groups=deviceplugin.intel.com,resources=gpudeviceplugins,versions=v1,name=vgpudeviceplugin.kb.io + +var _ webhook.Validator = &GpuDevicePlugin{} + +// ValidateCreate implements webhook.Validator so a webhook will be registered for the type. +func (r *GpuDevicePlugin) ValidateCreate() error { + gpudevicepluginlog.Info("validate create", "name", r.Name) + + if controllers.GetDevicePluginCount(gpuPluginKind) > 0 { + return errors.Errorf("an instance of %q already exists in the cluster", gpuPluginKind) + } + + return r.validatePlugin() +} + +// ValidateUpdate implements webhook.Validator so a webhook will be registered for the type. +func (r *GpuDevicePlugin) ValidateUpdate(old runtime.Object) error { + gpudevicepluginlog.Info("validate update", "name", r.Name) + + return r.validatePlugin() +} + +// ValidateDelete implements webhook.Validator so a webhook will be registered for the type. 
+func (r *GpuDevicePlugin) ValidateDelete() error { + gpudevicepluginlog.Info("validate delete", "name", r.Name) + + return nil +} + +func (r *GpuDevicePlugin) validatePlugin() error { + parts := strings.SplitN(r.Spec.Image, ":", 2) + if len(parts) != 2 { + return errors.Errorf("incorrect image field %q", r.Spec.Image) + } + namespacedName := parts[0] + versionStr := parts[1] + + parts = strings.Split(namespacedName, "/") + name := parts[len(parts)-1] + if name != "intel-gpu-plugin" { + return errors.Errorf("incorrect image name %q. Make sure you use '/image-gpu-plugin:'", name) + } + + ver, err := version.ParseSemantic(versionStr) + if err != nil { + return errors.Wrapf(err, "unable to parse version %q", versionStr) + } + + if !ver.AtLeast(gpuMinVersion) { + return errors.Errorf("version %q is too low. Should be at least %q", ver, gpuMinVersion) + } + + return nil +} diff --git a/pkg/apis/deviceplugin/v1/groupversion_info.go b/pkg/apis/deviceplugin/v1/groupversion_info.go new file mode 100644 index 00000000..a4e95e6f --- /dev/null +++ b/pkg/apis/deviceplugin/v1/groupversion_info.go @@ -0,0 +1,34 @@ +// Copyright 2020 Intel Corporation. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 

// Package v1 contains API Schema definitions for the deviceplugin v1 API group
// +kubebuilder:object:generate=true
// +groupName=deviceplugin.intel.com
package v1

import (
	"k8s.io/apimachinery/pkg/runtime/schema"
	"sigs.k8s.io/controller-runtime/pkg/scheme"
)

var (
	// GroupVersion is the API group and version used to register these objects.
	GroupVersion = schema.GroupVersion{Group: "deviceplugin.intel.com", Version: "v1"}

	// SchemeBuilder is used to add Go types to the GroupVersionKind scheme.
	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}

	// AddToScheme adds the types in this group-version to the given scheme.
	AddToScheme = SchemeBuilder.AddToScheme
)
diff --git a/pkg/apis/deviceplugin/v1/qatdeviceplugin_types.go b/pkg/apis/deviceplugin/v1/qatdeviceplugin_types.go new file mode 100644 index 00000000..a0f66850 --- /dev/null +++ b/pkg/apis/deviceplugin/v1/qatdeviceplugin_types.go @@ -0,0 +1,105 @@
// Copyright 2020 Intel Corporation. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package v1

import (
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.

// +kubebuilder:validation:Enum=dh895xccvf;c6xxvf;c3xxxvf;d15xxvf

// KernelVfDriver is a VF device driver for QuickAssist devices.
type KernelVfDriver string

// QatDevicePluginSpec defines the desired state of QatDevicePlugin.
type QatDevicePluginSpec struct {
	// Important: Run "make generate" to regenerate code after modifying this file.

	// Image is a container image with QAT device plugin executable.
	Image string `json:"image,omitempty"`

	// DpdkDriver is a DPDK device driver for configuring the QAT device.
	// When empty, the operator passes "vfio-pci" to the plugin.
	// +kubebuilder:validation:Enum=igb_uio;vfio-pci
	DpdkDriver string `json:"dpdkDriver,omitempty"`

	// KernelVfDrivers is a list of VF device drivers for the QuickAssist devices in the system.
	// When empty, the operator passes "dh895xccvf,c6xxvf,c3xxxvf,d15xxvf" to the plugin.
	KernelVfDrivers []KernelVfDriver `json:"kernelVfDrivers,omitempty"`

	// MaxNumDevices is a maximum number of QAT devices to be provided to the QuickAssist device plugin.
	// When unset, the operator passes 32 to the plugin.
	// +kubebuilder:validation:Minimum=1
	MaxNumDevices int `json:"maxNumDevices,omitempty"`

	// LogLevel sets the plugin's log level.
	// +kubebuilder:validation:Minimum=0
	LogLevel int `json:"logLevel,omitempty"`

	// NodeSelector provides a simple way to constrain device plugin pods to nodes with particular labels.
	NodeSelector map[string]string `json:"nodeSelector,omitempty"`
}

// QatDevicePluginStatus defines the observed state of QatDevicePlugin.
// TODO(rojkov): consider code deduplication with GpuDevicePluginStatus.
type QatDevicePluginStatus struct {
	// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
	// Important: Run "make generate" to regenerate code after modifying this file

	// ControlledDaemonSet references the DaemonSet controlled by the operator.
	// +optional
	ControlledDaemonSet v1.ObjectReference `json:"controlledDaemonSet,omitempty"`

	// The total number of nodes that should be running the device plugin
	// pod (including nodes correctly running the device plugin pod).
	DesiredNumberScheduled int32 `json:"desiredNumberScheduled"`

	// The number of nodes that should be running the device plugin pod and have one
	// or more of the device plugin pod running and ready.
	NumberReady int32 `json:"numberReady"`

	// The list of Node names where the device plugin pods are running.
	// +optional
	NodeNames []string `json:"nodeNames,omitempty"`
}

// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:printcolumn:name="Desired",type=integer,JSONPath=`.status.desiredNumberScheduled`
// +kubebuilder:printcolumn:name="Ready",type=integer,JSONPath=`.status.numberReady`
// +kubebuilder:printcolumn:name="Node Selector",type=string,JSONPath=`.spec.nodeSelector`
// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`

// QatDevicePlugin is the Schema for the qatdeviceplugins API.
type QatDevicePlugin struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	Spec   QatDevicePluginSpec   `json:"spec,omitempty"`
	Status QatDevicePluginStatus `json:"status,omitempty"`
}

// +kubebuilder:object:root=true

// QatDevicePluginList contains a list of QatDevicePlugin.
type QatDevicePluginList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []QatDevicePlugin `json:"items"`
}

// init registers the QatDevicePlugin and QatDevicePluginList types with the
// package-level SchemeBuilder.
func init() {
	SchemeBuilder.Register(&QatDevicePlugin{}, &QatDevicePluginList{})
}
diff --git a/pkg/apis/deviceplugin/v1/qatdeviceplugin_webhook.go b/pkg/apis/deviceplugin/v1/qatdeviceplugin_webhook.go new file mode 100644 index 00000000..8459064a --- /dev/null +++ b/pkg/apis/deviceplugin/v1/qatdeviceplugin_webhook.go @@ -0,0 +1,114 @@
// Copyright 2020 Intel Corporation. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +import ( + "strings" + + "github.com/pkg/errors" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/version" + ctrl "sigs.k8s.io/controller-runtime" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/webhook" + + "github.com/intel/intel-device-plugins-for-kubernetes/pkg/controllers" +) + +const ( + qatPluginKind = "QatDevicePlugin" +) + +var ( + // qatdevicepluginlog is for logging in this package. + qatdevicepluginlog = logf.Log.WithName("qatdeviceplugin-resource") + + qatMinVersion = version.MustParseSemantic("0.18.0") +) + +// SetupWebhookWithManager sets up a webhook for QatDevicePlugin custom resources. +func (r *QatDevicePlugin) SetupWebhookWithManager(mgr ctrl.Manager) error { + return ctrl.NewWebhookManagedBy(mgr). + For(r). + Complete() +} + +// +kubebuilder:webhook:path=/mutate-deviceplugin-intel-com-v1-qatdeviceplugin,mutating=true,failurePolicy=fail,groups=deviceplugin.intel.com,resources=qatdeviceplugins,verbs=create;update,versions=v1,name=mqatdeviceplugin.kb.io + +var _ webhook.Defaulter = &QatDevicePlugin{} + +// Default implements webhook.Defaulter so a webhook will be registered for the type. 
+func (r *QatDevicePlugin) Default() { + qatdevicepluginlog.Info("default", "name", r.Name) + + if len(r.Spec.Image) == 0 { + r.Spec.Image = "intel/intel-qat-plugin:0.18.0" + } +} + +// +kubebuilder:webhook:verbs=create;update,path=/validate-deviceplugin-intel-com-v1-qatdeviceplugin,mutating=false,failurePolicy=fail,groups=deviceplugin.intel.com,resources=qatdeviceplugins,versions=v1,name=vqatdeviceplugin.kb.io + +var _ webhook.Validator = &QatDevicePlugin{} + +// ValidateCreate implements webhook.Validator so a webhook will be registered for the type. +func (r *QatDevicePlugin) ValidateCreate() error { + qatdevicepluginlog.Info("validate create", "name", r.Name) + + if controllers.GetDevicePluginCount(qatPluginKind) > 0 { + return errors.Errorf("an instance of %q already exists in the cluster", qatPluginKind) + } + + return r.validatePlugin() +} + +// ValidateUpdate implements webhook.Validator so a webhook will be registered for the type. +func (r *QatDevicePlugin) ValidateUpdate(old runtime.Object) error { + qatdevicepluginlog.Info("validate update", "name", r.Name) + + return r.validatePlugin() +} + +// ValidateDelete implements webhook.Validator so a webhook will be registered for the type. +func (r *QatDevicePlugin) ValidateDelete() error { + qatdevicepluginlog.Info("validate delete", "name", r.Name) + + return nil +} + +func (r *QatDevicePlugin) validatePlugin() error { + parts := strings.SplitN(r.Spec.Image, ":", 2) + if len(parts) != 2 { + return errors.Errorf("incorrect image field %q", r.Spec.Image) + } + namespacedName := parts[0] + versionStr := parts[1] + + parts = strings.Split(namespacedName, "/") + name := parts[len(parts)-1] + if name != "intel-qat-plugin" { + return errors.Errorf("incorrect image name %q. 
Make sure you use '/image-qat-plugin:'", name) + } + + ver, err := version.ParseSemantic(versionStr) + if err != nil { + return errors.Wrapf(err, "unable to parse version %q", versionStr) + } + + if !ver.AtLeast(qatMinVersion) { + return errors.Errorf("version %q is too low. Should be at least %q", ver, qatMinVersion) + } + + return nil +} diff --git a/pkg/apis/deviceplugin/v1/zz_generated.deepcopy.go b/pkg/apis/deviceplugin/v1/zz_generated.deepcopy.go new file mode 100644 index 00000000..f1ca0789 --- /dev/null +++ b/pkg/apis/deviceplugin/v1/zz_generated.deepcopy.go @@ -0,0 +1,232 @@ +// +build !ignore_autogenerated + +// Copyright 2020 Intel Corporation. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by controller-gen. DO NOT EDIT. + +package v1 + +import ( + "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GpuDevicePlugin) DeepCopyInto(out *GpuDevicePlugin) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GpuDevicePlugin. 
+func (in *GpuDevicePlugin) DeepCopy() *GpuDevicePlugin { + if in == nil { + return nil + } + out := new(GpuDevicePlugin) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *GpuDevicePlugin) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GpuDevicePluginList) DeepCopyInto(out *GpuDevicePluginList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]GpuDevicePlugin, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GpuDevicePluginList. +func (in *GpuDevicePluginList) DeepCopy() *GpuDevicePluginList { + if in == nil { + return nil + } + out := new(GpuDevicePluginList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *GpuDevicePluginList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GpuDevicePluginSpec) DeepCopyInto(out *GpuDevicePluginSpec) { + *out = *in + if in.NodeSelector != nil { + in, out := &in.NodeSelector, &out.NodeSelector + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GpuDevicePluginSpec. 
+func (in *GpuDevicePluginSpec) DeepCopy() *GpuDevicePluginSpec { + if in == nil { + return nil + } + out := new(GpuDevicePluginSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GpuDevicePluginStatus) DeepCopyInto(out *GpuDevicePluginStatus) { + *out = *in + out.ControlledDaemonSet = in.ControlledDaemonSet + if in.NodeNames != nil { + in, out := &in.NodeNames, &out.NodeNames + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GpuDevicePluginStatus. +func (in *GpuDevicePluginStatus) DeepCopy() *GpuDevicePluginStatus { + if in == nil { + return nil + } + out := new(GpuDevicePluginStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *QatDevicePlugin) DeepCopyInto(out *QatDevicePlugin) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new QatDevicePlugin. +func (in *QatDevicePlugin) DeepCopy() *QatDevicePlugin { + if in == nil { + return nil + } + out := new(QatDevicePlugin) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *QatDevicePlugin) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *QatDevicePluginList) DeepCopyInto(out *QatDevicePluginList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]QatDevicePlugin, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new QatDevicePluginList. +func (in *QatDevicePluginList) DeepCopy() *QatDevicePluginList { + if in == nil { + return nil + } + out := new(QatDevicePluginList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *QatDevicePluginList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *QatDevicePluginSpec) DeepCopyInto(out *QatDevicePluginSpec) { + *out = *in + if in.KernelVfDrivers != nil { + in, out := &in.KernelVfDrivers, &out.KernelVfDrivers + *out = make([]KernelVfDriver, len(*in)) + copy(*out, *in) + } + if in.NodeSelector != nil { + in, out := &in.NodeSelector, &out.NodeSelector + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new QatDevicePluginSpec. +func (in *QatDevicePluginSpec) DeepCopy() *QatDevicePluginSpec { + if in == nil { + return nil + } + out := new(QatDevicePluginSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *QatDevicePluginStatus) DeepCopyInto(out *QatDevicePluginStatus) { + *out = *in + out.ControlledDaemonSet = in.ControlledDaemonSet + if in.NodeNames != nil { + in, out := &in.NodeNames, &out.NodeNames + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new QatDevicePluginStatus. +func (in *QatDevicePluginStatus) DeepCopy() *QatDevicePluginStatus { + if in == nil { + return nil + } + out := new(QatDevicePluginStatus) + in.DeepCopyInto(out) + return out +} diff --git a/pkg/controllers/gpu/controller.go b/pkg/controllers/gpu/controller.go new file mode 100644 index 00000000..e1da940c --- /dev/null +++ b/pkg/controllers/gpu/controller.go @@ -0,0 +1,240 @@ +// Copyright 2020 Intel Corporation. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package gpu contains GPU specific reconciliation logic. 
+package gpu + +import ( + "context" + "reflect" + "strconv" + "strings" + + apps "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/tools/reference" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + devicepluginv1 "github.com/intel/intel-device-plugins-for-kubernetes/pkg/apis/deviceplugin/v1" + "github.com/intel/intel-device-plugins-for-kubernetes/pkg/controllers" + "github.com/pkg/errors" +) + +const ( + ownerKey = ".metadata.controller.gpu" + appLabel = "intel-gpu-plugin" +) + +// +kubebuilder:rbac:groups=deviceplugin.intel.com,resources=gpudeviceplugins,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=deviceplugin.intel.com,resources=gpudeviceplugins/status,verbs=get;update;patch + +// SetupReconciler creates a new reconciler for GpuDevicePlugin objects. +func SetupReconciler(mgr ctrl.Manager) error { + c := &controller{scheme: mgr.GetScheme()} + return controllers.SetupWithManager(mgr, c, devicepluginv1.GroupVersion.String(), "GpuDevicePlugin", ownerKey) +} + +type controller struct { + scheme *runtime.Scheme +} + +func (c *controller) CreateEmptyObject() runtime.Object { + return &devicepluginv1.GpuDevicePlugin{} +} + +func (c *controller) GetTotalObjectCount(ctx context.Context, clnt client.Client) (int, error) { + var list devicepluginv1.GpuDevicePluginList + if err := clnt.List(ctx, &list); err != nil { + return 0, err + } + + return len(list.Items), nil +} + +func (c *controller) NewDaemonSet(rawObj runtime.Object) *apps.DaemonSet { + devicePlugin := rawObj.(*devicepluginv1.GpuDevicePlugin) + + var nodeSelector map[string]string + dpNodeSelectorSize := len(devicePlugin.Spec.NodeSelector) + if dpNodeSelectorSize > 0 { + nodeSelector = make(map[string]string, dpNodeSelectorSize+1) + for k, v := range devicePlugin.Spec.NodeSelector { + nodeSelector[k] = v + } + 
nodeSelector["kubernetes.io/arch"] = "amd64" + } else { + nodeSelector = map[string]string{"kubernetes.io/arch": "amd64"} + } + + yes := true + return &apps.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: devicePlugin.Namespace, + GenerateName: devicePlugin.Name + "-", + Labels: map[string]string{ + "app": appLabel, + }, + }, + Spec: apps.DaemonSetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": appLabel, + }, + }, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": appLabel, + }, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: appLabel, + Env: []v1.EnvVar{ + { + Name: "NODE_NAME", + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{ + FieldPath: "spec.nodeName", + }, + }, + }, + }, + Args: getPodArgs(devicePlugin), + Image: devicePlugin.Spec.Image, + ImagePullPolicy: "IfNotPresent", + SecurityContext: &v1.SecurityContext{ + ReadOnlyRootFilesystem: &yes, + }, + VolumeMounts: []v1.VolumeMount{ + { + Name: "devfs", + MountPath: "/dev/dri", + ReadOnly: true, + }, + { + Name: "sysfs", + MountPath: "/sys/class/drm", + ReadOnly: true, + }, + { + Name: "kubeletsockets", + MountPath: "/var/lib/kubelet/device-plugins", + }, + }, + }, + }, + NodeSelector: nodeSelector, + Volumes: []v1.Volume{ + { + Name: "devfs", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/dev/dri", + }, + }, + }, + { + Name: "sysfs", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/sys/class/drm", + }, + }, + }, + { + Name: "kubeletsockets", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/var/lib/kubelet/device-plugins", + }, + }, + }, + }, + }, + }, + }, + } +} + +func (c *controller) UpdateDaemonSet(rawObj runtime.Object, ds *apps.DaemonSet) (updated bool) { + dp := rawObj.(*devicepluginv1.GpuDevicePlugin) + + if ds.Spec.Template.Spec.Containers[0].Image != dp.Spec.Image { + 
ds.Spec.Template.Spec.Containers[0].Image = dp.Spec.Image + updated = true + } + + dp.Spec.NodeSelector["kubernetes.io/arch"] = "amd64" + if !reflect.DeepEqual(ds.Spec.Template.Spec.NodeSelector, dp.Spec.NodeSelector) { + ds.Spec.Template.Spec.NodeSelector = dp.Spec.NodeSelector + updated = true + } + + newargs := getPodArgs(dp) + if strings.Join(ds.Spec.Template.Spec.Containers[0].Args, " ") != strings.Join(newargs, " ") { + ds.Spec.Template.Spec.Containers[0].Args = newargs + updated = true + } + + return updated +} + +func (c *controller) UpdateStatus(rawObj runtime.Object, ds *apps.DaemonSet, nodeNames []string) (updated bool, err error) { + dp := rawObj.(*devicepluginv1.GpuDevicePlugin) + + dsRef, err := reference.GetReference(c.scheme, ds) + if err != nil { + return false, errors.Wrap(err, "unable to make reference to controlled daemon set") + } + + if dp.Status.ControlledDaemonSet.UID != dsRef.UID { + dp.Status.ControlledDaemonSet = *dsRef + updated = true + } + + if dp.Status.DesiredNumberScheduled != ds.Status.DesiredNumberScheduled { + dp.Status.DesiredNumberScheduled = ds.Status.DesiredNumberScheduled + updated = true + } + + if dp.Status.NumberReady != ds.Status.NumberReady { + dp.Status.NumberReady = ds.Status.NumberReady + updated = true + } + + if strings.Join(dp.Status.NodeNames, ",") != strings.Join(nodeNames, ",") { + dp.Status.NodeNames = nodeNames + updated = true + } + + return updated, nil +} + +func getPodArgs(gdp *devicepluginv1.GpuDevicePlugin) []string { + args := make([]string, 0, 4) + args = append(args, "-v", strconv.Itoa(gdp.Spec.LogLevel)) + + if gdp.Spec.SharedDevNum > 0 { + args = append(args, "-shared-dev-num", strconv.Itoa(gdp.Spec.SharedDevNum)) + } else { + args = append(args, "-shared-dev-num", "1") + } + + return args +} diff --git a/pkg/controllers/qat/controller.go b/pkg/controllers/qat/controller.go new file mode 100644 index 00000000..e2887980 --- /dev/null +++ b/pkg/controllers/qat/controller.go @@ -0,0 +1,231 @@ +// 
Copyright 2020 Intel Corporation. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package qat contains QAT specific reconciliation logic. +package qat + +import ( + "context" + "reflect" + "strconv" + "strings" + + apps "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/tools/reference" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + devicepluginv1 "github.com/intel/intel-device-plugins-for-kubernetes/pkg/apis/deviceplugin/v1" + "github.com/intel/intel-device-plugins-for-kubernetes/pkg/controllers" + "github.com/pkg/errors" +) + +const ( + ownerKey = ".metadata.controller.qat" + appLabel = "intel-qat-plugin" +) + +// +kubebuilder:rbac:groups=deviceplugin.intel.com,resources=qatdeviceplugins,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=deviceplugin.intel.com,resources=qatdeviceplugins/status,verbs=get;update;patch + +// SetupReconciler creates a new reconciler for QatDevicePlugin objects. 
+func SetupReconciler(mgr ctrl.Manager) error {
+	// The QAT-specific controller only carries the scheme, which UpdateStatus
+	// needs to build an object reference; everything else is delegated to
+	// the shared reconciler in package controllers.
+	c := &controller{scheme: mgr.GetScheme()}
+	return controllers.SetupWithManager(mgr, c, devicepluginv1.GroupVersion.String(), "QatDevicePlugin", ownerKey)
+}
+
+// controller provides the QatDevicePlugin-specific operations that are handed
+// to controllers.SetupWithManager.
+type controller struct {
+	scheme *runtime.Scheme
+}
+
+// CreateEmptyObject returns a new, zero-valued QatDevicePlugin.
+func (c *controller) CreateEmptyObject() runtime.Object {
+	return &devicepluginv1.QatDevicePlugin{}
+}
+
+// GetTotalObjectCount lists QatDevicePlugin objects through clnt and returns
+// how many were found. On a list error it returns 0 and that error.
+func (c *controller) GetTotalObjectCount(ctx context.Context, clnt client.Client) (int, error) {
+	var list devicepluginv1.QatDevicePluginList
+	if err := clnt.List(ctx, &list); err != nil {
+		return 0, err
+	}
+
+	return len(list.Items), nil
+}
+
+// NewDaemonSet builds the DaemonSet that runs the QAT device plugin described
+// by rawObj. The DaemonSet is placed in the custom resource's namespace, its
+// name is generated from the resource name, and image, node selector and
+// command line arguments are taken from the resource spec.
+//
+// rawObj must be a *QatDevicePlugin; any other type makes the type assertion
+// panic.
+func (c *controller) NewDaemonSet(rawObj runtime.Object) *apps.DaemonSet {
+	devicePlugin := rawObj.(*devicepluginv1.QatDevicePlugin)
+	readOnlyRootFS := true
+	// The same label set is used for the DaemonSet, its selector and the pods.
+	labels := func() map[string]string {
+		return map[string]string{"app": appLabel}
+	}
+
+	return &apps.DaemonSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace:    devicePlugin.Namespace,
+			GenerateName: devicePlugin.Name + "-",
+			Labels:       labels(),
+		},
+		Spec: apps.DaemonSetSpec{
+			Selector: &metav1.LabelSelector{
+				MatchLabels: labels(),
+			},
+			Template: v1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: labels(),
+				},
+				Spec: v1.PodSpec{
+					Containers: []v1.Container{
+						{
+							Name:  appLabel,
+							Args:  getPodArgs(devicePlugin),
+							Image: devicePlugin.Spec.Image,
+							// Typed constant instead of the bare "IfNotPresent" literal.
+							ImagePullPolicy: v1.PullIfNotPresent,
+							SecurityContext: &v1.SecurityContext{
+								ReadOnlyRootFilesystem: &readOnlyRootFS,
+							},
+							VolumeMounts: []v1.VolumeMount{
+								{
+									Name:      "devdir",
+									MountPath: "/dev/vfio",
+									ReadOnly:  true,
+								},
+								{
+									Name:      "pcidir",
+									MountPath: "/sys/bus/pci",
+								},
+								{
+									Name:      "kubeletsockets",
+									MountPath: "/var/lib/kubelet/device-plugins",
+								},
+							},
+						},
+					},
+					NodeSelector: devicePlugin.Spec.NodeSelector,
+					Volumes: []v1.Volume{
+						{
+							Name: "devdir",
+							VolumeSource: v1.VolumeSource{
+								HostPath: &v1.HostPathVolumeSource{
+									Path: "/dev/vfio",
+								},
+							},
+						},
+						{
+							Name: "pcidir",
+							VolumeSource: v1.VolumeSource{
+								HostPath: &v1.HostPathVolumeSource{
+									Path: "/sys/bus/pci",
+								},
+							},
+						},
+						{
+							Name: "kubeletsockets",
+							VolumeSource: v1.VolumeSource{
+								HostPath: &v1.HostPathVolumeSource{
+									Path: "/var/lib/kubelet/device-plugins",
+								},
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+}
+
+// UpdateDaemonSet brings the image, node selector and arguments of the first
+// container of ds in line with the spec of rawObj. It modifies ds in place and
+// reports whether anything was changed.
+//
+// rawObj must be a *QatDevicePlugin and ds must have at least one container;
+// otherwise the function panics.
+func (c *controller) UpdateDaemonSet(rawObj runtime.Object, ds *apps.DaemonSet) (updated bool) {
+	dp := rawObj.(*devicepluginv1.QatDevicePlugin)
+	podSpec := &ds.Spec.Template.Spec
+	container := &podSpec.Containers[0]
+
+	if container.Image != dp.Spec.Image {
+		container.Image = dp.Spec.Image
+		updated = true
+	}
+
+	if !reflect.DeepEqual(podSpec.NodeSelector, dp.Spec.NodeSelector) {
+		podSpec.NodeSelector = dp.Spec.NodeSelector
+		updated = true
+	}
+
+	// Compare element by element: joining with a space cannot tell
+	// {"a b"} from {"a", "b"} and could therefore miss a real change.
+	if newArgs := getPodArgs(dp); !reflect.DeepEqual(container.Args, newArgs) {
+		container.Args = newArgs
+		updated = true
+	}
+
+	return updated
+}
+
+// UpdateStatus copies the observed state of ds and the given node names into
+// the status of rawObj and reports whether any status field was changed. An
+// error is returned only when no object reference to ds can be built.
+//
+// rawObj must be a *QatDevicePlugin; any other type makes the type assertion
+// panic.
+func (c *controller) UpdateStatus(rawObj runtime.Object, ds *apps.DaemonSet, nodeNames []string) (updated bool, err error) {
+	dp := rawObj.(*devicepluginv1.QatDevicePlugin)
+
+	dsRef, err := reference.GetReference(c.scheme, ds)
+	if err != nil {
+		return false, errors.Wrap(err, "unable to make reference to controlled daemon set")
+	}
+
+	if dp.Status.ControlledDaemonSet.UID != dsRef.UID {
+		dp.Status.ControlledDaemonSet = *dsRef
+		updated = true
+	}
+
+	if dp.Status.DesiredNumberScheduled != ds.Status.DesiredNumberScheduled {
+		dp.Status.DesiredNumberScheduled = ds.Status.DesiredNumberScheduled
+		updated = true
+	}
+
+	if dp.Status.NumberReady != ds.Status.NumberReady {
+		dp.Status.NumberReady = ds.Status.NumberReady
+		updated = true
+	}
+
+	// Comparing the joined form treats a nil and an empty list as equal, so
+	// an object without nodes is not rewritten needlessly. The comparison is
+	// order sensitive.
+	if strings.Join(dp.Status.NodeNames, ",") != strings.Join(nodeNames, ",") {
+		dp.Status.NodeNames = nodeNames
+		updated = true
+	}
+
+	return updated, nil
+}
+
+// getPodArgs translates the spec of qdp into the command line arguments of the
+// plugin container. The log level is always passed as given; for the DPDK
+// driver, the kernel VF drivers and the maximum number of devices a default is
+// substituted when the spec leaves the field empty (or not positive).
+func getPodArgs(qdp *devicepluginv1.QatDevicePlugin) []string {
+	// Values used when the corresponding spec field is unset.
+	const (
+		defaultDpdkDriver      = "vfio-pci"
+		defaultKernelVfDrivers = "dh895xccvf,c6xxvf,c3xxxvf,d15xxvf"
+		defaultMaxNumDevices   = "32"
+	)
+
+	dpdkDriver := qdp.Spec.DpdkDriver
+	if dpdkDriver == "" {
+		dpdkDriver = defaultDpdkDriver
+	}
+
+	kernelVfDrivers := defaultKernelVfDrivers
+	if len(qdp.Spec.KernelVfDrivers) > 0 {
+		drvs := make([]string, len(qdp.Spec.KernelVfDrivers))
+		for i, v := range qdp.Spec.KernelVfDrivers {
+			drvs[i] = string(v)
+		}
+		kernelVfDrivers = strings.Join(drvs, ",")
+	}
+
+	maxNumDevices := defaultMaxNumDevices
+	if qdp.Spec.MaxNumDevices > 0 {
+		maxNumDevices = strconv.Itoa(qdp.Spec.MaxNumDevices)
+	}
+
+	return []string{
+		"-v", strconv.Itoa(qdp.Spec.LogLevel),
+		"-dpdk-driver", dpdkDriver,
+		"-kernel-vf-drivers", kernelVfDrivers,
+		"-max-num-devices", maxNumDevices,
+	}
+}
diff --git a/pkg/controllers/reconciler.go b/pkg/controllers/reconciler.go
new file mode 100644
index 00000000..60e97731
--- /dev/null
+++ b/pkg/controllers/reconciler.go
@@ -0,0 +1,269 @@
+// Copyright 2020 Intel Corporation. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package controllers contains code common for the device plugin controllers.
+package controllers
+
+import (
+	"context"
+	"sort"
+	"sync"
+
+	"github.com/go-logr/logr"
+	apps "k8s.io/api/apps/v1"
+	v1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// bKeeper holds the last observed number of custom resources per plugin kind.
+// The map is allocated right here, so no init function is required.
+var bKeeper = &bookKeeper{pluginCounter: make(map[string]int)}
+
+// bookKeeper is a mutex-guarded counter of device plugin objects keyed by
+// plugin kind.
+type bookKeeper struct {
+	sync.Mutex
+	pluginCounter map[string]int
+}
+
+// set records count as the current number of objects of pluginKind.
+func (b *bookKeeper) set(pluginKind string, count int) {
+	b.Lock()
+	defer b.Unlock()
+
+	b.pluginCounter[pluginKind] = count
+}
+
+// count returns the recorded number of objects of pluginKind, or 0 when
+// nothing has been recorded for that kind.
+func (b *bookKeeper) count(pluginKind string) int {
+	b.Lock()
+	defer b.Unlock()
+
+	return b.pluginCounter[pluginKind]
+}
+
+// GetDevicePluginCount returns number of device plugin CRs registered.
+// The value is the one stored by the most recent reconciliation of that kind;
+// it is 0 for a kind that has not been reconciled yet.
+func GetDevicePluginCount(pluginKind string) int {
+	return bKeeper.count(pluginKind)
+}
+
+// +kubebuilder:rbac:groups=apps,resources=daemonsets,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch
+
+// DevicePluginController provides functionality for manipulating actual device plugin CRD objects.
+type DevicePluginController interface {
+	// CreateEmptyObject returns a new empty object of the plugin's kind.
+	CreateEmptyObject() (devicePlugin runtime.Object)
+	// GetTotalObjectCount returns how many objects of the plugin's kind exist.
+	GetTotalObjectCount(ctx context.Context, client client.Client) (count int, err error)
+	// NewDaemonSet builds the DaemonSet for the given device plugin object.
+	NewDaemonSet(devicePlugin runtime.Object) *apps.DaemonSet
+	// UpdateDaemonSet synchronizes the DaemonSet with the device plugin
+	// object and reports whether the DaemonSet was modified.
+	UpdateDaemonSet(runtime.Object, *apps.DaemonSet) (updated bool)
+	// UpdateStatus synchronizes the status of the device plugin object with
+	// the DaemonSet and the given node names and reports whether the status
+	// was modified.
+	UpdateStatus(runtime.Object, *apps.DaemonSet, []string) (updated bool, err error)
+}
+
+// reconciler is the kind-independent part of a device plugin controller. The
+// kind-specific operations are delegated to controller.
+type reconciler struct {
+	client.Client
+	log        logr.Logger
+	scheme     *runtime.Scheme
+	pluginKind string
+	ownerKey   string
+	controller DevicePluginController
+}
+
+// Reconcile reconciles a device plugin object.
+//
+// It refreshes the per-kind object count and then, depending on what it finds:
+// deletes the owned DaemonSets when the object cannot be fetched because it no
+// longer exists, creates the DaemonSet when there is none, or synchronizes the
+// first owned DaemonSet and the object's status and deletes any further owned
+// DaemonSets.
+func (r *reconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
+	ctx := context.Background()
+	log := r.log.WithValues(r.pluginKind, req.NamespacedName)
+
+	if err := r.updateBookKeeper(ctx); err != nil {
+		log.Error(err, "unable to get total count of device plugins")
+		return ctrl.Result{}, err
+	}
+
+	// Fetch the plugin's DaemonSet. This is done before fetching the object
+	// itself because the list is also needed for cleaning up after a deleted
+	// object.
+	var childDaemonSets apps.DaemonSetList
+	if err := r.List(ctx, &childDaemonSets, client.InNamespace(req.Namespace), client.MatchingFields{r.ownerKey: req.Name}); err != nil {
+		log.Error(err, "unable to list child DaemonSets")
+		return ctrl.Result{}, err
+	}
+
+	devicePlugin := r.controller.CreateEmptyObject()
+	if err := r.Get(ctx, req.NamespacedName, devicePlugin); err != nil {
+		return r.maybeDeleteDaemonSets(ctx, err, childDaemonSets.Items, log)
+	}
+
+	// Create a daemon set for the plugin if it doesn't exist.
+	if len(childDaemonSets.Items) == 0 {
+		return r.createDaemonSet(ctx, devicePlugin, log)
+	}
+
+	ds := &childDaemonSets.Items[0]
+
+	// Synchronize the DaemonSet with its owner.
+	if r.controller.UpdateDaemonSet(devicePlugin, ds) {
+		if err := r.Update(ctx, ds); err != nil {
+			log.Error(err, "unable to update DaemonSet", "DaemonSet", ds)
+			return ctrl.Result{}, err
+		}
+	}
+
+	// Fetch the pods controlled by the controller's DaemonSet to list nodes
+	var pods v1.PodList
+	if err := r.List(ctx, &pods, client.InNamespace(ds.Namespace), client.MatchingFields{r.ownerKey: ds.Name}); err != nil {
+		log.Error(err, "unable to list child Pods of the controlled daemon set")
+		return ctrl.Result{}, err
+	}
+
+	// Update status
+	statusUpdated, err := r.controller.UpdateStatus(devicePlugin, ds, scheduledNodeNames(pods.Items))
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+	if statusUpdated {
+		err = r.Status().Update(ctx, devicePlugin)
+		if apierrors.IsConflict(err) {
+			// Somebody else changed the object in the meantime; retry
+			// with a fresh copy instead of reporting an error.
+			return ctrl.Result{Requeue: true}, nil
+		}
+		if err != nil {
+			log.Error(err, "unable to update device plugin status")
+			return ctrl.Result{}, err
+		}
+	}
+
+	// Drop redundant daemon sets if any.
+	r.maybeDeleteRedundantDaemonSets(ctx, childDaemonSets.Items, log)
+
+	return ctrl.Result{}, nil
+}
+
+// scheduledNodeNames returns the sorted names of the nodes the given pods are
+// assigned to. Pods without a node name are skipped so that the result has no
+// empty entries. Sorting makes the result independent of the order in which
+// the pods were listed, which keeps order-sensitive status comparisons stable
+// across reconciliations. The returned slice is never nil.
+func scheduledNodeNames(pods []v1.Pod) []string {
+	nodeNames := make([]string, 0, len(pods))
+	for i := range pods {
+		if name := pods[i].Spec.NodeName; name != "" {
+			nodeNames = append(nodeNames, name)
+		}
+	}
+	sort.Strings(nodeNames)
+
+	return nodeNames
+}
+
+// SetupWithManager sets up a reconciler for a given device plugin controller.
+//
+// It registers two field indexes under ownerKey (DaemonSets by the name of the
+// owning device plugin object of kind pluginKind in API group/version
+// apiGVString, and Pods by the name of the owning DaemonSet) and then
+// registers the reconciler with mgr for the controller's object type and the
+// DaemonSets it owns.
+func SetupWithManager(mgr ctrl.Manager, controller DevicePluginController, apiGVString, pluginKind, ownerKey string) error {
+	r := &reconciler{
+		Client:     mgr.GetClient(),
+		log:        ctrl.Log.WithName("controllers").WithName(pluginKind),
+		scheme:     mgr.GetScheme(),
+		ownerKey:   ownerKey,
+		controller: controller,
+		pluginKind: pluginKind,
+	}
+
+	ctx := context.Background()
+
+	// Index DaemonSets with their owner (e.g. QatDevicePlugin).
+	if err := mgr.GetFieldIndexer().IndexField(ctx, &apps.DaemonSet{}, ownerKey,
+		func(rawObj runtime.Object) []string {
+			// grab the DaemonSet object, extract the owner...
+			ds := rawObj.(*apps.DaemonSet)
+			owner := metav1.GetControllerOf(ds)
+			if owner == nil {
+				return nil
+			}
+
+			// make sure it's a device plugin
+			if owner.APIVersion != apiGVString || owner.Kind != pluginKind {
+				return nil
+			}
+
+			// and if so, return it.
+			return []string{owner.Name}
+		}); err != nil {
+		return err
+	}
+
+	// Index Pods with their owner (DaemonSet).
+	if err := mgr.GetFieldIndexer().IndexField(ctx, &v1.Pod{}, ownerKey,
+		func(rawObj runtime.Object) []string {
+			// grab the Pod object, extract the owner...
+			pod := rawObj.(*v1.Pod)
+			owner := metav1.GetControllerOf(pod)
+			if owner == nil {
+				return nil
+			}
+
+			// make sure it's a DaemonSet
+			if owner.APIVersion != apps.SchemeGroupVersion.String() || owner.Kind != "DaemonSet" {
+				return nil
+			}
+
+			// and if so, return it.
+			return []string{owner.Name}
+		}); err != nil {
+		return err
+	}
+
+	return ctrl.NewControllerManagedBy(mgr).
+		For(r.controller.CreateEmptyObject()).
+		Owns(&apps.DaemonSet{}).
+		Complete(r)
+}
+
+// updateBookKeeper asks the controller for the current number of objects of
+// the reconciler's kind and stores it in the package-level book keeper.
+func (r *reconciler) updateBookKeeper(ctx context.Context) error {
+	count, err := r.controller.GetTotalObjectCount(ctx, r)
+	if err != nil {
+		return err
+	}
+
+	bKeeper.set(r.pluginKind, count)
+	return nil
+}
+
+// createDaemonSet creates the DaemonSet for dp with dp set as its controlling
+// owner. dp must implement metav1.Object; otherwise the type assertion panics.
+func (r *reconciler) createDaemonSet(ctx context.Context, dp runtime.Object, log logr.Logger) (ctrl.Result, error) {
+	ds := r.controller.NewDaemonSet(dp)
+
+	if err := ctrl.SetControllerReference(dp.(metav1.Object), ds, r.scheme); err != nil {
+		log.Error(err, "unable to set controller reference")
+		return ctrl.Result{}, err
+	}
+
+	if err := r.Create(ctx, ds); err != nil {
+		log.Error(err, "unable to create DaemonSet")
+		return ctrl.Result{}, err
+	}
+
+	return ctrl.Result{}, nil
+}
+
+// maybeDeleteDaemonSets handles err, the failure to fetch the device plugin
+// object. When err says the object was not found, the given DaemonSets are
+// deleted with background propagation and nil is returned unless a deletion
+// fails. Any other err is logged and returned unchanged.
+func (r *reconciler) maybeDeleteDaemonSets(ctx context.Context, err error, daemonSets []apps.DaemonSet, log logr.Logger) (ctrl.Result, error) {
+	if !apierrors.IsNotFound(err) {
+		log.Error(err, "unable to fetch custom device plugin object")
+		return ctrl.Result{}, err
+	}
+
+	// Address the slice elements directly; there is no need to copy each
+	// DaemonSet just to obtain a pointer for Delete.
+	for i := range daemonSets {
+		ds := &daemonSets[i]
+		delErr := r.Delete(ctx, ds, client.PropagationPolicy(metav1.DeletePropagationBackground))
+		if client.IgnoreNotFound(delErr) != nil {
+			log.Error(delErr, "unable to delete DaemonSet", "DaemonSet", ds)
+			return ctrl.Result{}, delErr
+		}
+	}
+
+	// Only claim a deletion when there was something to delete.
+	if len(daemonSets) > 0 {
+		log.V(1).Info("deleted DaemonSets owned by deleted custom device plugin object")
+	}
+
+	return ctrl.Result{}, nil
+}
+
+// maybeDeleteRedundantDaemonSets deletes every DaemonSet in dsets except the
+// first one. Failures are logged and do not stop the remaining deletions.
+func (r *reconciler) maybeDeleteRedundantDaemonSets(ctx context.Context, dsets []apps.DaemonSet, log logr.Logger) {
+	if len(dsets) <= 1 {
+		return
+	}
+
+	log.V(0).Info("there are redundant DaemonSets", "redundantDS", len(dsets)-1)
+	for i := 1; i < len(dsets); i++ {
+		ds := &dsets[i]
+		err := r.Delete(ctx, ds, client.PropagationPolicy(metav1.DeletePropagationBackground))
+		if client.IgnoreNotFound(err) != nil {
+			log.Error(err, "unable to delete redundant DaemonSet", "DaemonSet", ds)
+			continue
+		}
+		log.V(1).Info("deleted redundant DaemonSet", "DaemonSet", ds)
+	}
+}
diff --git a/test/envtest/gpudeviceplugin_controller_test.go b/test/envtest/gpudeviceplugin_controller_test.go
new file mode 100644
index 00000000..c8593bfe
--- /dev/null
+++ b/test/envtest/gpudeviceplugin_controller_test.go
@@ -0,0 +1,87 @@
+// Copyright 2020 Intel Corporation. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package envtest
+
+import (
+	"context"
+	"time"
+
+	. "github.com/onsi/ginkgo"
+	. 
"github.com/onsi/gomega"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+
+	devicepluginv1 "github.com/intel/intel-device-plugins-for-kubernetes/pkg/apis/deviceplugin/v1"
+)
+
+// Exercises create, update and delete of a GpuDevicePlugin object against the
+// test API server with the GPU reconciler running.
+var _ = Describe("GpuDevicePlugin Controller", func() {
+
+	const timeout = time.Second * 30
+	const interval = time.Second * 1
+
+	Context("Basic CRUD operations", func() {
+		It("should handle GpuDevicePlugin objects correctly", func() {
+			ctx := context.Background()
+
+			spec := devicepluginv1.GpuDevicePluginSpec{
+				Image: "testimage",
+			}
+
+			key := types.NamespacedName{
+				Name:      "gpudeviceplugin-test",
+				Namespace: "default",
+			}
+
+			toCreate := &devicepluginv1.GpuDevicePlugin{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      key.Name,
+					Namespace: key.Namespace,
+				},
+				Spec: spec,
+			}
+
+			By("creating GpuDevicePlugin successfully")
+			Expect(k8sClient.Create(ctx, toCreate)).Should(Succeed())
+
+			// Poll until the status references a DaemonSet. No fixed sleep
+			// is needed in front of the polling.
+			fetched := &devicepluginv1.GpuDevicePlugin{}
+			Eventually(func() bool {
+				if err := k8sClient.Get(ctx, key, fetched); err != nil {
+					return false
+				}
+				return len(fetched.Status.ControlledDaemonSet.UID) > 0
+			}, timeout, interval).Should(BeTrue())
+
+			By("updating image name successfully")
+			updatedImage := "updated-testimage"
+			// Re-read the object on every attempt: the controller writes the
+			// status concurrently, so an update based on a stale copy can be
+			// rejected with a conflict.
+			Eventually(func() error {
+				if err := k8sClient.Get(ctx, key, fetched); err != nil {
+					return err
+				}
+				fetched.Spec.Image = updatedImage
+				return k8sClient.Update(ctx, fetched)
+			}, timeout, interval).Should(Succeed())
+
+			fetchedUpdated := &devicepluginv1.GpuDevicePlugin{}
+			Eventually(func() string {
+				if err := k8sClient.Get(ctx, key, fetchedUpdated); err != nil {
+					return ""
+				}
+				return fetchedUpdated.Spec.Image
+			}, timeout, interval).Should(Equal(updatedImage))
+
+			By("deleting GpuDevicePlugin successfully")
+			Eventually(func() error {
+				f := &devicepluginv1.GpuDevicePlugin{}
+				if err := k8sClient.Get(ctx, key, f); err != nil {
+					return err
+				}
+				return k8sClient.Delete(ctx, f)
+			}, timeout, interval).Should(Succeed())
+
+			// The object must eventually be gone.
+			Eventually(func() error {
+				f := &devicepluginv1.GpuDevicePlugin{}
+				return k8sClient.Get(ctx, key, f)
+			}, timeout, interval).ShouldNot(Succeed())
+		})
+	})
+})
diff --git a/test/envtest/qatdeviceplugin_controller_test.go b/test/envtest/qatdeviceplugin_controller_test.go
new file mode 100644
index 00000000..317c0c79
--- /dev/null
+++ b/test/envtest/qatdeviceplugin_controller_test.go
@@ -0,0 +1,87 @@
+// Copyright 2020 Intel Corporation. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package envtest
+
+import (
+	"context"
+	"time"
+
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+
+	devicepluginv1 "github.com/intel/intel-device-plugins-for-kubernetes/pkg/apis/deviceplugin/v1"
+)
+
+// Exercises create, update and delete of a QatDevicePlugin object against the
+// test API server with the QAT reconciler running.
+var _ = Describe("QatDevicePlugin Controller", func() {
+
+	const timeout = time.Second * 30
+	const interval = time.Second * 1
+
+	Context("Basic CRUD operations", func() {
+		It("should handle QatDevicePlugin objects correctly", func() {
+			ctx := context.Background()
+
+			spec := devicepluginv1.QatDevicePluginSpec{
+				Image: "testimage",
+			}
+
+			key := types.NamespacedName{
+				Name:      "qatdeviceplugin-test",
+				Namespace: "default",
+			}
+
+			toCreate := &devicepluginv1.QatDevicePlugin{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      key.Name,
+					Namespace: key.Namespace,
+				},
+				Spec: spec,
+			}
+
+			By("creating QatDevicePlugin successfully")
+			Expect(k8sClient.Create(ctx, toCreate)).Should(Succeed())
+
+			// Poll until the status references a DaemonSet. No fixed sleep
+			// is needed in front of the polling.
+			fetched := &devicepluginv1.QatDevicePlugin{}
+			Eventually(func() bool {
+				if err := k8sClient.Get(ctx, key, fetched); err != nil {
+					return false
+				}
+				return len(fetched.Status.ControlledDaemonSet.UID) > 0
+			}, timeout, interval).Should(BeTrue())
+
+			By("updating image name successfully")
+			updatedImage := "updated-testimage"
+			// Re-read the object on every attempt: the controller writes the
+			// status concurrently, so an update based on a stale copy can be
+			// rejected with a conflict.
+			Eventually(func() error {
+				if err := k8sClient.Get(ctx, key, fetched); err != nil {
+					return err
+				}
+				fetched.Spec.Image = updatedImage
+				return k8sClient.Update(ctx, fetched)
+			}, timeout, interval).Should(Succeed())
+
+			fetchedUpdated := &devicepluginv1.QatDevicePlugin{}
+			Eventually(func() string {
+				if err := k8sClient.Get(ctx, key, fetchedUpdated); err != nil {
+					return ""
+				}
+				return fetchedUpdated.Spec.Image
+			}, timeout, interval).Should(Equal(updatedImage))
+
+			By("deleting QatDevicePlugin successfully")
+			Eventually(func() error {
+				f := &devicepluginv1.QatDevicePlugin{}
+				if err := k8sClient.Get(ctx, key, f); err != nil {
+					return err
+				}
+				return k8sClient.Delete(ctx, f)
+			}, timeout, interval).Should(Succeed())
+
+			// The object must eventually be gone.
+			Eventually(func() error {
+				f := &devicepluginv1.QatDevicePlugin{}
+				return k8sClient.Get(ctx, key, f)
+			}, timeout, interval).ShouldNot(Succeed())
+		})
+	})
+})
diff --git a/test/envtest/suite_test.go b/test/envtest/suite_test.go
new file mode 100644
index 00000000..2e5213b2
--- /dev/null
+++ b/test/envtest/suite_test.go
@@ -0,0 +1,96 @@
+// Copyright 2020 Intel Corporation. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package envtest
+
+import (
+	"path/filepath"
+	"testing"
+
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+	"k8s.io/client-go/kubernetes/scheme"
+	"k8s.io/client-go/rest"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/envtest"
+	"sigs.k8s.io/controller-runtime/pkg/envtest/printer"
+	logf "sigs.k8s.io/controller-runtime/pkg/log"
+	"sigs.k8s.io/controller-runtime/pkg/log/zap"
+
+	devicepluginv1 "github.com/intel/intel-device-plugins-for-kubernetes/pkg/apis/deviceplugin/v1"
+	gpuctr "github.com/intel/intel-device-plugins-for-kubernetes/pkg/controllers/gpu"
+	qatctr "github.com/intel/intel-device-plugins-for-kubernetes/pkg/controllers/qat"
+)
+
+// These tests use Ginkgo (BDD-style Go testing framework). Refer to
+// http://onsi.github.io/ginkgo/ to learn more about Ginkgo.
+
+// State shared by all specs of the suite; set up in BeforeSuite.
+var cfg *rest.Config
+var k8sClient client.Client
+var k8sManager ctrl.Manager
+var testEnv *envtest.Environment
+
+// TestAPIs is the go test entry point that runs the Ginkgo suite.
+func TestAPIs(t *testing.T) {
+	RegisterFailHandler(Fail)
+
+	RunSpecsWithDefaultAndCustomReporters(t,
+		"Controller Suite",
+		[]Reporter{printer.NewlineReporter{}})
+}
+
+// Starts the test environment with the operator's CRDs, registers the GPU and
+// QAT reconcilers with a manager and starts that manager in the background.
+var _ = BeforeSuite(func(done Done) {
+	logf.SetLogger(zap.LoggerTo(GinkgoWriter, true))
+
+	By("bootstrapping test environment")
+	testEnv = &envtest.Environment{
+		CRDDirectoryPaths: []string{filepath.Join("..", "..", "deployments", "operator", "crd", "bases")},
+	}
+
+	var err error
+	cfg, err = testEnv.Start()
+	Expect(err).ToNot(HaveOccurred())
+	Expect(cfg).ToNot(BeNil())
+
+	err = devicepluginv1.AddToScheme(scheme.Scheme)
+	Expect(err).NotTo(HaveOccurred())
+
+	// +kubebuilder:scaffold:scheme
+
+	k8sManager, err = ctrl.NewManager(cfg, ctrl.Options{
+		Scheme: scheme.Scheme,
+	})
+	Expect(err).ToNot(HaveOccurred())
+
+	err = gpuctr.SetupReconciler(k8sManager)
+	Expect(err).ToNot(HaveOccurred())
+	err = qatctr.SetupReconciler(k8sManager)
+	Expect(err).ToNot(HaveOccurred())
+
+	go func() {
+		// A failed assertion panics; without GinkgoRecover a failure in
+		// this goroutine would crash the test binary instead of failing
+		// the suite.
+		defer GinkgoRecover()
+
+		// Use a goroutine-local variable so that the goroutine does not
+		// write to err of the enclosing function.
+		startErr := k8sManager.Start(ctrl.SetupSignalHandler())
+		Expect(startErr).ToNot(HaveOccurred())
+	}()
+
+	k8sClient = k8sManager.GetClient()
+	Expect(k8sClient).ToNot(BeNil())
+
+	close(done)
+}, 60)
+
+// Stops the test environment after all specs have run.
+var _ = AfterSuite(func() {
+	By("tearing down the test environment")
+	err := testEnv.Stop()
+	Expect(err).ToNot(HaveOccurred())
+})