diff --git a/cluster-up/cluster/ephemeral-provider-common.sh b/cluster-up/cluster/ephemeral-provider-common.sh index 238a6c841..4a5bc8a29 100644 --- a/cluster-up/cluster/ephemeral-provider-common.sh +++ b/cluster-up/cluster/ephemeral-provider-common.sh @@ -6,32 +6,50 @@ KUBEVIRT_WITH_ETC_IN_MEMORY=${KUBEVIRT_WITH_ETC_IN_MEMORY:-false} KUBEVIRT_WITH_ETC_CAPACITY=${KUBEVIRT_WITH_ETC_CAPACITY:-none} if [ -z "${KUBEVIRTCI_TAG}" ] && [ -z "${KUBEVIRTCI_GOCLI_CONTAINER}" ]; then - echo "FATAL: either KUBEVIRTCI_TAG or KUBEVIRTCI_GOCLI_CONTAINER must be set" + >&2 echo "FATAL: either KUBEVIRTCI_TAG or KUBEVIRTCI_GOCLI_CONTAINER must be set" exit 1 fi if [ -n "${KUBEVIRTCI_TAG}" ] && [ -n "${KUBEVIRTCI_GOCLI_CONTAINER}" ]; then - echo "WARNING: KUBEVIRTCI_GOCLI_CONTAINER is set and will take precedence over the also set KUBEVIRTCI_TAG" + >&2 echo "WARNING: KUBEVIRTCI_GOCLI_CONTAINER is set and will take precedence over KUBEVIRTCI_TAG, which is also set" fi if [ "${KUBEVIRTCI_RUNTIME}" = "podman" ]; then - _cli="pack8s" + _cri_bin=podman + _docker_socket="${HOME}/podman.sock" +elif [ "${KUBEVIRTCI_RUNTIME}" = "docker" ]; then + _cri_bin=docker + _docker_socket="/var/run/docker.sock" else - _cli_container="${KUBEVIRTCI_GOCLI_CONTAINER:-quay.io/kubevirtci/gocli:${KUBEVIRTCI_TAG}}" - _cli="docker run --privileged --net=host --rm ${USE_TTY} -v /var/run/docker.sock:/var/run/docker.sock" - # gocli will try to mount /lib/modules to make it accessible to dnsmasq in - # in case it exists - if [ -d /lib/modules ]; then - _cli="${_cli} -v /lib/modules/:/lib/modules/" + if curl --unix-socket "${HOME}/podman.sock" http://d/v3.0.0/libpod/info >/dev/null 2>&1; then + _cri_bin=podman + _docker_socket="${HOME}/podman.sock" + >&2 echo "selecting podman as container runtime" + elif docker ps >/dev/null 2>&1; then + _cri_bin=docker + _docker_socket="/var/run/docker.sock" + >&2 echo "selecting docker as container runtime" + else + >&2 echo "no working container runtime found. Neither docker nor podman seems to work."
+ exit 1 fi - _cli="${_cli} ${_cli_container}" fi +_cli_container="${KUBEVIRTCI_GOCLI_CONTAINER:-quay.io/kubevirtci/gocli:${KUBEVIRTCI_TAG}}" +_cli="${_cri_bin} run --privileged --net=host --rm ${USE_TTY} -v ${_docker_socket}:/var/run/docker.sock" +# gocli will try to mount /lib/modules to make it accessible to dnsmasq +# in case it exists +if [ -d /lib/modules ]; then + _cli="${_cli} -v /lib/modules/:/lib/modules/" +fi +_cli="${_cli} ${_cli_container}" + function _main_ip() { echo 127.0.0.1 } function _port() { + # shellcheck disable=SC2154 ${_cli} ports --prefix $provider_prefix "$@" } @@ -48,12 +66,16 @@ EOF } function _registry_volume() { + # shellcheck disable=SC2154 echo ${job_prefix}_registry } function _add_common_params() { + # shellcheck disable=SC2155 local params="--nodes ${KUBEVIRT_NUM_NODES} --memory ${KUBEVIRT_MEMORY_SIZE} --cpu 6 --secondary-nics ${KUBEVIRT_NUM_SECONDARY_NICS} --random-ports --background --prefix $provider_prefix --registry-volume $(_registry_volume) ${KUBEVIRT_PROVIDER} ${KUBEVIRT_PROVIDER_EXTRA_ARGS}" - if [[ $TARGET =~ windows.* ]] && [ -n "$WINDOWS_NFS_DIR" ]; then + if [[ $TARGET =~ windows_sysprep.* ]] && [ -n "$WINDOWS_SYSPREP_NFS_DIR" ]; then + params=" --nfs-data $WINDOWS_SYSPREP_NFS_DIR $params" + elif [[ $TARGET =~ windows.* ]] && [ -n "$WINDOWS_NFS_DIR" ]; then params=" --nfs-data $WINDOWS_NFS_DIR $params" elif [[ $TARGET =~ os-.* ]] && [ -n "$RHEL_NFS_DIR" ]; then params=" --nfs-data $RHEL_NFS_DIR $params" @@ -65,11 +87,11 @@ function _add_common_params() { if [ $KUBEVIRT_WITH_ETC_IN_MEMORY == "true" ]; then params=" --run-etcd-on-memory $params" if [ $KUBEVIRT_WITH_ETC_CAPACITY != "none" ]; then - params=" --etcd-capacity $KUBEVIRT_WITH_ETC_CAPACITY $params" + params=" --etcd-capacity $KUBEVIRT_WITH_ETC_CAPACITY $params" fi fi if [ $KUBEVIRT_DEPLOY_ISTIO == "true" ]; then - params=" --enable-istio $params" + params=" --enable-istio $params" fi # alternate (new) way to specify storage providers @@ -77,6 +99,29 @@ function _add_common_params() { params=" --enable-ceph $params" fi + if [[ $KUBEVIRT_DEPLOY_PROMETHEUS == "true" ]] && + [[ $KUBEVIRT_PROVIDER_EXTRA_ARGS != *"--enable-prometheus"* ]]; then + + if [[ ($KUBEVIRT_PROVIDER =~ k8s-1\.1.*) || ($KUBEVIRT_PROVIDER =~ k8s-1.20) ]]; then + echo "ERROR: cluster up failed because prometheus is only supported for providers >= k8s-1.21\n" + echo "the current provider is $KUBEVIRT_PROVIDER, consider updating to a newer version, or\n" + echo "disabling Prometheus using export KUBEVIRT_DEPLOY_PROMETHEUS=false" + exit 1 + fi + + params=" --enable-prometheus $params" + + if [[ $KUBEVIRT_DEPLOY_PROMETHEUS_ALERTMANAGER == "true" ]] && + [[ $KUBEVIRT_PROVIDER_EXTRA_ARGS != *"--enable-prometheus-alertmanager"* ]]; then + params=" --enable-prometheus-alertmanager $params" + fi + + if [[ $KUBEVIRT_DEPLOY_GRAFANA == "true" ]] && + [[ $KUBEVIRT_PROVIDER_EXTRA_ARGS != *"--enable-grafana"* ]]; then + params=" --enable-grafana $params" + fi + fi + echo $params } diff --git a/cluster-up/cluster/k8s-1.16/README.md b/cluster-up/cluster/k8s-1.16/README.md deleted file mode 100644 index d32ecaa4a..000000000 --- a/cluster-up/cluster/k8s-1.16/README.md +++ /dev/null @@ -1,45 +0,0 @@ -# Kubernetes 1.16.2 in ephemeral containers - -Provides a pre-deployed Kubernetes with version 1.16.2 purely in docker -containers with qemu. The provided VMs are completely ephemeral and are -recreated on every cluster restart.
The KubeVirt containers are built on the -local machine and are then pushed to a registry which is exposed at -`localhost:5000`. - -## Bringing the cluster up - -```bash -export KUBEVIRT_PROVIDER=k8s-1.16.2 -export KUBEVIRT_NUM_NODES=2 # master + one node -make cluster-up -``` - -The cluster can be accessed as usual: - -```bash -$ cluster/kubectl.sh get nodes -NAME STATUS ROLES AGE VERSION -node01 NotReady master 31s v1.16.2 -node02 NotReady 5s v1.16.2 -``` - -## Bringing the cluster down - -```bash -export KUBEVIRT_PROVIDER=k8s-1.16.2 -make cluster-down -``` - -This destroys the whole cluster. Recreating the cluster is fast, since k8s is -already pre-deployed. The only state which is kept is the state of the local -docker registry. - -## Destroying the docker registry state - -The docker registry survives a `make cluster-down`. It's state is stored in a -docker volume called `kubevirt_registry`. If the volume gets too big or the -volume contains corrupt data, it can be deleted with - -```bash -docker volume rm kubevirt_registry -``` diff --git a/cluster-up/cluster/k8s-1.16/provider.sh b/cluster-up/cluster/k8s-1.16/provider.sh deleted file mode 100644 index 3f4981e44..000000000 --- a/cluster-up/cluster/k8s-1.16/provider.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash -set -e -source ${KUBEVIRTCI_PATH}/cluster/k8s-provider-common.sh \ No newline at end of file diff --git a/cluster-up/cluster/k8s-1.17/README.md b/cluster-up/cluster/k8s-1.17/README.md deleted file mode 100644 index 67df8c397..000000000 --- a/cluster-up/cluster/k8s-1.17/README.md +++ /dev/null @@ -1,57 +0,0 @@ -# Kubernetes 1.17 in ephemeral containers - -Provides a pre-deployed Kubernetes with version 1.17 purely in docker -containers with qemu. The provided VMs are completely ephemeral and are -recreated on every cluster restart. The KubeVirt containers are built on the -local machine and are then pushed to a registry which is exposed at -`localhost:5000`. - -## Bringing the cluster up - -```bash -export KUBEVIRT_PROVIDER=k8s-1.17 -export KUBEVIRT_NUM_NODES=2 # master + one node -make cluster-up -``` - -The cluster can be accessed as usual: - -```bash -$ cluster/kubectl.sh get nodes -NAME STATUS ROLES AGE VERSION -node01 NotReady master 31s v1.17.1 -node02 NotReady 5s v1.17.1 -``` - -## Bringing the cluster up with cluster-network-addons-operator provisioned - -```bash -export KUBEVIRT_PROVIDER=k8s-1.17 -export KUBEVIRT_NUM_NODES=2 # master + one node -export KUBEVIRT_WITH_CNAO=true -make cluster-up -``` - -To get more info about CNAO you can check the github project documentation -here https://github.com/kubevirt/cluster-network-addons-operator - -## Bringing the cluster down - -```bash -export KUBEVIRT_PROVIDER=k8s-1.17 -make cluster-down -``` - -This destroys the whole cluster. Recreating the cluster is fast, since k8s is -already pre-deployed. The only state which is kept is the state of the local -docker registry. - -## Destroying the docker registry state - -The docker registry survives a `make cluster-down`. It's state is stored in a -docker volume called `kubevirt_registry`. If the volume gets too big or the -volume contains corrupt data, it can be deleted with - -```bash -docker volume rm kubevirt_registry -``` diff --git a/cluster-up/cluster/k8s-1.17/dev-guide.md b/cluster-up/cluster/k8s-1.17/dev-guide.md deleted file mode 100644 index 76923504b..000000000 --- a/cluster-up/cluster/k8s-1.17/dev-guide.md +++ /dev/null @@ -1,148 +0,0 @@ -# kubevirtci K8s provider dev guide. 
- -The purpose of kubevirtci is to create pre-provisioned K8s clusters as container images, -allowing people to easily run a K8s cluster. - -The target audience is developers of kubevirtci, who want to create a new provider, or to update an existing one. - -Please refer first to the following documents on how to run k8s-1.17.0:\ -[k8s-1.17.0 cluster-up](https://github.com/kubevirt/kubevirtci/blob/master/cluster-up/cluster/k8s-1.17.0/README.md) - -In this doc, we will go on what kubevirtci provider image consist of, what its inner architecture, -flow of start a pre-provisioned cluster, flow of creating a new provider, and how to create a new provider. - -A provider includes all the images (K8s base image, nodes OS image) and the scripts that allows it to start a -cluster offline, without downloading / installing / compiling new resources. -Deploying a cluster will create containers, which communicate with each other, in order to act as a K8s cluster. -It's a bit different from running bare-metal cluster where the nodes are physical machines or when the nodes are virtual machines on the host itself, -It gives us isolation advantage and state freezing of the needed components, allowing offline deploy, agnostic of the host OS, and installed packages. - -# Project structure -* cluster-provision folder - creating preprovisioned clusters. -* cluster-up folder - spinning up preprovisioned clusters. -* gocli - gocli is a binary that assist in provisioning and spinning up a cluster. sources of gocli are at cluster-provision/gocli. - -# K8s Deployment -Running `make cluster-up` will deploy a pre-provisioned cluster. -Upon finishing deployment of a K8s deploy, we will have 3 containers: -* k8s-1.17.0 vm container - a container that runs a qemu VM, which is the K8s node, in which the pods will run. -* Registry container - a shared image registry. -* k8s-1.17.0 dnsmasq container - a container that run dnsmasq, which gives dns and dhcp services. 
- -The containers are running and looks like this: -``` -[root@modi01 1.17.0]# docker ps -CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES -3589e85efc7d kubevirtci/k8s-1.17.0 "/bin/bash -c '/vm.s…" About an hour ago Up About an hour k8s-1.17.0-node01 -4742dc02add2 registry:2.7.1 "/entrypoint.sh /etc…" About an hour ago Up About an hour k8s-1.17.0-registry -13787e7d4ac9 kubevirtci/k8s-1.17.0 "/bin/bash -c /dnsma…" About an hour ago Up About an hour 127.0.0.1:8443->8443/tcp, 0.0.0.0:32794->2201/tcp, 0.0.0.0:32793->5000/tcp, 0.0.0.0:32792->5901/tcp, 0.0.0.0:32791->6443/tcp k8s-1.17.0-dnsmasq -``` - -Nodes: -``` -[root@modi01 kubevirtci]# oc get nodes -NAME STATUS ROLES AGE VERSION -node01 Ready master 83m v1.17.0 -``` - -# Inner look of a deployed cluster -We can connect to the node of the cluster by: -``` -./cluster-up/ssh.sh node01 -``` - -List the pods -``` -[vagrant@node01 ~]$ sudo crictl pods -POD ID CREATED STATE NAME NAMESPACE ATTEMPT -403513878c8b7 10 minutes ago Ready coredns-6955765f44-m6ckl kube-system 4 -0c3e25e58b9d0 10 minutes ago Ready local-volume-provisioner-fkzgk default 4 -e6d96770770f4 10 minutes ago Ready coredns-6955765f44-mhfgg kube-system 4 -19ad529c78acc 10 minutes ago Ready kube-flannel-ds-amd64-mq5cx kube-system 0 -47acef4276900 10 minutes ago Ready kube-proxy-vtj59 kube-system 0 -df5863c55a52f 11 minutes ago Ready kube-scheduler-node01 kube-system 0 -ca0637d5ac82f 11 minutes ago Ready kube-apiserver-node01 kube-system 0 -f0d90506ce3b8 11 minutes ago Ready kube-controller-manager-node01 kube-system 0 -f873785341215 11 minutes ago Ready etcd-node01 kube-system 0 -``` - -Check kubelet service status -``` -[vagrant@node01 ~]$ systemctl status kubelet -● kubelet.service - kubelet: The Kubernetes Node Agent - Loaded: loaded (/usr/lib/systemd/system/kubelet.service; enabled; vendor preset: disabled) - Drop-In: /usr/lib/systemd/system/kubelet.service.d - └─10-kubeadm.conf - Active: active (running) since Wed 2020-01-15 13:39:54 UTC; 11min ago - Docs: https://kubernetes.io/docs/ - Main PID: 4294 (kubelet) - CGroup: /system.slice/kubelet.service - ‣ 4294 /usr/bin/kubelet --bootstrap-kubeconfig=/etc/kubernetes/boo... -``` - -Connect to the container that runs the vm: -``` -CONTAINER=$(docker ps | grep vm | awk '{print $1}') -docker exec -it $CONTAINER bash -``` - -From within the container we can see there is a process of qemu which runs the node as a virtual machine. -``` -[root@855de8c8310f /]# ps -ef | grep qemu -root 1 0 36 13:39 ? 00:05:22 qemu-system-x86_64 -enable-kvm -drive format=qcow2,file=/var/run/disk/disk.qcow2,if=virtio,cache=unsafe -device virtio-net-pci,netdev=network0,mac=52:55:00:d1:55:01 -netdev tap,id=network0,ifname=tap01,script=no,downscript=no -device virtio-rng-pci -vnc :01 -cpu host -m 5120M -smp 5 -serial pty -``` - -# Flow of K8s provisioning (1.17 for example) -`cluster-provision/k8s/1.17.0/provision.sh` -* Runs the common cluster-provision/k8s/provision.sh. - * Runs cluster-provision/cli/cli (bash script). - * Creates a container for dnsmasq and runs dnsmasq.sh in it. - * Create a container, and runs vm.sh in it. - * Creates a vm using qemu, and checks its ready (according ssh). - * Runs cluster-provision/k8s/scripts/provision.sh in the container. - * Update docker trusted registries. - * Start kubelet service and K8s cluster. - * Enable ip routing. - * Apply additional manifests, such as flannel. - * Wait for pods to become ready. - * Pull needed images such as Ceph CSI, fluentd logger. - * Create local volume directories. 
- * Shutdown the vm and commit its container. - -# Flow of K8s cluster-up (1.17 for example) -Run -``` -export KUBEVIRT_PROVIDER=k8s-1.17.0 -make cluster-up -``` -* Runs cluster-up/up.sh which sources the following: - * cluster-up/cluster/k8s-1.17.0/provider.sh (selected according $KUBEVIRT_PROVIDER), which sources: - * cluster-up/cluster/k8s-provider-common.sh -* Runs `up` (which appears at cluster-up/cluster/k8s-provider-common.sh). -It Triggers `gocli run` - (cluster-provision/gocli/cmd/run.go) which create the following containers: - * Cluster container (that one with the vm from the provisioning, vm.sh is used with parameters here that starts an already created vm). - * Registry. - * Container for dnsmasq (provides dns, dhcp services). - -# Creating new K8s provider -Clone folders of k8s, folder name should be x/y as in the provider name x-y (ie. k8s-1.17.0) and includes: -* cluster-provision/k8s/1.17.0/provision.sh # used to create a new provider -* cluster-provision/k8s/1.17.0/publish.sh # used to publish new provider -* cluster-up/cluster/k8s-1.17.0/provider.sh # used by cluster-up -* cluster-up/cluster/k8s-1.17.0/README.md - -# Example - Adding a new manifest to K8s 1.17 -* First add the file at cluster-provision/manifests, this folder would be copied to /tmp in the container, -by cluster-provision/cli/cli as part of provision. -* Add this snippet at cluster-provision/k8s/scripts/provision.sh, before "Wait at least for 7 pods" line. -``` -custom_manifest="/tmp/custom_manifest.yaml" -kubectl --kubeconfig=/etc/kubernetes/admin.conf create -f "$custom_manifest" -``` -* Run ./cluster-provision/k8s/1.17.0/provision.sh, it will create a new provision and test it. -* Run ./cluster-provision/k8s/1.17.0/publish.sh, it will publish the new created image to docker.io -* Update k8s-1.17.0 image line at cluster-up/cluster/images.sh, to point on the newly published image. -* Create a PR with the following files: - * The new manifest. - * Updated cluster-provision/k8s/scripts/provision.sh - * Updated cluster-up/cluster/images.sh. 
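The container-runtime auto-detection introduced in cluster-up/cluster/ephemeral-provider-common.sh above probes podman's API socket first and only falls back to docker if that probe fails. A minimal standalone sketch of that selection logic, with the socket path and probe URL taken from the diff (`detect_cri` is an illustrative name, not part of the PR):

```bash
#!/usr/bin/env bash
# Sketch of the runtime-selection pattern added above: probe podman's
# libpod API over its unix socket, fall back to a responsive docker daemon.
detect_cri() {
    if curl --unix-socket "${HOME}/podman.sock" http://d/v3.0.0/libpod/info >/dev/null 2>&1; then
        echo podman
    elif docker ps >/dev/null 2>&1; then
        echo docker
    else
        return 1
    fi
}

cri_bin=$(detect_cri) || { >&2 echo "no working container runtime found"; exit 1; }
>&2 echo "selecting ${cri_bin} as container runtime"
```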
diff --git a/cluster-up/cluster/k8s-1.17/provider.sh b/cluster-up/cluster/k8s-1.17/provider.sh deleted file mode 100644 index 3f4981e44..000000000 --- a/cluster-up/cluster/k8s-1.17/provider.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash -set -e -source ${KUBEVIRTCI_PATH}/cluster/k8s-provider-common.sh \ No newline at end of file diff --git a/cluster-up/cluster/k8s-1.18/provider.sh b/cluster-up/cluster/k8s-1.18/provider.sh deleted file mode 100644 index 3f4981e44..000000000 --- a/cluster-up/cluster/k8s-1.18/provider.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash -set -e -source ${KUBEVIRTCI_PATH}/cluster/k8s-provider-common.sh \ No newline at end of file diff --git a/cluster-up/cluster/k8s-1.20/README.md b/cluster-up/cluster/k8s-1.20/README.md index 3f85e5d2f..f7a1fea2c 100644 --- a/cluster-up/cluster/k8s-1.20/README.md +++ b/cluster-up/cluster/k8s-1.20/README.md @@ -35,6 +35,15 @@ make cluster-up To get more info about CNAO you can check the github project documentation here https://github.com/kubevirt/cluster-network-addons-operator +## Bringing the cluster up with cgroup v2 + +```bash +export KUBEVIRT_PROVIDER=k8s-1.20 +export KUBEVIRT_NUM_NODES=2 # master + one node +export KUBEVIRT_CGROUPV2=true +make cluster-up +``` + ## Bringing the cluster down ```bash diff --git a/cluster-up/cluster/k8s-1.20/provider.sh b/cluster-up/cluster/k8s-1.20/provider.sh index 55bb4ae9d..e2bf40cda 100644 --- a/cluster-up/cluster/k8s-1.20/provider.sh +++ b/cluster-up/cluster/k8s-1.20/provider.sh @@ -1,4 +1,9 @@ #!/usr/bin/env bash set -e + +if [ "${KUBEVIRT_CGROUPV2}" == "true" ]; then + export KUBEVIRT_PROVIDER_EXTRA_ARGS="${KUBEVIRT_PROVIDER_EXTRA_ARGS} --kernel-args='systemd.unified_cgroup_hierarchy=1'" +fi + # shellcheck disable=SC1090 source "${KUBEVIRTCI_PATH}/cluster/k8s-provider-common.sh" diff --git a/cluster-up/cluster/k8s-1.18/README.md b/cluster-up/cluster/k8s-1.21/README.md similarity index 79% rename from cluster-up/cluster/k8s-1.18/README.md rename to cluster-up/cluster/k8s-1.21/README.md index d2f75370f..c8efdf593 100644 --- a/cluster-up/cluster/k8s-1.18/README.md +++ b/cluster-up/cluster/k8s-1.21/README.md @@ -1,6 +1,6 @@ -# Kubernetes 1.18 in ephemeral containers +# Kubernetes 1.21 in ephemeral containers -Provides a pre-deployed Kubernetes with version 1.18 purely in docker +Provides a pre-deployed Kubernetes with version 1.21 purely in docker containers with qemu. The provided VMs are completely ephemeral and are recreated on every cluster restart. 
The KubeVirt containers are built on the local machine and are then pushed to a registry which is exposed at @@ -9,7 +9,7 @@ local machine and are then pushed to a registry which is exposed at ## Bringing the cluster up ```bash -export KUBEVIRT_PROVIDER=k8s-1.18 +export KUBEVIRT_PROVIDER=k8s-1.21 export KUBEVIRT_NUM_NODES=2 # master + one node make cluster-up ``` @@ -19,14 +19,14 @@ The cluster can be accessed as usual: ```bash $ cluster/kubectl.sh get nodes NAME STATUS ROLES AGE VERSION -node01 NotReady master 31s v1.18.1 -node02 NotReady 5s v1.18.1 +node01 NotReady master 31s v1.21.1 +node02 NotReady 5s v1.21.1 ``` ## Bringing the cluster up with cluster-network-addons-operator provisioned ```bash -export KUBEVIRT_PROVIDER=k8s-1.18 +export KUBEVIRT_PROVIDER=k8s-1.21 export KUBEVIRT_NUM_NODES=2 # master + one node export KUBEVIRT_WITH_CNAO=true make cluster-up @@ -35,10 +35,19 @@ make cluster-up To get more info about CNAO you can check the github project documentation here https://github.com/kubevirt/cluster-network-addons-operator +## Bringing the cluster up with cgroup v2 + +```bash +export KUBEVIRT_PROVIDER=k8s-1.21 +export KUBEVIRT_NUM_NODES=2 # master + one node +export KUBEVIRT_CGROUPV2=true +make cluster-up +``` + ## Bringing the cluster down ```bash -export KUBEVIRT_PROVIDER=k8s-1.18 +export KUBEVIRT_PROVIDER=k8s-1.21 make cluster-down ``` @@ -67,7 +76,7 @@ has to be enabled on your Docker. Add following to your "ipv6": true, "fixed-cidr-v6": "2001:db8:1::/64" } -``` +``` ```bash systemctl restart docker diff --git a/cluster-up/cluster/k8s-1.18/dev-guide.md b/cluster-up/cluster/k8s-1.21/dev-guide.md similarity index 83% rename from cluster-up/cluster/k8s-1.18/dev-guide.md rename to cluster-up/cluster/k8s-1.21/dev-guide.md index b412d41e0..3be86214f 100644 --- a/cluster-up/cluster/k8s-1.18/dev-guide.md +++ b/cluster-up/cluster/k8s-1.21/dev-guide.md @@ -5,8 +5,8 @@ allowing people to easily run a K8s cluster. The target audience is developers of kubevirtci, who want to create a new provider, or to update an existing one. -Please refer first to the following documents on how to run k8s-1.18:\ +Please refer first to the following documents on how to run k8s-1.21:\ -[k8s-1.18 cluster-up](https://github.com/kubevirt/kubevirtci/blob/master/cluster-up/cluster/k8s-1.18/README.md) +[k8s-1.21 cluster-up](https://github.com/kubevirt/kubevirtci/blob/master/cluster-up/cluster/k8s-1.21/README.md) In this doc, we will go on what kubevirtci provider image consist of, what its inner architecture, flow of start a pre-provisioned cluster, flow of creating a new provider, and how to create a new provider. @@ -15,7 +15,7 @@ A provider includes all the images (K8s base image, nodes OS image) and the scri cluster offline, without downloading / installing / compiling new resources. Deploying a cluster will create containers, which communicate with each other, in order to act as a K8s cluster. It's a bit different from running bare-metal cluster where the nodes are physical machines or when the nodes are virtual machines on the host itself, -It gives us isolation advantage and state freezing of the needed components, allowing offline deploy, agnostic of the host OS, and installed packages. +It gives us isolation advantage and state freezing of the needed components, allowing offline deploy, agnostic of the host OS, and installed packages. # Project structure * cluster-provision folder - creating preprovisioned clusters.
@@ -25,24 +25,24 @@ It gives us isolation advantage and state freezing of the needed components, all # K8s Deployment Running `make cluster-up` will deploy a pre-provisioned cluster. Upon finishing deployment of a K8s deploy, we will have 3 containers: -* k8s-1.18 vm container - a container that runs a qemu VM, which is the K8s node, in which the pods will run. +* k8s-1.21 vm container - a container that runs a qemu VM, which is the K8s node, in which the pods will run. * Registry container - a shared image registry. -* k8s-1.18 dnsmasq container - a container that run dnsmasq, which gives dns and dhcp services. +* k8s-1.21 dnsmasq container - a container that run dnsmasq, which gives dns and dhcp services. The containers are running and looks like this: ``` -[root@modi01 1.18.0]# docker ps +[root@modi01 1.21.0]# docker ps CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES -3589e85efc7d kubevirtci/k8s-1.18.0 "/bin/bash -c '/vm.s…" About an hour ago Up About an hour k8s-1.18.0-node01 -4742dc02add2 registry:2.7.1 "/entrypoint.sh /etc…" About an hour ago Up About an hour k8s-1.18.0-registry -13787e7d4ac9 kubevirtci/k8s-1.18.0 "/bin/bash -c /dnsma…" About an hour ago Up About an hour 127.0.0.1:8443->8443/tcp, 0.0.0.0:32794->2201/tcp, 0.0.0.0:32793->5000/tcp, 0.0.0.0:32792->5901/tcp, 0.0.0.0:32791->6443/tcp k8s-1.18.0-dnsmasq +3589e85efc7d kubevirtci/k8s-1.21.0 "/bin/bash -c '/vm.s…" About an hour ago Up About an hour k8s-1.21.0-node01 +4742dc02add2 registry:2.7.1 "/entrypoint.sh /etc…" About an hour ago Up About an hour k8s-1.21.0-registry +13787e7d4ac9 kubevirtci/k8s-1.21.0 "/bin/bash -c /dnsma…" About an hour ago Up About an hour 127.0.0.1:8443->8443/tcp, 0.0.0.0:32794->2201/tcp, 0.0.0.0:32793->5000/tcp, 0.0.0.0:32792->5901/tcp, 0.0.0.0:32791->6443/tcp k8s-1.21.0-dnsmasq ``` Nodes: ``` [root@modi01 kubevirtci]# oc get nodes NAME STATUS ROLES AGE VERSION -node01 Ready master 83m v1.18.0 +node01 Ready master 83m v1.21.0 ``` # Inner look of a deployed cluster @@ -92,8 +92,8 @@ From within the container we can see there is a process of qemu which runs the n root 1 0 36 13:39 ? 00:05:22 qemu-system-x86_64 -enable-kvm -drive format=qcow2,file=/var/run/disk/disk.qcow2,if=virtio,cache=unsafe -device virtio-net-pci,netdev=network0,mac=52:55:00:d1:55:01 -netdev tap,id=network0,ifname=tap01,script=no,downscript=no -device virtio-rng-pci -vnc :01 -cpu host -m 5120M -smp 5 -serial pty ``` -# Flow of K8s provisioning (1.18 for example) -`cluster-provision/k8s/1.18.0/provision.sh` +# Flow of K8s provisioning (1.21 for example) +`cluster-provision/k8s/1.21.0/provision.sh` * Runs the common cluster-provision/k8s/provision.sh. * Runs cluster-provision/cli/cli (bash script). * Creates a container for dnsmasq and runs dnsmasq.sh in it. @@ -109,14 +109,14 @@ root 1 0 36 13:39 ? 00:05:22 qemu-system-x86_64 -enable-kvm - * Create local volume directories. * Shutdown the vm and commit its container. -# Flow of K8s cluster-up (1.18 for example) +# Flow of K8s cluster-up (1.21 for example) Run ``` -export KUBEVIRT_PROVIDER=k8s-1.18.0 +export KUBEVIRT_PROVIDER=k8s-1.21.0 make cluster-up ``` * Runs cluster-up/up.sh which sources the following: - * cluster-up/cluster/k8s-1.18.0/provider.sh (selected according $KUBEVIRT_PROVIDER), which sources: + * cluster-up/cluster/k8s-1.21.0/provider.sh (selected according $KUBEVIRT_PROVIDER), which sources: * cluster-up/cluster/k8s-provider-common.sh * Runs `up` (which appears at cluster-up/cluster/k8s-provider-common.sh). 
It Triggers `gocli run` - (cluster-provision/gocli/cmd/run.go) which create the following containers: @@ -125,23 +125,23 @@ It Triggers `gocli run` - (cluster-provision/gocli/cmd/run.go) which create the * Container for dnsmasq (provides dns, dhcp services). # Creating new K8s provider -Clone folders of k8s, folder name should be x/y as in the provider name x-y (ie. k8s-1.18.0) and includes: -* cluster-provision/k8s/1.18.0/provision.sh # used to create a new provider -* cluster-provision/k8s/1.18.0/publish.sh # used to publish new provider -* cluster-up/cluster/k8s-1.18.0/provider.sh # used by cluster-up -* cluster-up/cluster/k8s-1.18.0/README.md +Clone folders of k8s, folder name should be x/y as in the provider name x-y (ie. k8s-1.21.0) and includes: +* cluster-provision/k8s/1.21.0/provision.sh # used to create a new provider +* cluster-provision/k8s/1.21.0/publish.sh # used to publish new provider +* cluster-up/cluster/k8s-1.21.0/provider.sh # used by cluster-up +* cluster-up/cluster/k8s-1.21.0/README.md -# Example - Adding a new manifest to K8s 1.18 +# Example - Adding a new manifest to K8s 1.21 * First add the file at cluster-provision/manifests, this folder would be copied to /tmp in the container, by cluster-provision/cli/cli as part of provision. * Add this snippet at cluster-provision/k8s/scripts/provision.sh, before "Wait at least for 7 pods" line. ``` custom_manifest="/tmp/custom_manifest.yaml" -kubectl --kubeconfig=/etc/kubernetes/admin.conf create -f "$custom_manifest" +kubectl --kubeconfig=/etc/kubernetes/admin.conf create -f "$custom_manifest" ``` -* Run ./cluster-provision/k8s/1.18.0/provision.sh, it will create a new provision and test it. -* Run ./cluster-provision/k8s/1.18.0/publish.sh, it will publish the new created image to docker.io -* Update k8s-1.18.0 image line at cluster-up/cluster/images.sh, to point on the newly published image. +* Run ./cluster-provision/k8s/1.21.0/provision.sh, it will create a new provision and test it. +* Run ./cluster-provision/k8s/1.21.0/publish.sh, it will publish the new created image to docker.io +* Update k8s-1.21.0 image line at cluster-up/cluster/images.sh, to point on the newly published image. * Create a PR with the following files: * The new manifest. * Updated cluster-provision/k8s/scripts/provision.sh diff --git a/cluster-up/cluster/k8s-1.21/provider.sh b/cluster-up/cluster/k8s-1.21/provider.sh new file mode 100644 index 000000000..e2bf40cda --- /dev/null +++ b/cluster-up/cluster/k8s-1.21/provider.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -e + +if [ "${KUBEVIRT_CGROUPV2}" == "true" ]; then + export KUBEVIRT_PROVIDER_EXTRA_ARGS="${KUBEVIRT_PROVIDER_EXTRA_ARGS} --kernel-args='systemd.unified_cgroup_hierarchy=1'" +fi + +# shellcheck disable=SC1090 +source "${KUBEVIRTCI_PATH}/cluster/k8s-provider-common.sh" diff --git a/cluster-up/cluster/k8s-provider-common.sh b/cluster-up/cluster/k8s-provider-common.sh index 7e8bedd94..ee5029357 100644 --- a/cluster-up/cluster/k8s-provider-common.sh +++ b/cluster-up/cluster/k8s-provider-common.sh @@ -5,7 +5,12 @@ set -e source ${KUBEVIRTCI_PATH}/cluster/ephemeral-provider-common.sh function up() { - ${_cli} run $(_add_common_params) + params=$(echo $(_add_common_params)) + if [[ ! 
-z $(echo $params | grep ERROR) ]]; then + echo -e $params + exit 1 + fi + eval ${_cli} run $params # Copy k8s config and kubectl ${_cli} scp --prefix $provider_prefix /usr/bin/kubectl - >${KUBEVIRTCI_CONFIG_PATH}/$KUBEVIRT_PROVIDER/.kubectl @@ -46,4 +51,33 @@ function up() { $kubectl wait networkaddonsconfig cluster --for condition=Available --timeout=200s fi fi + + if [ "$KUBEVIRT_DEPLOY_ISTIO" == "true" ] && [[ $KUBEVIRT_PROVIDER =~ k8s-1\.1.* ]]; then + echo "ERROR: Istio is not supported on providers < k8s-1.20" + exit 1 + + elif [ "$KUBEVIRT_DEPLOY_ISTIO" == "true" ]; then + if [ "$KUBEVIRT_WITH_CNAO" == "true" ]; then + $kubectl create -f /opt/istio/istio-operator-with-cnao.cr.yaml + else + $kubectl create -f /opt/istio/istio-operator.cr.yaml + fi + + istio_operator_ns=istio-system + retries=0 + max_retries=20 + while [[ $retries -lt $max_retries ]]; do + echo "waiting for istio-operator to be healthy" + sleep 5 + health=$($kubectl -n $istio_operator_ns get istiooperator istio-operator -o jsonpath="{.status.status}") + if [[ $health == "HEALTHY" ]]; then + break + fi + retries=$((retries + 1)) + done + if [ $retries == $max_retries ]; then + echo "ERROR: timed out waiting for istio-operator to become healthy" + exit 1 + fi + fi } diff --git a/cluster-up/cluster/kind-k8s-sriov-1.17.0/OWNERS b/cluster-up/cluster/kind-1.19-sriov/OWNERS similarity index 100% rename from cluster-up/cluster/kind-k8s-sriov-1.17.0/OWNERS rename to cluster-up/cluster/kind-1.19-sriov/OWNERS diff --git a/cluster-up/cluster/kind-k8s-sriov-1.17.0/README.md b/cluster-up/cluster/kind-1.19-sriov/README.md similarity index 96% rename from cluster-up/cluster/kind-k8s-sriov-1.17.0/README.md rename to cluster-up/cluster/kind-1.19-sriov/README.md index 6f53fde2b..cb8de791a 100644 --- a/cluster-up/cluster/kind-k8s-sriov-1.17.0/README.md +++ b/cluster-up/cluster/kind-1.19-sriov/README.md @@ -56,6 +56,7 @@ In order to achieve that, there are two options: The user can list the PFs that should not be allocated to the current cluster, keeping in mind that at least one (or 2 in case of migration), should not be listed, so they would be allocated for the current cluster. Note: another reason to blacklist a PF, is in case its has a defect or should be kept for other operations (for example sniffing). +- Clusters should be created one after another and not in parallel (to avoid races over SRIOV PF's). - The cluster names must be different. This can be achieved by setting `export CLUSTER_NAME=sriov2` on the 2nd cluster. The default `CLUSTER_NAME` is `sriov`. @@ -70,6 +71,4 @@ Kubevirtci is agnostic and nothing needs to be done, since all conditions above - Upper limit of the number of clusters that can be run on the same time equals number of PFs / number of PFs per cluster, therefore, in case there is only one PF, only one cluster can be created. Locally the actual limit currently supported is two clusters. -- Kubevirtci supports starting `cluster-up` simultaneously, since it is capable of handling race conditions, -when allocating PFs. - In order to use `make cluster-down` please make sure the right `CLUSTER_NAME` is exported.
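The `up()` rework above leans on a bash subtlety: `exit 1` inside `$(_add_common_params)` terminates only the command-substitution subshell, never the caller, which is why `_add_common_params` embeds an `ERROR:` marker in its output and `up()` greps for it before evaluating the params. A reduced sketch of the pattern (`build_params` is an illustrative stand-in, not the PR's function):

```bash
#!/usr/bin/env bash
# Why up() greps for ERROR: exit inside $( ) cannot abort the parent shell.
build_params() {
    if [[ $KUBEVIRT_PROVIDER =~ k8s-1\.1.* ]]; then
        echo "ERROR: prometheus is only supported for providers >= k8s-1.21\n"
        exit 1 # exits the subshell created by $( ) below, not the script
    fi
    echo "--enable-prometheus"
}

params=$(build_params)
if [[ -n $(echo $params | grep ERROR) ]]; then
    echo -e $params # -e expands the literal \n carried in the message
    exit 1          # this exit runs in the parent shell and really aborts
fi
echo "would run: gocli run $params"
```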
diff --git a/cluster-up/cluster/kind-k8s-sriov-1.17.0/TROUBLESHOOTING.md b/cluster-up/cluster/kind-1.19-sriov/TROUBLESHOOTING.md similarity index 100% rename from cluster-up/cluster/kind-k8s-sriov-1.17.0/TROUBLESHOOTING.md rename to cluster-up/cluster/kind-1.19-sriov/TROUBLESHOOTING.md diff --git a/cluster-up/cluster/kind-k8s-sriov-1.17.0/certcreator/certlib/selfsign.go b/cluster-up/cluster/kind-1.19-sriov/certcreator/certlib/selfsign.go similarity index 100% rename from cluster-up/cluster/kind-k8s-sriov-1.17.0/certcreator/certlib/selfsign.go rename to cluster-up/cluster/kind-1.19-sriov/certcreator/certlib/selfsign.go diff --git a/cluster-up/cluster/kind-k8s-sriov-1.17.0/certcreator/certsecret.go b/cluster-up/cluster/kind-1.19-sriov/certcreator/certsecret.go similarity index 96% rename from cluster-up/cluster/kind-k8s-sriov-1.17.0/certcreator/certsecret.go rename to cluster-up/cluster/kind-1.19-sriov/certcreator/certsecret.go index 5bd453481..205985a64 100644 --- a/cluster-up/cluster/kind-k8s-sriov-1.17.0/certcreator/certsecret.go +++ b/cluster-up/cluster/kind-1.19-sriov/certcreator/certsecret.go @@ -57,7 +57,7 @@ func generate(hookName, namespace string) ([]byte, []byte, error) { if err != nil { return nil, nil, fmt.Errorf("failed to generate self-signed certificate: %v", err) } - log.Printf("Self-Signed certificate created sucessfully for CN %s", certConfig.CommonName) + log.Printf("Self-Signed certificate created successfully for CN %s", certConfig.CommonName) return certConfig.Certificate.Bytes(), certConfig.PrivateKey.Bytes(), nil } @@ -114,7 +114,7 @@ func createSecret(clusterApi kubernetes.Interface, namespace, secretName string, if err != nil { return fmt.Errorf("timeout waiting for secret '%s' to create secret: %v", secret.Name, err) } - log.Printf("Secret '%s' at '%s' created sucessfully", secret.Name, namespace) + log.Printf("Secret '%s' at '%s' created successfully", secret.Name, namespace) return nil } diff --git a/cluster-up/cluster/kind-k8s-sriov-1.17.0/certcreator/go.mod b/cluster-up/cluster/kind-1.19-sriov/certcreator/go.mod similarity index 100% rename from cluster-up/cluster/kind-k8s-sriov-1.17.0/certcreator/go.mod rename to cluster-up/cluster/kind-1.19-sriov/certcreator/go.mod diff --git a/cluster-up/cluster/kind-k8s-sriov-1.17.0/certcreator/go.sum b/cluster-up/cluster/kind-1.19-sriov/certcreator/go.sum similarity index 100% rename from cluster-up/cluster/kind-k8s-sriov-1.17.0/certcreator/go.sum rename to cluster-up/cluster/kind-1.19-sriov/certcreator/go.sum diff --git a/cluster-up/cluster/kind-1.19-sriov/config_sriov_cluster.sh b/cluster-up/cluster/kind-1.19-sriov/config_sriov_cluster.sh new file mode 100755 index 000000000..effef5903 --- /dev/null +++ b/cluster-up/cluster/kind-1.19-sriov/config_sriov_cluster.sh @@ -0,0 +1,69 @@ +#!/bin/bash + +[ $(id -u) -ne 0 ] && echo "FATAL: this script requires sudo privileges" >&2 && exit 1 + +set -xe + +PF_COUNT_PER_NODE=${PF_COUNT_PER_NODE:-1} +[ $PF_COUNT_PER_NODE -le 0 ] && echo "FATAL: PF_COUNT_PER_NODE must be a positive integer" >&2 && exit 1 + +SCRIPT_PATH=$(dirname "$(realpath "$0")") + +source ${SCRIPT_PATH}/sriov-node/node.sh +source ${SCRIPT_PATH}/sriov-components/sriov_components.sh + +CONFIGURE_VFS_SCRIPT_PATH="$SCRIPT_PATH/sriov-node/configure_vfs.sh" + +SRIOV_COMPONENTS_NAMESPACE="sriov" +SRIOV_NODE_LABEL_KEY="sriov_capable" +SRIOV_NODE_LABEL_VALUE="true" +SRIOV_NODE_LABEL="$SRIOV_NODE_LABEL_KEY=$SRIOV_NODE_LABEL_VALUE" +SRIOVDP_RESOURCE_PREFIX="kubevirt.io" +SRIOVDP_RESOURCE_NAME="sriov_net" 
+VFS_DRIVER="vfio-pci" +VFS_DRIVER_KMODULE="vfio_pci" + +function validate_nodes_sriov_allocatable_resource() { + local -r resource_name="$SRIOVDP_RESOURCE_PREFIX/$SRIOVDP_RESOURCE_NAME" + local -r sriov_nodes=$(_kubectl get nodes -l $SRIOV_NODE_LABEL -o custom-columns=:.metadata.name --no-headers) + + local num_vfs + for sriov_node in $sriov_nodes; do + num_vfs=$(node::total_vfs_count "$sriov_node") + sriov_components::wait_allocatable_resource "$sriov_node" "$resource_name" "$num_vfs" + done +} + +worker_nodes=($(_kubectl get nodes -l node-role.kubernetes.io/worker -o custom-columns=:.metadata.name --no-headers)) +worker_nodes_count=${#worker_nodes[@]} +[ "$worker_nodes_count" -eq 0 ] && echo "FATAL: no worker nodes found" >&2 && exit 1 + +pfs_names=($(node::discover_host_pfs)) +pf_count="${#pfs_names[@]}" +[ "$pf_count" -eq 0 ] && echo "FATAL: Could not find available sriov PF's" >&2 && exit 1 + +total_pf_required=$((worker_nodes_count*PF_COUNT_PER_NODE)) +[ "$pf_count" -lt "$total_pf_required" ] && \ + echo "FATAL: there are not enough PF's on the host, try to reduce PF_COUNT_PER_NODE + Worker nodes count: $worker_nodes_count + PF per node count: $PF_COUNT_PER_NODE + Total PF count required: $total_pf_required" >&2 && exit 1 + +## Move SRIOV Physical Functions to worker nodes, create VF's and configure their drivers +PFS_IN_USE="" +node::configure_sriov_pfs_and_vfs "${worker_nodes[*]}" "${pfs_names[*]}" "$PF_COUNT_PER_NODE" "PFS_IN_USE" + +## Deploy Multus and SRIOV components +sriov_components::deploy_multus +sriov_components::deploy \ + "$PFS_IN_USE" \ + "$VFS_DRIVER" \ + "$SRIOVDP_RESOURCE_PREFIX" "$SRIOVDP_RESOURCE_NAME" \ + "$SRIOV_NODE_LABEL_KEY" "$SRIOV_NODE_LABEL_VALUE" + +# Verify that each sriov capable node has sriov VFs allocatable resource +validate_nodes_sriov_allocatable_resource +sriov_components::wait_pods_ready + +_kubectl get nodes +_kubectl get pods -n $SRIOV_COMPONENTS_NAMESPACE diff --git a/cluster-up/cluster/kind-k8s-sriov-1.17.0/manifests/network_config_policy.yaml b/cluster-up/cluster/kind-1.19-sriov/manifests/network_config_policy.yaml similarity index 100% rename from cluster-up/cluster/kind-k8s-sriov-1.17.0/manifests/network_config_policy.yaml rename to cluster-up/cluster/kind-1.19-sriov/manifests/network_config_policy.yaml diff --git a/cluster-up/cluster/kind-1.19-sriov/provider.sh b/cluster-up/cluster/kind-1.19-sriov/provider.sh new file mode 100755 index 000000000..1b834f784 --- /dev/null +++ b/cluster-up/cluster/kind-1.19-sriov/provider.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +set -e + +DEFAULT_CLUSTER_NAME="sriov" +DEFAULT_HOST_PORT=5000 +ALTERNATE_HOST_PORT=5001 +export CLUSTER_NAME=${CLUSTER_NAME:-$DEFAULT_CLUSTER_NAME} + +if [ $CLUSTER_NAME == $DEFAULT_CLUSTER_NAME ]; then + export HOST_PORT=$DEFAULT_HOST_PORT +else + export HOST_PORT=$ALTERNATE_HOST_PORT +fi + +#'kubevirt-test-default1' is the default namespace of +# Kubevirt SRIOV tests where the SRIOV VM's will be created. +SRIOV_TESTS_NS="${SRIOV_TESTS_NS:-kubevirt-test-default1}" + +function set_kind_params() { + export KIND_VERSION="${KIND_VERSION:-0.11.1}" + export KIND_NODE_IMAGE="${KIND_NODE_IMAGE:-quay.io/kubevirtci/kindest_node:v1.19.11@sha256:cbecc517bfad65e368cd7975d1e8a4f558d91160c051d0b1d10ff81488f5fb06}" + export KUBECTL_PATH="${KUBECTL_PATH:-/bin/kubectl}" +} + +function print_sriov_data() { + nodes=$(_kubectl get nodes -o=custom-columns=:.metadata.name | awk NF) + for node in $nodes; do + if [[ !
"$node" =~ .*"control-plane".* ]]; then + echo "Node: $node" + echo "VFs:" + docker exec $node bash -c "ls -l /sys/class/net/*/device/virtfn*" + echo "PFs PCI Addresses:" + docker exec $node bash -c "grep PCI_SLOT_NAME /sys/class/net/*/device/uevent" + fi + done +} + +function up() { + # print hardware info for easier debugging based on logs + echo 'Available NICs' + docker run --rm --cap-add=SYS_RAWIO quay.io/phoracek/lspci@sha256:0f3cacf7098202ef284308c64e3fc0ba441871a846022bb87d65ff130c79adb1 sh -c "lspci | egrep -i 'network|ethernet'" + echo "" + + cp $KIND_MANIFESTS_DIR/kind.yaml ${KUBEVIRTCI_CONFIG_PATH}/$KUBEVIRT_PROVIDER/kind.yaml + kind_up + + # remove the rancher.io kind default storageClass + _kubectl delete sc standard + + ${KUBEVIRTCI_PATH}/cluster/$KUBEVIRT_PROVIDER/config_sriov_cluster.sh + + # In order to support live migration on containerized cluster we need to workaround + # Libvirt uuid check for source and target nodes. + # To do that we create PodPreset that mounts fake random product_uuid to virt-launcher pods, + # and kubevirt SRIOV tests namespace for the PodPrest beforhand. + podpreset::expose_unique_product_uuid_per_node "$CLUSTER_NAME" "$SRIOV_TESTS_NS" + + print_sriov_data + echo "$KUBEVIRT_PROVIDER cluster '$CLUSTER_NAME' is ready" +} + +set_kind_params + +source ${KUBEVIRTCI_PATH}/cluster/kind/common.sh +source ${KUBEVIRTCI_PATH}/cluster/kind/podpreset.sh diff --git a/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/kustomization.yaml b/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/kustomization.yaml new file mode 100644 index 000000000..46c939e2f --- /dev/null +++ b/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/kustomization.yaml @@ -0,0 +1,34 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: sriov +resources: +- sriov-ns.yaml +- sriov-cni-daemonset.yaml +- sriovdp-daemonset.yaml +- sriovdp-config.yaml +images: + - name: nfvpe/sriov-device-plugin + newName: quay.io/kubevirtci/sriov-device-plugin + newTag: v3.3 + - name: nfvpe/sriov-cni + newName: quay.io/kubevirtci/sriov-cni + newTag: v2.6 +patchesJson6902: +- target: + group: apps + version: v1 + kind: DaemonSet + name: kube-sriov-cni-ds-amd64 + path: patch-node-selector.yaml +- target: + group: apps + version: v1 + kind: DaemonSet + name: kube-sriov-device-plugin-amd64 + path: patch-node-selector.yaml +- target: + group: apps + version: v1 + kind: DaemonSet + name: kube-sriov-device-plugin-amd64 + path: patch-sriovdp-resource-prefix.yaml diff --git a/cluster-up/cluster/kind-k8s-sriov-1.17.0/manifests/multus.yaml b/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/multus.yaml similarity index 100% rename from cluster-up/cluster/kind-k8s-sriov-1.17.0/manifests/multus.yaml rename to cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/multus.yaml diff --git a/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/patch-node-selector.yaml.in b/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/patch-node-selector.yaml.in new file mode 100644 index 000000000..0117c8cdd --- /dev/null +++ b/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/patch-node-selector.yaml.in @@ -0,0 +1,3 @@ +- op: add + path: /spec/template/spec/nodeSelector/$LABEL_KEY + value: "$LABEL_VALUE" diff --git a/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/patch-sriovdp-resource-prefix.yaml.in b/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/patch-sriovdp-resource-prefix.yaml.in new file 
mode 100644 index 000000000..563e606a9 --- /dev/null +++ b/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/patch-sriovdp-resource-prefix.yaml.in @@ -0,0 +1,3 @@ +- op: add + path: /spec/template/spec/containers/0/args/-1 + value: --resource-prefix=$RESOURCE_PREFIX diff --git a/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/sriov-cni-daemonset.yaml b/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/sriov-cni-daemonset.yaml new file mode 100644 index 000000000..6a28c146f --- /dev/null +++ b/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/sriov-cni-daemonset.yaml @@ -0,0 +1,47 @@ +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kube-sriov-cni-ds-amd64 + namespace: kube-system + labels: + tier: node + app: sriov-cni +spec: + selector: + matchLabels: + name: sriov-cni + template: + metadata: + labels: + name: sriov-cni + tier: node + app: sriov-cni + spec: + hostNetwork: true + nodeSelector: + beta.kubernetes.io/arch: amd64 + tolerations: + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + containers: + - name: kube-sriov-cni + image: nfvpe/sriov-cni + imagePullPolicy: IfNotPresent + securityContext: + privileged: true + resources: + requests: + cpu: "100m" + memory: "50Mi" + limits: + cpu: "100m" + memory: "50Mi" + volumeMounts: + - name: cnibin + mountPath: /host/opt/cni/bin + volumes: + - name: cnibin + hostPath: + path: /opt/cni/bin diff --git a/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/sriov-ns.yaml b/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/sriov-ns.yaml new file mode 100644 index 000000000..bfe55b30d --- /dev/null +++ b/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/sriov-ns.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: sriov diff --git a/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/sriovdp-config.yaml.in b/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/sriovdp-config.yaml.in new file mode 100644 index 000000000..5e9788168 --- /dev/null +++ b/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/sriovdp-config.yaml.in @@ -0,0 +1,17 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: sriovdp-config + namespace: kube-system +data: + config.json: | + { + "resourceList": [{ + "resourceName": "$RESOURCE_NAME", + "selectors": { + "drivers": $DRIVERS, + "pfNames": $PF_NAMES + } + }] + } diff --git a/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/sriovdp-daemonset.yaml b/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/sriovdp-daemonset.yaml new file mode 100644 index 000000000..86d17cf6d --- /dev/null +++ b/cluster-up/cluster/kind-1.19-sriov/sriov-components/manifests/sriovdp-daemonset.yaml @@ -0,0 +1,202 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: sriov-device-plugin + namespace: kube-system + +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kube-sriov-device-plugin-amd64 + namespace: kube-system + labels: + tier: node + app: sriovdp +spec: + selector: + matchLabels: + name: sriov-device-plugin + template: + metadata: + labels: + name: sriov-device-plugin + tier: node + app: sriovdp + spec: + hostNetwork: true + nodeSelector: + beta.kubernetes.io/arch: amd64 + tolerations: + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + serviceAccountName: sriov-device-plugin + containers: + - name: kube-sriovdp + image: nfvpe/sriov-device-plugin:v3.3 + imagePullPolicy: IfNotPresent + args: 
+ - --log-dir=sriovdp + - --log-level=10 + securityContext: + privileged: true + volumeMounts: + - name: devicesock + mountPath: /var/lib/kubelet/ + readOnly: false + - name: log + mountPath: /var/log + - name: config-volume + mountPath: /etc/pcidp + - name: device-info + mountPath: /var/run/k8s.cni.cncf.io/devinfo/dp + volumes: + - name: devicesock + hostPath: + path: /var/lib/kubelet/ + - name: log + hostPath: + path: /var/log + - name: device-info + hostPath: + path: /var/run/k8s.cni.cncf.io/devinfo/dp + type: DirectoryOrCreate + - name: config-volume + configMap: + name: sriovdp-config + items: + - key: config.json + path: config.json + +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kube-sriov-device-plugin-ppc64le + namespace: kube-system + labels: + tier: node + app: sriovdp +spec: + selector: + matchLabels: + name: sriov-device-plugin + template: + metadata: + labels: + name: sriov-device-plugin + tier: node + app: sriovdp + spec: + hostNetwork: true + nodeSelector: + beta.kubernetes.io/arch: ppc64le + tolerations: + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + serviceAccountName: sriov-device-plugin + containers: + - name: kube-sriovdp + image: nfvpe/sriov-device-plugin:ppc64le + imagePullPolicy: IfNotPresent + args: + - --log-dir=sriovdp + - --log-level=10 + securityContext: + privileged: true + volumeMounts: + - name: devicesock + mountPath: /var/lib/kubelet/ + readOnly: false + - name: log + mountPath: /var/log + - name: config-volume + mountPath: /etc/pcidp + - name: device-info + mountPath: /var/run/k8s.cni.cncf.io/devinfo/dp + volumes: + - name: devicesock + hostPath: + path: /var/lib/kubelet/ + - name: log + hostPath: + path: /var/log + - name: device-info + hostPath: + path: /var/run/k8s.cni.cncf.io/devinfo/dp + type: DirectoryOrCreate + - name: config-volume + configMap: + name: sriovdp-config + items: + - key: config.json + path: config.json +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kube-sriov-device-plugin-arm64 + namespace: kube-system + labels: + tier: node + app: sriovdp +spec: + selector: + matchLabels: + name: sriov-device-plugin + template: + metadata: + labels: + name: sriov-device-plugin + tier: node + app: sriovdp + spec: + hostNetwork: true + nodeSelector: + beta.kubernetes.io/arch: arm64 + tolerations: + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + serviceAccountName: sriov-device-plugin + containers: + - name: kube-sriovdp +# this is a temporary image repository for the arm64 architecture, until the CI/CD of the +# sriov-device-plugin is able to build images for multiple architectures + image: alexeyperevalov/arm64-sriov-device-plugin + imagePullPolicy: IfNotPresent + args: + - --log-dir=sriovdp + - --log-level=10 + securityContext: + privileged: true + volumeMounts: + - name: devicesock + mountPath: /var/lib/kubelet/ + readOnly: false + - name: log + mountPath: /var/log + - name: config-volume + mountPath: /etc/pcidp + - name: device-info + mountPath: /var/run/k8s.cni.cncf.io/devinfo/dp + volumes: + - name: devicesock + hostPath: + path: /var/lib/kubelet/ + - name: log + hostPath: + path: /var/log + - name: device-info + hostPath: + path: /var/run/k8s.cni.cncf.io/devinfo/dp + type: DirectoryOrCreate + - name: config-volume + configMap: + name: sriovdp-config + items: + - key: config.json + path: config.json diff --git a/cluster-up/cluster/kind-1.19-sriov/sriov-components/sriov_components.sh b/cluster-up/cluster/kind-1.19-sriov/sriov-components/sriov_components.sh new file
mode 100644 index 000000000..53a70266a --- /dev/null +++ b/cluster-up/cluster/kind-1.19-sriov/sriov-components/sriov_components.sh @@ -0,0 +1,206 @@ +#!/bin/bash + +MANIFESTS_DIR="${KUBEVIRTCI_PATH}/cluster/${KUBEVIRT_PROVIDER}/sriov-components/manifests" +MULTUS_MANIFEST="${MANIFESTS_DIR}/multus.yaml" + +CUSTOM_MANIFESTS="${KUBEVIRTCI_CONFIG_PATH}/${KUBEVIRT_PROVIDER}/manifests" +SRIOV_COMPONENTS_MANIFEST="${CUSTOM_MANIFESTS}/sriov-components.yaml" + +SRIOV_DEVICE_PLUGIN_CONFIG_TEMPLATE="${MANIFESTS_DIR}/sriovdp-config.yaml.in" +SRIOV_DEVICE_PLUGIN_CONFIG="${CUSTOM_MANIFESTS}/sriovdp-config.yaml" + +PATCH_SRIOVDP_RESOURCE_PREFIX_TEMPLATE="${MANIFESTS_DIR}/patch-sriovdp-resource-prefix.yaml.in" +PATCH_SRIOVDP_RESOURCE_PREFIX="${CUSTOM_MANIFESTS}/patch-sriovdp-resource-prefix.yaml" + +PATCH_NODE_SELECTOR_TEMPLATE="${MANIFESTS_DIR}/patch-node-selector.yaml.in" +PATCH_NODE_SELECTOR="${CUSTOM_MANIFESTS}/patch-node-selector.yaml" + +KUBECONFIG="${KUBEVIRTCI_CONFIG_PATH}/$KUBEVIRT_PROVIDER/.kubeconfig" +KUBECTL="${KUBEVIRTCI_CONFIG_PATH}/$KUBEVIRT_PROVIDER/.kubectl --kubeconfig=${KUBECONFIG}" + +function _kubectl() { + ${KUBECTL} "$@" +} + +function _retry() { + local -r tries=$1 + local -r wait_time=$2 + local -r action=$3 + local -r wait_message=$4 + local -r waiting_action=$5 + + eval $action + local return_code=$? + for i in $(seq $tries); do + if [[ $return_code -ne 0 ]]; then + echo "[$i/$tries] $wait_message" + eval $waiting_action + sleep $wait_time + eval $action + return_code=$? + else + return 0 + fi + done + + return 1 +} + +function _check_all_pods_ready() { + all_pods_ready_condition=$(_kubectl get pods -A --no-headers -o custom-columns=':.status.conditions[?(@.type == "Ready")].status') + if [ "$?" -eq 0 ]; then + pods_not_ready_count=$(grep -cw False <<<"$all_pods_ready_condition") + if [ "$pods_not_ready_count" -eq 0 ]; then + return 0 + fi + fi + + return 1 +} + +# not using kubectl wait since with the sriov operator the pods get restarted a couple of times and this is +# more reliable +function sriov_components::wait_pods_ready() { + local -r tries=30 + local -r wait_time=10 + + local -r wait_message="Waiting for all pods to become ready.." + local -r error_message="Not all pods were ready after $(($tries * $wait_time)) seconds" + + local -r get_pods='_kubectl get pods --all-namespaces' + local -r action="_check_all_pods_ready" + + set +x + trap "set -x" RETURN + + if ! _retry "$tries" "$wait_time" "$action" "$wait_message" "$get_pods"; then + echo $error_message + return 1 + fi + + echo "all pods are ready" + return 0 +} + +function sriov_components::wait_allocatable_resource() { + local -r node=$1 + local resource_name=$2 + local -r expected_value=$3 + + local -r tries=48 + local -r wait_time=10 + + local -r wait_message="wait for $node node to have allocatable resource: $resource_name: $expected_value" + local -r error_message="node $node doesn't have allocatable resource $resource_name:$expected_value" + + # it is necessary to add '\' before '.' in the resource name. + resource_name=$(echo $resource_name | sed s/\\./\\\\\./g) + local -r action='_kubectl get node $node -ocustom-columns=:.status.allocatable.$resource_name --no-headers | grep -w $expected_value' + + if !
_retry $tries $wait_time "$action" "$wait_message"; then + echo $error_message + return 1 + fi + + return 0 +} + +function sriov_components::deploy_multus() { + echo 'Deploying Multus' + sed "s#nfvpe/multus#quay.io/kubevirtci/multus#" "$MULTUS_MANIFEST" | _kubectl apply -f - + + return 0 +} + +function sriov_components::deploy() { + local -r pf_names=$1 + local -r drivers=$2 + local -r resource_prefix=$3 + local -r resource_name=$4 + local -r label_key=$5 + local -r label_value=$6 + + _create_custom_manifests_dir + _prepare_node_selector_patch "$label_key" "$label_value" + _prepare_sriovdp_resource_prefix_patch "$resource_prefix" + _prepare_device_plugin_config \ + "$pf_names" \ + "$resource_name" \ + "$drivers" + _deploy_sriov_components + + return 0 +} + +function _create_custom_manifests_dir() { + mkdir -p "$CUSTOM_MANIFESTS" + + cp -f $(find "$MANIFESTS_DIR"/*.yaml) "$CUSTOM_MANIFESTS" + + return 0 +} + +function _prepare_node_selector_patch() { + local -r label_key=$1 + local -r label_value=$2 + + ( + export LABEL_KEY=$label_key + export LABEL_VALUE=$label_value + envsubst < "$PATCH_NODE_SELECTOR_TEMPLATE" > "$PATCH_NODE_SELECTOR" + ) +} + +function _prepare_sriovdp_resource_prefix_patch() { + local -r resource_prefix=$1 + + ( + export RESOURCE_PREFIX=$resource_prefix + envsubst < "$PATCH_SRIOVDP_RESOURCE_PREFIX_TEMPLATE" > "$PATCH_SRIOVDP_RESOURCE_PREFIX" + ) +} + +function _prepare_device_plugin_config() { + local -r pf_names=$1 + local -r resource_name=$2 + local -r drivers=$3 + + ( + export RESOURCE_NAME=$resource_name + export DRIVERS=$(_format_json_array "$drivers") + export PF_NAMES=$(_format_json_array "$pf_names") + envsubst < "$SRIOV_DEVICE_PLUGIN_CONFIG_TEMPLATE" > "$SRIOV_DEVICE_PLUGIN_CONFIG" + ) + + return 0 +} + +function _format_json_array() { + local -r string=$1 + + local json_array="$string" + # Replace all spaces with ",": aa bb -> aa","bb + local -r replace='","' + json_array="${json_array// /$replace}" + + # Add opening quotes for first element, and closing quotes for last element + # aa","bb -> "aa","bb" + json_array="\"${json_array}\"" + + # Add brackets: "aa","bb" -> ["aa","bb"] + json_array="[${json_array}]" + + echo "$json_array" +} + +function _deploy_sriov_components() { + _kubectl kustomize "$CUSTOM_MANIFESTS" >"$SRIOV_COMPONENTS_MANIFEST" + + echo "Deploying SRIOV components:" + cat "$SRIOV_COMPONENTS_MANIFEST" + + _kubectl apply -f "$SRIOV_COMPONENTS_MANIFEST" + + return 0 +} + diff --git a/cluster-up/cluster/kind-1.19-sriov/sriov-node/configure_vfs.sh b/cluster-up/cluster/kind-1.19-sriov/sriov-node/configure_vfs.sh new file mode 100755 index 000000000..c08dd82c3 --- /dev/null +++ b/cluster-up/cluster/kind-1.19-sriov/sriov-node/configure_vfs.sh @@ -0,0 +1,104 @@ +#! 
/bin/bash + +set -ex + +function configure_vf_driver() { + local -r vf_sys_device=$1 + local -r driver=$2 + + vf_pci_address=$(basename $vf_sys_device) + # Check if a VF is bound to a different driver + if [ -d "$vf_sys_device/driver" ]; then + vf_bus_pci_device_driver=$(readlink -e $vf_sys_device/driver) + vf_driver_name=$(basename $vf_bus_pci_device_driver) + + # Check if the VF is already bound to the requested driver + if [[ $vf_driver_name == $driver ]]; then + return + else + echo "Unbind VF $vf_pci_address from $vf_driver_name driver" + echo "$vf_pci_address" >> "$vf_bus_pci_device_driver/unbind" + fi + fi + + echo "Bind VF $vf_pci_address to $driver driver" + echo "$driver" >> "$vf_sys_device/driver_override" + echo "$vf_pci_address" >> "/sys/bus/pci/drivers/$driver/bind" + echo "" >> "$vf_sys_device/driver_override" + + return 0 +} + +function create_vfs() { + local -r pf_net_device=$1 + local -r vfs_count=$2 + + local -r pf_name=$(basename $pf_net_device) + local -r pf_sys_device=$(readlink -e $pf_net_device) + + local -r sriov_totalvfs_content=$(cat $pf_sys_device/sriov_totalvfs) + [ $sriov_totalvfs_content -lt $vfs_count ] && \ + echo "FATAL: PF $pf_name: requested VF count ($vfs_count) exceeds sriov_totalvfs ($sriov_totalvfs_content)" >&2 && return 1 + + local -r sriov_numvfs_content=$(cat $pf_sys_device/sriov_numvfs) + if [ $sriov_numvfs_content -ne $vfs_count ]; then + echo "Creating $vfs_count VF's on PF $pf_name" + echo 0 >> "$pf_sys_device/sriov_numvfs" + echo "$vfs_count" >> "$pf_sys_device/sriov_numvfs" + sleep 3 + fi + + return 0 +} + +function validate_run_with_sudo() { + [ "$(id -u)" -ne 0 ] && echo "FATAL: This script requires sudo privileges" >&2 && return 1 + + return 0 +} + +function validate_sysfs_mount_as_rw() { + local -r sysfs_permissions=$(grep -Po 'sysfs.*\K(ro|rw)' /proc/mounts) + [ "$sysfs_permissions" != rw ] && echo "FATAL: sysfs is read-only, try to remount as RW" >&2 && return 1 + + return 0 +} + +function ensure_driver_is_loaded() { + local -r driver_name=$1 + local -r module_name=$2 + + if ! grep "$module_name" /proc/modules; then + if !
modprobe "$driver_name"; then + echo "FATAL: failed to load $driver_name kernel module $module_name" >&2 && return 1 + fi + fi + + return 0 +} + +DRIVER="${DRIVER:-vfio-pci}" +DRIVER_KMODULE="${DRIVER_KMODULE:-vfio_pci}" + +validate_run_with_sudo +validate_sysfs_mount_as_rw +ensure_driver_is_loaded $DRIVER $DRIVER_KMODULE + +sriov_pfs=( $(find /sys/class/net/*/device/sriov_numvfs) ) +[ "${#sriov_pfs[@]}" -eq 0 ] && echo "FATAL: Could not find available sriov PFs" >&2 && exit 1 + +for pf_name in "${sriov_pfs[@]}"; do + pf_device=$(dirname "$pf_name") + + echo "Creating VFs" + sriov_totalvfs=$(cat "$pf_device/sriov_totalvfs") + create_vfs "$pf_device" "$sriov_totalvfs" + + echo "Configuring VFs drivers" + # VF sysfs devices reside at /sys/class/net/<pf_name>/device/virtfn* + vfs_sys_devices=$(readlink -e $pf_device/virtfn*) + for vf in $vfs_sys_devices; do + configure_vf_driver "$vf" $DRIVER + ls -l "$vf/driver" + done +done diff --git a/cluster-up/cluster/kind-1.19-sriov/sriov-node/node.sh b/cluster-up/cluster/kind-1.19-sriov/sriov-node/node.sh new file mode 100644 index 000000000..6981c83fb --- /dev/null +++ b/cluster-up/cluster/kind-1.19-sriov/sriov-node/node.sh @@ -0,0 +1,110 @@ +#!/bin/bash + +SCRIPT_PATH=${SCRIPT_PATH:-$(dirname "$(realpath "$0")")} + +CONFIGURE_VFS_SCRIPT_PATH="${SCRIPT_PATH}/configure_vfs.sh" +PFS_IN_USE=${PFS_IN_USE:-} + +function node::discover_host_pfs() { + local -r sriov_pfs=( $(find /sys/class/net/*/device/sriov_numvfs) ) + [ "${#sriov_pfs[@]}" -eq 0 ] && echo "FATAL: Could not find available sriov PFs on host" >&2 && return 1 + + local pf_name + local pfs_names=() + for pf in "${sriov_pfs[@]}"; do + pf_name="${pf%%/device/*}" + pf_name="${pf_name##*/}" + if echo "${PF_BLACKLIST[@]}" | grep -q "${pf_name}"; then + continue + fi + + pfs_names+=( $pf_name ) + done + + echo "${pfs_names[@]}" +} + +# node::configure_sriov_pfs_and_vfs moves SRIOV PFs to the nodes' netns, +# creates SRIOV VFs and configures their driver on each node. +# Exports the 'PFS_IN_USE' env variable with the list of SRIOV PFs +# that have been moved to the nodes' netns.
+function node::configure_sriov_pfs_and_vfs() { + local -r nodes_array=($1) + local -r pfs_names_array=($2) + local -r pf_count_per_node=$3 + local -r pfs_in_use_var_name=$4 + + local -r config_vf_script=$(basename "$CONFIGURE_VFS_SCRIPT_PATH") + local pfs_to_move=() + local pfs_array_offset=0 + local pfs_in_use=() + local node_exec + + # 'ip netns' learns which network namespaces exist by checking /var/run/netns + mkdir -p /var/run/netns + for node in "${nodes_array[@]}"; do + prepare_node_netns "$node" + + ## Move PFs to the node netns + # Slice '$pfs_names_array' so that each node gets a unique slice + # of '$pf_count_per_node' PF names + pfs_to_move=( "${pfs_names_array[@]:$pfs_array_offset:$pf_count_per_node}" ) + echo "Moving '${pfs_to_move[*]}' PFs to '$node' netns" + for pf_name in "${pfs_to_move[@]}"; do + move_pf_to_node_netns "$node" "$pf_name" + done + # Increment the offset for the next slice + pfs_array_offset=$((pfs_array_offset + pf_count_per_node)) + pfs_in_use+=( "${pfs_to_move[@]}" ) + + # KIND mounts sysfs as read-only by default, remount it as R/W + node_exec="docker exec $node" + $node_exec mount -o remount,rw /sys + $node_exec chmod 666 /dev/vfio/vfio + + # Create and configure SRIOV Virtual Functions on the SRIOV node + docker cp "$CONFIGURE_VFS_SCRIPT_PATH" "$node:/" + $node_exec bash -c "DRIVER=$VFS_DRIVER DRIVER_KMODULE=$VFS_DRIVER_KMODULE ./$config_vf_script" + + _kubectl label node $node $SRIOV_NODE_LABEL + done + + # Set a new variable with the used PF names that will be consumed by the caller + eval $pfs_in_use_var_name="'${pfs_in_use[*]}'" +} + +function prepare_node_netns() { + local -r node_name=$1 + local -r node_pid=$(docker inspect -f '{{.State.Pid}}' "$node_name") + + # Docker does not create the required symlink for a container netns, + # which prevents 'ip netns' from discovering that container netns. + # Thus it is necessary to symlink the current worker node (container) + # netns into /var/run/netns (consumed by 'ip netns'). + # The node container netns, named after the node, will then be visible.
+ ln -sf "/proc/$node_pid/ns/net" "/var/run/netns/$node_name" +} + +function move_pf_to_node_netns() { + local -r node_name=$1 + local -r pf_name=$2 + + # Move PF to the node network namespace + ip link set "$pf_name" netns "$node_name" + # Ensure the current PF is up + ip netns exec "$node_name" ip link set up dev "$pf_name" + ip netns exec "$node_name" ip link show +} + +function node::total_vfs_count() { + local -r node_name=$1 + local -r node_pid=$(docker inspect -f '{{.State.Pid}}' "$node_name") + local -r pfs_sriov_numvfs=( $(cat /proc/$node_pid/root/sys/class/net/*/device/sriov_numvfs) ) + local total_vfs_on_node=0 + + for num_vfs in "${pfs_sriov_numvfs[@]}"; do + total_vfs_on_node=$((total_vfs_on_node + num_vfs)) + done + + echo "$total_vfs_on_node" +} diff --git a/cluster-up/cluster/kind-k8s-sriov-1.17.0/config_sriov.sh b/cluster-up/cluster/kind-1.19-sriov/sriov_operator.sh old mode 100755 new mode 100644 similarity index 66% rename from cluster-up/cluster/kind-k8s-sriov-1.17.0/config_sriov.sh rename to cluster-up/cluster/kind-1.19-sriov/sriov_operator.sh index 759ad74e4..4b797dc9d --- a/cluster-up/cluster/kind-k8s-sriov-1.17.0/config_sriov.sh +++ b/cluster-up/cluster/kind-1.19-sriov/sriov_operator.sh @@ -1,20 +1,11 @@ #!/bin/bash -set -xe -source ${KUBEVIRTCI_PATH}/cluster/kind/common.sh +set -ex -MANIFESTS_DIR="${KUBEVIRTCI_PATH}/cluster/$KUBEVIRT_PROVIDER/manifests" -CERTCREATOR_PATH="${KUBEVIRTCI_PATH}/cluster/$KUBEVIRT_PROVIDER/certcreator" KUBECONFIG_PATH="${KUBEVIRTCI_CONFIG_PATH}/$KUBEVIRT_PROVIDER/.kubeconfig" - -MASTER_NODE="${CLUSTER_NAME}-control-plane" -WORKER_NODE_ROOT="${CLUSTER_NAME}-worker" -PF_COUNT_PER_NODE=${PF_COUNT_PER_NODE:-1} +CERTCREATOR_PATH="${KUBEVIRTCI_PATH}/cluster/$KUBEVIRT_PROVIDER/certcreator" OPERATOR_GIT_HASH=8d3c30de8ec5a9a0c9eeb84ea0aa16ba2395cd68 # release-4.4 -SRIOV_OPERATOR_NAMESPACE="sriov-network-operator" - -[ $PF_COUNT_PER_NODE -le 0 ] && echo "FATAL: PF_COUNT_PER_NODE must be a positive integer" >&2 && exit 1 # This function takes a command and invokes it repeatedly # until the command's return code is zero @@ -109,7 +100,7 @@ function _check_all_pods_ready() { # not using kubectl wait since with the sriov operator the pods get restarted a couple of times and this is # more reliable -function wait_pods_ready { +function sriov_operator::wait_pods_ready { local -r tries=30 local -r wait_time=10 @@ -131,7 +122,7 @@ function wait_pods_ready { return 0 } -function wait_allocatable_resource { +function sriov_operator::wait_allocatable_resource { local -r node=$1 local resource_name=$2 local -r expected_value=$3 @@ -154,7 +145,7 @@ function wait_allocatable_resource { return 0 } -function deploy_multus { +function sriov_operator::deploy_multus { echo 'Deploying Multus' _kubectl create -f $MANIFESTS_DIR/multus.yaml @@ -167,7 +158,7 @@ function deploy_multus { return 0 } -function deploy_sriov_operator { +function sriov_operator::deploy_sriov_operator { echo 'Downloading the SR-IOV operator' operator_path=${KUBEVIRTCI_CONFIG_PATH}/$KUBEVIRT_PROVIDER/sriov-network-operator-${OPERATOR_GIT_HASH} if [ !
-d $operator_path ]; then @@ -206,7 +197,7 @@ function deploy_sriov_operator { return 0 } -function apply_sriov_node_policy { +function sriov_operator::apply_sriov_node_policy { local -r policy_file=$1 local -r node_pf=$2 local -r num_vfs=$3 @@ -219,91 +210,3 @@ function apply_sriov_node_policy { return 0 } - -function move_sriov_pfs_netns_to_node { - local -r node=$1 - local -r pf_count_per_node=$2 - local -r pid="$(docker inspect -f '{{.State.Pid}}' $node)" - local pf_array=() - - mkdir -p /var/run/netns/ - ln -sf /proc/$pid/ns/net "/var/run/netns/$node" - - local -r sriov_pfs=( $(find /sys/class/net/*/device/sriov_numvfs) ) - [ "${#sriov_pfs[@]}" -eq 0 ] && echo "FATAL: Could not find available sriov PFs" >&2 && return 1 - - for pf in "${sriov_pfs[@]}"; do - local pf_name="${pf%%/device/*}" - pf_name="${pf_name##*/}" - - if [ $(echo "${PF_BLACKLIST[@]}" | grep "${pf_name}") ]; then - continue - fi - - # In case two clusters started at the same time, they might race on the same PF. - # The first will manage to assign the PF to its container, and the 2nd will just skip it - # and try the rest of the PFs available. - if ip link set "$pf_name" netns "$node"; then - if timeout 10s bash -c "until ip netns exec $node ip link show $pf_name > /dev/null; do sleep 1; done"; then - pf_array+=("$pf_name") - [ "${#pf_array[@]}" -eq "$pf_count_per_node" ] && break - fi - fi - done - - [ "${#pf_array[@]}" -lt "$pf_count_per_node" ] && \ - echo "FATAL: Not enough PFs allocated, PF_BLACKLIST (${PF_BLACKLIST[@]}), PF_COUNT_PER_NODE ${PF_COUNT_PER_NODE}" >&2 && \ - return 1 - - echo "${pf_array[@]}" -} - -# The first worker needs to be handled specially as it has no ending number, and sort will not work -# We add the 0 to it and we remove it if it's the candidate worker -WORKER=$(_kubectl get nodes | grep $WORKER_NODE_ROOT | sed "s/\b$WORKER_NODE_ROOT\b/${WORKER_NODE_ROOT}0/g" | sort -r | awk 'NR==1 {print $1}') -if [[ -z "$WORKER" ]]; then - SRIOV_NODE=$MASTER_NODE -else - SRIOV_NODE=$WORKER -fi - -# this is to remove the ending 0 in case the candidate worker is the first one -if [[ "$SRIOV_NODE" == "${WORKER_NODE_ROOT}0" ]]; then - SRIOV_NODE=${WORKER_NODE_ROOT} -fi - -NODE_PFS=($(move_sriov_pfs_netns_to_node "$SRIOV_NODE" "$PF_COUNT_PER_NODE")) - -SRIOV_NODE_CMD="docker exec -it -d ${SRIOV_NODE}" -${SRIOV_NODE_CMD} mount -o remount,rw /sys # kind remounts it as readonly when it starts, we need it to be writeable -${SRIOV_NODE_CMD} chmod 666 /dev/vfio/vfio -_kubectl label node $SRIOV_NODE sriov=true - -for pf in "${NODE_PFS[@]}"; do - docker exec $SRIOV_NODE bash -c "echo 0 > /sys/class/net/$pf/device/sriov_numvfs" -done - -deploy_multus -wait_pods_ready - -deploy_sriov_operator -wait_pods_ready - -# We use just the first suitable pf, for the SriovNetworkNodePolicy manifest. 
-# We also need the num of vfs because if we don't set this value equals to the total, in case of mellanox -# the sriov operator will trigger a node reboot to update the firmware -NODE_PF=$NODE_PFS -NODE_PF_NUM_VFS=$(docker exec $SRIOV_NODE cat /sys/class/net/$NODE_PF/device/sriov_totalvfs) - -POLICY="$MANIFESTS_DIR/network_config_policy.yaml" -apply_sriov_node_policy "$POLICY" "$NODE_PF" "$NODE_PF_NUM_VFS" - -# Verify that sriov node has sriov VFs allocatable resource -resource_name=$(sed -n 's/.*resourceName: *//p' $POLICY) -wait_allocatable_resource $SRIOV_NODE "openshift.io/$resource_name" $NODE_PF_NUM_VFS -wait_pods_ready - -_kubectl get nodes -_kubectl get pods -n $SRIOV_OPERATOR_NAMESPACE -echo -echo "$KUBEVIRT_PROVIDER cluster is ready" diff --git a/cluster-up/cluster/kind-k8s-sriov-1.17.0/sriovdp_setup.sh b/cluster-up/cluster/kind-1.19-sriov/sriovdp_setup.sh similarity index 100% rename from cluster-up/cluster/kind-k8s-sriov-1.17.0/sriovdp_setup.sh rename to cluster-up/cluster/kind-1.19-sriov/sriovdp_setup.sh diff --git a/cluster-up/cluster/kind-1.19-vgpu/README.md b/cluster-up/cluster/kind-1.19-vgpu/README.md new file mode 100644 index 000000000..4d1877839 --- /dev/null +++ b/cluster-up/cluster/kind-1.19-vgpu/README.md @@ -0,0 +1,45 @@ +# K8S 1.19.11 with mdev support in a Kind cluster + +Provides a pre-deployed k8s cluster with version 1.19.11 that runs using [kind](https://github.com/kubernetes-sigs/kind). The cluster is completely ephemeral and is recreated on every cluster restart. +The KubeVirt containers are built on the local machine and are then pushed to a registry which is exposed at +`localhost:5000`. + +## Bringing the cluster up + +The following needs to be executed as root. + +```bash +export KUBEVIRT_PROVIDER=kind-1.19-vgpu +make cluster-up +``` + +The cluster can be accessed as usual: + +```bash +$ cluster-up/kubectl.sh get nodes +NAME STATUS ROLES AGE VERSION +vgpu-control-plane Ready master 6m14s v1.19.11 +``` + +## Bringing the cluster down + +```bash +make cluster-down +``` + +This destroys the whole cluster. + +## Setting a custom kind version + +In order to use a custom kind image / kind version, +export KIND_NODE_IMAGE, KIND_VERSION, KUBECTL_PATH before running cluster-up. +For example, in order to use kind 0.9.0 (which is based on k8s-1.19.1) use: +```bash +export KIND_NODE_IMAGE="kindest/node:v1.19.1@sha256:98cf5288864662e37115e362b23e4369c8c4a408f99cbc06e58ac30ddc721600" +export KIND_VERSION="0.9.0" +export KUBECTL_PATH="/usr/bin/kubectl" +``` +This allows users to test or use custom images / different kind versions before making them official. +See https://github.com/kubernetes-sigs/kind/releases for details about node images according to the kind version. + +- In order to use `make cluster-down`, please make sure the right `CLUSTER_NAME` is exported (see the example below).
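For example, to tear down a cluster that was brought up with this provider's defaults (`vgpu` mirrors the `DEFAULT_CLUSTER_NAME` set in the provider script; adjust it if you overrode the name):

```bash
export KUBEVIRT_PROVIDER=kind-1.19-vgpu
export CLUSTER_NAME=vgpu   # the provider's default cluster name
make cluster-down
```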
diff --git a/cluster-up/cluster/kind-1.19-vgpu/config_vgpu_cluster.sh b/cluster-up/cluster/kind-1.19-vgpu/config_vgpu_cluster.sh new file mode 100755 index 000000000..ef838c7d1 --- /dev/null +++ b/cluster-up/cluster/kind-1.19-vgpu/config_vgpu_cluster.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +[ $(id -u) -ne 0 ] && echo "FATAL: this script requires sudo privileges" >&2 && exit 1 + +set -xe + +SCRIPT_PATH=$(dirname "$(realpath "$0")") + +source ${SCRIPT_PATH}/vgpu-node/node.sh +echo "_kubectl: " ${_kubectl} +echo "KUBECTL_PATH: " $KUBECTL_PATH +echo "KUBEVIRTCI_PATH: " ${KUBEVIRTCI_PATH} +source ${KUBEVIRTCI_PATH}/cluster/kind/common.sh +echo "_kubectl: " ${_kubectl} + +nodes=($(_kubectl get nodes -o custom-columns=:.metadata.name --no-headers)) +node::remount_sysfs "${nodes[*]}" +node::discover_host_gpus + +_kubectl get nodes diff --git a/cluster-up/cluster/kind-1.19-vgpu/provider.sh b/cluster-up/cluster/kind-1.19-vgpu/provider.sh new file mode 100755 index 000000000..665b7083a --- /dev/null +++ b/cluster-up/cluster/kind-1.19-vgpu/provider.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash + +set -e + +DEFAULT_CLUSTER_NAME="vgpu" +DEFAULT_HOST_PORT=5000 +ALTERNATE_HOST_PORT=5001 +export CLUSTER_NAME=${CLUSTER_NAME:-$DEFAULT_CLUSTER_NAME} + +if [ $CLUSTER_NAME == $DEFAULT_CLUSTER_NAME ]; then + export HOST_PORT=$DEFAULT_HOST_PORT +else + export HOST_PORT=$ALTERNATE_HOST_PORT +fi + +# 'kubevirt-test-default1' is the default namespace of +# KubeVirt VGPU tests where the VGPU VMs will be created. +VGPU_TESTS_NS="${VGPU_TESTS_NS:-kubevirt-test-default1}" + +function set_kind_params() { + export KIND_VERSION="${KIND_VERSION:-0.11.1}" + export KIND_NODE_IMAGE="${KIND_NODE_IMAGE:-quay.io/kubevirtci/kindest_node:v1.19.11@sha256:cbecc517bfad65e368cd7975d1e8a4f558d91160c051d0b1d10ff81488f5fb06}" + export KUBECTL_PATH="${KUBECTL_PATH:-/bin/kubectl}" +} + +function up() { + # load the vfio_mdev module + /usr/sbin/modprobe vfio_mdev + + # print hardware info for easier debugging based on logs + echo 'Available cards' + docker run --rm --cap-add=SYS_RAWIO quay.io/phoracek/lspci@sha256:0f3cacf7098202ef284308c64e3fc0ba441871a846022bb87d65ff130c79adb1 sh -c "lspci -k | grep -EA2 'VGA|3D'" + echo "" + + cp $KIND_MANIFESTS_DIR/kind.yaml ${KUBEVIRTCI_CONFIG_PATH}/$KUBEVIRT_PROVIDER/kind.yaml + _add_worker_kubeadm_config_patch + _add_worker_extra_mounts + kind_up + + # remove the rancher.io kind default storageClass + _kubectl delete sc standard + + ${KUBEVIRTCI_PATH}/cluster/$KUBEVIRT_PROVIDER/config_vgpu_cluster.sh + + # In order to support live migration on a containerized cluster we need to work around + # the libvirt uuid check for source and target nodes. + # To do that we create a PodPreset that mounts a fake random product_uuid into virt-launcher pods, + # and create the KubeVirt VGPU tests namespace for the PodPreset beforehand.
+ podpreset::expose_unique_product_uuid_per_node "$CLUSTER_NAME" "$VGPU_TESTS_NS" + + echo "$KUBEVIRT_PROVIDER cluster '$CLUSTER_NAME' is ready" +} + +set_kind_params + +source ${KUBEVIRTCI_PATH}/cluster/kind/common.sh +source ${KUBEVIRTCI_PATH}/cluster/kind/podpreset.sh diff --git a/cluster-up/cluster/kind-1.19-vgpu/vgpu-node/node.sh b/cluster-up/cluster/kind-1.19-vgpu/vgpu-node/node.sh new file mode 100644 index 000000000..28fdfbca8 --- /dev/null +++ b/cluster-up/cluster/kind-1.19-vgpu/vgpu-node/node.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +function node::discover_host_gpus() { + local -r gpu_types=( $(find /sys/class/mdev_bus/*/mdev_supported_types) ) + [ "${#gpu_types[@]}" -eq 0 ] && echo "FATAL: Could not find available GPUs on host" >&2 && return 1 + + local gpu_addr + local gpu_addresses=() + for path in "${gpu_types[@]}"; do + gpu_addr="${path#/sys/class/mdev_bus/}" + gpu_addr=${gpu_addr%/*} + + gpu_addresses+=( $gpu_addr ) + done + + echo "${gpu_addresses[@]}" +} + +function node::remount_sysfs() { + local -r nodes_array=($1) + local node_exec + + for node in "${nodes_array[@]}"; do + + # KIND mounts sysfs as read-only by default, remount it as R/W + node_exec="docker exec $node" + $node_exec mount -o remount,rw /sys + $node_exec chmod 666 /dev/vfio/vfio + + done +} + diff --git a/cluster-up/cluster/kind-k8s-1.17/README.md b/cluster-up/cluster/kind-k8s-1.17/README.md index bbba91339..1e3a60b67 100644 --- a/cluster-up/cluster/kind-k8s-1.17/README.md +++ b/cluster-up/cluster/kind-k8s-1.17/README.md @@ -31,3 +31,16 @@ make cluster-down This destroys the whole cluster. + +## Setting a custom kind version + +In order to use a custom kind image / kind version, +export KIND_NODE_IMAGE, KIND_VERSION, KUBECTL_PATH before running cluster-up. +For example, in order to use kind 0.9.0 (which is based on k8s-1.19.1) use: +```bash +export KIND_NODE_IMAGE="kindest/node:v1.19.1@sha256:98cf5288864662e37115e362b23e4369c8c4a408f99cbc06e58ac30ddc721600" +export KIND_VERSION="0.9.0" +export KUBECTL_PATH="/usr/bin/kubectl" +``` +This allows users to test or use custom images / different kind versions before making them official. +See https://github.com/kubernetes-sigs/kind/releases for details about node images according to the kind version.
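These overrides take effect because each provider resolves its kind parameters with shell default expansion, falling back to the pinned values only when a variable is unset. A minimal sketch of the pattern (the values mirror the kind-1.17 provider below):

```bash
function set_kind_params() {
    # a value exported by the user takes precedence; otherwise the pinned default applies
    export KIND_NODE_IMAGE="${KIND_NODE_IMAGE:-kindest/node:v1.17.2}"
    export KIND_VERSION="${KIND_VERSION:-0.7.0}"
    export KUBECTL_PATH="${KUBECTL_PATH:-/kind/bin/kubectl}"
}
```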
diff --git a/cluster-up/cluster/kind-k8s-1.17/provider.sh b/cluster-up/cluster/kind-k8s-1.17/provider.sh index 8e2ce00d5..37787182a 100644 --- a/cluster-up/cluster/kind-k8s-1.17/provider.sh +++ b/cluster-up/cluster/kind-k8s-1.17/provider.sh @@ -5,9 +5,12 @@ set -e DOCKER="${CONTAINER_RUNTIME:-docker}" export CLUSTER_NAME="kind-1.17" -export KIND_NODE_IMAGE="kindest/node:v1.17.2" -source ${KUBEVIRTCI_PATH}/cluster/kind/common.sh +function set_kind_params() { + export KIND_NODE_IMAGE="${KIND_NODE_IMAGE:-kindest/node:v1.17.2}" + export KIND_VERSION="${KIND_VERSION:-0.7.0}" + export KUBECTL_PATH="${KUBECTL_PATH:-/kind/bin/kubectl}" +} function up() { cp $KIND_MANIFESTS_DIR/kind.yaml ${KUBEVIRTCI_CONFIG_PATH}/$KUBEVIRT_PROVIDER/kind.yaml @@ -49,3 +52,7 @@ function mount_disk() { $DOCKER exec $node bash -c "mkdir -p /mnt/local-storage/local/disk${idx}" $DOCKER exec $node bash -c "mount -o bind /var/local/kubevirt-storage/local-volume/disk${idx} /mnt/local-storage/local/disk${idx}" } + +set_kind_params + +source ${KUBEVIRTCI_PATH}/cluster/kind/common.sh diff --git a/cluster-up/cluster/kind-k8s-sriov-1.17.0/provider.sh b/cluster-up/cluster/kind-k8s-sriov-1.17.0/provider.sh deleted file mode 100755 index f3df0862c..000000000 --- a/cluster-up/cluster/kind-k8s-sriov-1.17.0/provider.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEFAULT_CLUSTER_NAME="sriov" -DEFAULT_HOST_PORT=5000 -ALTERNATE_HOST_PORT=5001 -export CLUSTER_NAME=${CLUSTER_NAME:-$DEFAULT_CLUSTER_NAME} - -if [ $CLUSTER_NAME == $DEFAULT_CLUSTER_NAME ]; then - export HOST_PORT=$DEFAULT_HOST_PORT -else - export HOST_PORT=$ALTERNATE_HOST_PORT -fi - -function set_kind_params() { - export KIND_NODE_IMAGE="${KIND_NODE_IMAGE:-kindest/node:v1.17.0}" - export KIND_VERSION="${KIND_VERSION:-0.7.0}" - export KUBECTL_PATH="${KUBECTL_PATH:-/kind/bin/kubectl}" -} - -function up() { - if [[ "$KUBEVIRT_NUM_NODES" -ne 2 ]]; then - echo 'SR-IOV cluster can be only started with 2 nodes' - exit 1 - fi - - # print hardware info for easier debugging based on logs - echo 'Available NICs' - docker run --rm --cap-add=SYS_RAWIO quay.io/phoracek/lspci@sha256:0f3cacf7098202ef284308c64e3fc0ba441871a846022bb87d65ff130c79adb1 sh -c "lspci | egrep -i 'network|ethernet'" - echo "" - - cp $KIND_MANIFESTS_DIR/kind.yaml ${KUBEVIRTCI_CONFIG_PATH}/$KUBEVIRT_PROVIDER/kind.yaml - - kind_up - - # remove the rancher.io kind default storageClass - _kubectl delete sc standard - - ${KUBEVIRTCI_PATH}/cluster/$KUBEVIRT_PROVIDER/config_sriov.sh -} - -set_kind_params - -source ${KUBEVIRTCI_PATH}/cluster/kind/common.sh diff --git a/cluster-up/cluster/kind/common.sh b/cluster-up/cluster/kind/common.sh index d76482819..d7b644b71 100755 --- a/cluster-up/cluster/kind/common.sh +++ b/cluster-up/cluster/kind/common.sh @@ -25,6 +25,7 @@ export KIND_MANIFESTS_DIR="${KUBEVIRTCI_PATH}/cluster/kind/manifests" export KIND_NODE_CLI="docker exec -it " export KUBEVIRTCI_PATH export KUBEVIRTCI_CONFIG_PATH +KIND_DEFAULT_NETWORK="kind" KUBECTL="${KUBEVIRTCI_CONFIG_PATH}/$KUBEVIRT_PROVIDER/.kubectl --kubeconfig=${KUBEVIRTCI_CONFIG_PATH}/$KUBEVIRT_PROVIDER/.kubeconfig" @@ -81,17 +82,22 @@ function _ssh_into_node() { } function _run_registry() { + local -r network=${1} + until [ -z "$(docker ps -a | grep $REGISTRY_NAME)" ]; do docker stop $REGISTRY_NAME || true docker rm $REGISTRY_NAME || true sleep 5 done - docker run -d -p $HOST_PORT:5000 --restart=always --name $REGISTRY_NAME registry:2 + docker run -d --network=${network} -p $HOST_PORT:5000 --restart=always --name $REGISTRY_NAME 
registry:2 } function _configure_registry_on_node() { - _configure-insecure-registry-and-reload "${NODE_CMD} $1 bash -c" - ${NODE_CMD} $1 sh -c "echo $(docker inspect --format '{{.NetworkSettings.IPAddress }}' $REGISTRY_NAME)'\t'registry >> /etc/hosts" + local -r node=${1} + local -r network=${2} + + _configure-insecure-registry-and-reload "${NODE_CMD} ${node} bash -c" + ${NODE_CMD} ${node} sh -c "echo $(docker inspect --format "{{.NetworkSettings.Networks.${network}.IPAddress }}" $REGISTRY_NAME)'\t'registry >> /etc/hosts" } function _install_cnis { @@ -237,17 +243,17 @@ function setup_kind() { done _wait_containers_ready - _run_registry + _run_registry "$KIND_DEFAULT_NETWORK" for node in $(_get_nodes | awk '{print $1}'); do - _configure_registry_on_node "$node" + _configure_registry_on_node "$node" "$KIND_DEFAULT_NETWORK" _configure_network "$node" done prepare_config } function _add_worker_extra_mounts() { - if [[ "$KUBEVIRT_PROVIDER" =~ sriov.* ]]; then + if [[ "$KUBEVIRT_PROVIDER" =~ sriov.* || "$KUBEVIRT_PROVIDER" =~ vgpu.* ]]; then cat <<EOF >> ${KUBEVIRTCI_CONFIG_PATH}/$KUBEVIRT_PROVIDER/kind.yaml extraMounts: - containerPath: /lib/modules diff --git a/cluster-up/cluster/kind/manifests/kube-calico.diff.in b/cluster-up/cluster/kind/manifests/kube-calico.diff.in index a2c6d379f..9b1b9251a 100644 --- a/cluster-up/cluster/kind/manifests/kube-calico.diff.in +++ b/cluster-up/cluster/kind/manifests/kube-calico.diff.in @@ -1,5 +1,5 @@ ---- kube-calico.yaml 2020-10-26 09:43:25.494348951 +0200 -+++ manifests/kube-calico.yaml 2020-06-08 18:22:46.909259279 +0300 +--- kube-calico.yaml.in 2021-03-18 09:52:16.000000000 +0200 ++++ kube-calico.yaml 2021-04-18 16:21:40.000000000 +0300 @@ -14,7 +14,7 @@ # Configure the MTU to use for workload interfaces and the # tunnels. For IPIP, set to your network MTU - 20; for VXLAN @@ -19,3 +19,48 @@ "policy": { "type": "k8s" }, +@@ -557,7 +560,7 @@ + # It can be deleted if this is a fresh installation, or if you have already + # upgraded to use calico-ipam. + - name: upgrade-ipam +- image: calico/cni:v3.14.1 ++ image: quay.io/kubevirtci/calico_cni:v3.14.1 + command: ["/opt/cni/bin/calico-ipam", "-upgrade"] + env: + - name: KUBERNETES_NODE_NAME +@@ -579,7 +582,7 @@ + # This container installs the CNI binaries + # and CNI network config file on each node. + - name: install-cni +- image: calico/cni:v3.14.1 ++ image: quay.io/kubevirtci/calico_cni:v3.14.1 + command: ["/install-cni.sh"] + env: + # Name of the CNI config file to create. +@@ -615,7 +618,7 @@ + # Adds a Flex Volume Driver that creates a per-pod Unix Domain Socket to allow Dikastes + # to communicate with Felix over the Policy Sync API. + - name: flexvol-driver +- image: calico/pod2daemon-flexvol:v3.14.1 ++ image: quay.io/kubevirtci/calico_pod2daemon-flexvol:v3.14.1 + volumeMounts: + - name: flexvol-driver-host + mountPath: /host/driver +@@ -626,7 +629,7 @@ + # container programs network policy and routes on each + # host. + - name: calico-node +- image: calico/node:v3.14.1 ++ image: quay.io/kubevirtci/calico_node:v3.14.1 + env: + # Use Kubernetes API as the backing datastore. + - name: DATASTORE_TYPE +@@ -809,7 +812,7 @@ + priorityClassName: system-cluster-critical + containers: + - name: calico-kube-controllers +- image: calico/kube-controllers:v3.14.1 ++ image: quay.io/kubevirtci/calico_kube-controllers:v3.14.1 + env: + # Choose which controllers to run.
+ - name: ENABLED_CONTROLLERS diff --git a/cluster-up/cluster/kind/manifests/product-uuid-podpreset.yaml b/cluster-up/cluster/kind/manifests/product-uuid-podpreset.yaml new file mode 100644 index 000000000..58452cd5d --- /dev/null +++ b/cluster-up/cluster/kind/manifests/product-uuid-podpreset.yaml @@ -0,0 +1,16 @@ +apiVersion: settings.k8s.io/v1alpha1 +kind: PodPreset +metadata: + name: virt-launcher-fake-product-uuid +spec: + selector: + matchLabels: + kubevirt.io: virt-launcher + volumeMounts: + - name: product-uuid + mountPath: /sys/class/dmi/id/product_uuid + volumes: + - name: product-uuid + hostPath: + path: /proc/sys/kernel/random/uuid + diff --git a/cluster-up/cluster/kind/podpreset.sh b/cluster-up/cluster/kind/podpreset.sh new file mode 100644 index 000000000..2a854f917 --- /dev/null +++ b/cluster-up/cluster/kind/podpreset.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash + +set -e + +source ${KUBEVIRTCI_PATH}/cluster/kind/common.sh + +function podpreset::enable_admission_plugin() { + local -r cluster_name=$1 + + docker exec "$cluster_name-control-plane" bash -c 'sed -i \ + -e "s/NodeRestriction/NodeRestriction,PodPreset/" \ + -e "/NodeRestriction,PodPreset/ a\ - --runtime-config=settings.k8s.io/v1alpha1=true" \ + /etc/kubernetes/manifests/kube-apiserver.yaml' +} + +function podpreset::validate_admission_plugin_is_enabled() { + local -r cluster_name=$1 + local -r wait_time=$2 + local -r control_plane_container="$cluster_name-control-plane" + + # wait until the restarted kube-apiserver comes up with the PodPreset plugin + # enabled (the probe below is an assumed sketch: it greps the running apiserver args) + if ! timeout "${wait_time}s" bash -c "until docker exec $control_plane_container ps -ef | grep -v grep | grep kube-apiserver | grep -q PodPreset; do sleep 1; done"; then + echo "FATAL: PodPreset admission plugin could not be enabled" >&2 + return 1 + fi +} + +function podpreset::create_virt_launcher_fake_product_uuid_podpreset() { + local -r namespace=$1 + + if ! _kubectl get ns "$namespace" &>/dev/null; then + _kubectl create ns "$namespace" + fi + + _kubectl apply -f "$KIND_MANIFESTS_DIR/product-uuid-podpreset.yaml" -n "$namespace" +} + +function podpreset::expose_unique_product_uuid_per_node() { + local -r cluster_name=$1 + local -r namespace=$2 + + podpreset::enable_admission_plugin "$cluster_name" + podpreset::validate_admission_plugin_is_enabled "$cluster_name" "30" + podpreset::create_virt_launcher_fake_product_uuid_podpreset "$namespace" +} diff --git a/cluster-up/hack/common.sh b/cluster-up/hack/common.sh index 2b5d0dbe1..4387004e1 100644 --- a/cluster-up/hack/common.sh +++ b/cluster-up/hack/common.sh @@ -17,11 +17,15 @@ fi KUBEVIRTCI_CLUSTER_PATH=${KUBEVIRTCI_CLUSTER_PATH:-${KUBEVIRTCI_PATH}/cluster} -KUBEVIRT_PROVIDER=${KUBEVIRT_PROVIDER:-k8s-1.18} +KUBEVIRT_PROVIDER=${KUBEVIRT_PROVIDER:-k8s-1.20} KUBEVIRT_NUM_NODES=${KUBEVIRT_NUM_NODES:-1} KUBEVIRT_MEMORY_SIZE=${KUBEVIRT_MEMORY_SIZE:-5120M} KUBEVIRT_NUM_SECONDARY_NICS=${KUBEVIRT_NUM_SECONDARY_NICS:-0} KUBEVIRT_DEPLOY_ISTIO=${KUBEVIRT_DEPLOY_ISTIO:-false} +KUBEVIRT_DEPLOY_PROMETHEUS=${KUBEVIRT_DEPLOY_PROMETHEUS:-false} +KUBEVIRT_DEPLOY_PROMETHEUS_ALERTMANAGER=${KUBEVIRT_DEPLOY_PROMETHEUS_ALERTMANAGER:-false} +KUBEVIRT_DEPLOY_GRAFANA=${KUBEVIRT_DEPLOY_GRAFANA:-false} +KUBEVIRT_CGROUPV2=${KUBEVIRT_CGROUPV2:-false} # If on a developer setup, expose ocp on 8443, so that the openshift web console can be used (the port is important because of auth redirects) # http and https ports are accessed by testing framework and should not be randomized @@ -35,4 +39,4 @@ provider_prefix=${JOB_NAME:-${KUBEVIRT_PROVIDER}}${EXECUTOR_NUMBER} job_prefix=${JOB_NAME:-kubevirt}${EXECUTOR_NUMBER} mkdir -p $KUBEVIRTCI_CONFIG_PATH/$KUBEVIRT_PROVIDER -KUBEVIRTCI_TAG=2103240101-142f745 +KUBEVIRTCI_TAG=2108081530-91f55e3 diff --git a/hack/update-kubevirtci.sh b/hack/update-kubevirtci.sh index e66124ab7..ca46d5a2d 100755 ---
a/hack/update-kubevirtci.sh +++ b/hack/update-kubevirtci.sh @@ -16,15 +16,13 @@ SCRIPT_ROOT="$(cd "$(dirname $0)/../" && pwd -P)" -# the kubevirtci commit hash to vendor from -kubevirtci_git_hash=142f7450f94d866db863b4af254b5428fe1c570a +# the kubevirtci release to vendor from (https://github.com/kubevirt/kubevirtci/releases) +kubevirtci_release_tag=2108081530-91f55e3 # remove previous cluster-up dir entirely before vendoring rm -rf ${SCRIPT_ROOT}/cluster-up # download and extract the cluster-up dir from a specific release tag in kubevirtci -curl -L https://github.com/kubevirt/kubevirtci/archive/${kubevirtci_git_hash}/kubevirtci.tar.gz | tar xz kubevirtci-${kubevirtci_git_hash}/cluster-up --strip-component 1 +curl -L https://github.com/kubevirt/kubevirtci/archive/${kubevirtci_release_tag}/kubevirtci.tar.gz | tar xz kubevirtci-${kubevirtci_release_tag}/cluster-up --strip-component 1 -rm -f "${SCRIPT_ROOT}/cluster-up/cluster/kind-k8s-sriov-1.17.0/csrcreator/certsecret.go" - -echo "KUBEVIRTCI_TAG=$(curl -L https://storage.googleapis.com/kubevirt-prow/release/kubevirt/kubevirtci/latest)" >>${SCRIPT_ROOT}/cluster-up/hack/common.sh +echo "KUBEVIRTCI_TAG=${kubevirtci_release_tag}" >>${SCRIPT_ROOT}/cluster-up/hack/common.sh diff --git a/manifests/templates/registry-host.yaml.in b/manifests/templates/registry-host.yaml.in index 678ff8545..cb8d786c7 100644 --- a/manifests/templates/registry-host.yaml.in +++ b/manifests/templates/registry-host.yaml.in @@ -75,6 +75,12 @@ spec: - name: registry-populate image: {{ .DockerRepo }}/cdi-func-test-registry-populate:{{ .DockerTag }} imagePullPolicy: {{ .PullPolicy }} + securityContext: + capabilities: + add: + # building using buildah requires a properly installed shadow-utils package (which in turn requires SETFCAP) + # https://www.redhat.com/sysadmin/podman-inside-kubernetes + - SETFCAP args: [ "/tmp/shared/images/", "/tmp/docker-images", "127.0.0.1", "443", "/tmp/health/healthy", "15", "/tmp/health/ready", "15"] volumeMounts: - name: "images" diff --git a/tests/utils/common.go b/tests/utils/common.go index 9e2c6395a..03c874bef 100644 --- a/tests/utils/common.go +++ b/tests/utils/common.go @@ -148,7 +148,7 @@ func UpdateCDIConfigWithOptions(c client.Client, opts metav1.UpdateOptions, upda return err } - if err = wait.PollImmediate(1*time.Second, 10*time.Second, func() (bool, error) { + if err = wait.PollImmediate(1*time.Second, 20*time.Second, func() (bool, error) { cfg := &cdiv1.CDIConfig{} err := c.Get(context.TODO(), types.NamespacedName{Name: "config"}, cfg) return apiequality.Semantic.DeepEqual(&cfg.Spec, cdi.Spec.Config), err diff --git a/tools/cdi-func-test-registry-init/populate-registry.sh b/tools/cdi-func-test-registry-init/populate-registry.sh index 4dc98b423..24186cbe1 100644 --- a/tools/cdi-func-test-registry-init/populate-registry.sh +++ b/tools/cdi-func-test-registry-init/populate-registry.sh @@ -114,6 +114,9 @@ function pushImages { #remove storage.conf if exists rm -rf /etc/containers/storage.conf +#building using buildah requires a properly installed shadow-utils package (which in turn requires SETFCAP) +rpm --restore shadow-utils 2>/dev/null + #start health beat health $HEALTH_PATH $HEALTH_PERIOD &
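As a sanity check for the shadow-utils restore above, one can verify that the suid helper binaries regained their file capabilities inside the registry-populate container. A sketch, assuming the standard binary paths and that `getcap` (from libcap) is available in the image:

```bash
# re-apply the permissions and file capabilities recorded in the rpm database,
# as done in populate-registry.sh above
rpm --restore shadow-utils 2>/dev/null

# newuidmap/newgidmap should now carry cap_setuid/cap_setgid, which buildah
# relies on for rootless user-namespace setup
getcap /usr/bin/newuidmap /usr/bin/newgidmap
```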