whereabouts/pkg/node-controller/controller.go
Thomas Ferrandiz daa12ca505 Update code generator script
The script we were using is fully deprecated so we need to upgrade
to a new code generator.
2024-12-16 16:05:40 +00:00

618 lines
21 KiB
Go

package node_controller
import (
"context"
"fmt"
"sort"
"time"
"golang.org/x/time/rate"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
coreinformers "k8s.io/client-go/informers/core/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/workqueue"
"k8s.io/klog/v2"
cncfV1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1"
nadclient "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned"
nadinformers "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/informers/externalversions/k8s.cni.cncf.io/v1"
nadlisters "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/listers/k8s.cni.cncf.io/v1"
"github.com/k8snetworkplumbingwg/whereabouts/pkg/api/whereabouts.cni.cncf.io/v1alpha1"
"github.com/k8snetworkplumbingwg/whereabouts/pkg/config"
clientset "github.com/k8snetworkplumbingwg/whereabouts/pkg/generated/clientset/versioned"
whereaboutsInformers "github.com/k8snetworkplumbingwg/whereabouts/pkg/generated/informers/externalversions/whereabouts.cni.cncf.io/v1alpha1"
whereaboutsListers "github.com/k8snetworkplumbingwg/whereabouts/pkg/generated/listers/whereabouts.cni.cncf.io/v1alpha1"
"github.com/k8snetworkplumbingwg/whereabouts/pkg/iphelpers"
"github.com/k8snetworkplumbingwg/whereabouts/pkg/types"
)
const (
	// controllerAgentName is the component name attached to the Kubernetes
	// events recorded by this controller.
	controllerAgentName = "node-controller"

	// whereaboutsConfigPath is the location of the whereabouts flat-file
	// configuration; ipamConfiguration appends it to its mount path prefix.
	whereaboutsConfigPath = "/etc/cni/net.d/whereabouts.d/whereabouts.conf"
)
// Controller reconciles NodeSlicePool resources from
// NetworkAttachmentDefinitions (NADs) and the set of cluster nodes: each NAD
// key taken off the workqueue is handed to syncHandler, which creates, updates
// or deletes the matching NodeSlicePool.
type Controller struct {
	// kubeclientset is a standard kubernetes clientset
	kubeclientset kubernetes.Interface
	// whereaboutsclientset is a clientset for the whereabouts API group; it is
	// used to create, update and delete NodeSlicePools.
	whereaboutsclientset clientset.Interface
	// nadclientset is a clientset for NetworkAttachmentDefinitions.
	nadclientset nadclient.Interface
	// Node informer, its lister, and the function reporting whether the node
	// cache has synced.
	nodeLister   corelisters.NodeLister
	nodeInformer coreinformers.NodeInformer
	nodesSynced  cache.InformerSynced
	// NodeSlicePool informer, its lister, and its cache-synced function.
	nodeSlicePoolLister   whereaboutsListers.NodeSlicePoolLister
	nodeSlicePoolInformer whereaboutsInformers.NodeSlicePoolInformer
	nodeSlicePoolSynced   cache.InformerSynced
	// NetworkAttachmentDefinition informer, its lister, and its cache-synced
	// function.
	nadInformer nadinformers.NetworkAttachmentDefinitionInformer
	nadLister   nadlisters.NetworkAttachmentDefinitionLister
	nadSynced   cache.InformerSynced
	// workqueue is a rate limited work queue. This is used to queue work to be
	// processed instead of performing it as soon as a change happens. This
	// means we can ensure we only process a fixed amount of resources at a
	// time, and makes it easy to ensure we are never processing the same item
	// simultaneously in two different workers.
	workqueue workqueue.RateLimitingInterface
	// recorder is an event recorder for recording Event resources to the
	// Kubernetes API.
	recorder record.EventRecorder
	// For testing, sort nodes by name before assigning them to slices to get
	// consistent return values.
	sortResults bool
	// whereabouts namespace set from WHEREABOUTS_NAMESPACE env var, should match what's in the daemonset
	// this is where the IPPools and NodeSlicePools will be created
	whereaboutsNamespace string
}
// NewController wires up a node-slice Controller.
//
// It starts an event broadcaster that logs events and records them through
// kubeclientset, builds the rate-limited workqueue, stores the listers and
// cache-synced functions of the supplied informers, and registers the event
// handlers:
//   - NAD add/update/delete enqueue the NAD key (updates carrying an unchanged
//     resource version are ignored);
//   - node add/delete enqueue every known NAD.
//
// sortResults makes node assignment deterministic (intended for tests) and
// whereaboutsNamespace is the namespace in which NodeSlicePools are managed.
func NewController(
	ctx context.Context,
	kubeclientset kubernetes.Interface,
	whereaboutsclientset clientset.Interface,
	nadclientset nadclient.Interface,
	nodeInformer coreinformers.NodeInformer,
	nodeSlicePoolInformer whereaboutsInformers.NodeSlicePoolInformer,
	nadInformer nadinformers.NetworkAttachmentDefinitionInformer,
	sortResults bool,
	whereaboutsNamespace string,
) *Controller {
	logger := klog.FromContext(ctx)

	// Events are both logged and sent to the API server.
	logger.V(4).Info("Creating event broadcaster")
	broadcaster := record.NewBroadcaster()
	broadcaster.StartStructuredLogging(0)
	eventSink := &typedcorev1.EventSinkImpl{Interface: kubeclientset.CoreV1().Events("")}
	broadcaster.StartRecordingToSink(eventSink)
	eventSource := corev1.EventSource{Component: controllerAgentName}
	recorder := broadcaster.NewRecorder(scheme.Scheme, eventSource)

	// The queue is limited by the slower of a per-item exponential backoff
	// and an overall token bucket.
	perItemBackoff := workqueue.NewItemExponentialFailureRateLimiter(5*time.Millisecond, 1000*time.Second)
	overallBucket := &workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(50), 300)}
	limiter := workqueue.NewMaxOfRateLimiter(perItemBackoff, overallBucket)

	ctrl := &Controller{
		kubeclientset:        kubeclientset,
		whereaboutsclientset: whereaboutsclientset,
		nadclientset:         nadclientset,
		recorder:             recorder,
		sortResults:          sortResults,
		whereaboutsNamespace: whereaboutsNamespace,
	}
	ctrl.nodeInformer = nodeInformer
	ctrl.nodeLister = nodeInformer.Lister()
	ctrl.nodesSynced = nodeInformer.Informer().HasSynced
	ctrl.nodeSlicePoolInformer = nodeSlicePoolInformer
	ctrl.nodeSlicePoolLister = nodeSlicePoolInformer.Lister()
	ctrl.nodeSlicePoolSynced = nodeSlicePoolInformer.Informer().HasSynced
	ctrl.nadInformer = nadInformer
	ctrl.nadLister = nadInformer.Lister()
	ctrl.nadSynced = nadInformer.Informer().HasSynced
	ctrl.workqueue = workqueue.NewRateLimitingQueue(limiter)

	logger.Info("Setting up event handlers")
	nadHandlers := cache.ResourceEventHandlerFuncs{
		AddFunc: ctrl.onNadEvent,
		UpdateFunc: func(previous, current interface{}) {
			before := previous.(*cncfV1.NetworkAttachmentDefinition)
			after := current.(*cncfV1.NetworkAttachmentDefinition)
			if after.ResourceVersion != before.ResourceVersion {
				ctrl.onNadEvent(current)
				return
			}
			// Same resource version: nothing changed, so skip the sync.
			logger.Info("update for NAD with same resource version")
		},
		DeleteFunc: ctrl.onNadEvent,
	}
	nadInformer.Informer().AddEventHandler(nadHandlers)

	// Node membership changes affect every pool, so all NADs are requeued.
	nodeHandlers := cache.ResourceEventHandlerFuncs{
		AddFunc:    ctrl.requeueNADs,
		DeleteFunc: ctrl.requeueNADs,
	}
	nodeInformer.Informer().AddEventHandler(nodeHandlers)

	return ctrl
}
// onNadEvent is the informer callback for NetworkAttachmentDefinition add,
// update and delete events. It resolves obj (or the object wrapped in a
// cache.DeletedFinalStateUnknown tombstone) to its namespace/name key and adds
// that key to the workqueue. Objects of an unexpected type are reported through
// utilruntime.HandleError and dropped.
func (c *Controller) onNadEvent(obj interface{}) {
	klog.Infof("handling network attachment definition event")

	object, ok := obj.(metav1.Object)
	if !ok {
		// Deletions may be delivered as a tombstone wrapping the last known
		// state of the object.
		tombstone, isTombstone := obj.(cache.DeletedFinalStateUnknown)
		if !isTombstone {
			utilruntime.HandleError(fmt.Errorf("error decoding object, invalid type"))
			return
		}
		object, ok = tombstone.Obj.(metav1.Object)
		if !ok {
			utilruntime.HandleError(fmt.Errorf("error decoding object tombstone, invalid type"))
			return
		}
	}

	key, err := cache.MetaNamespaceKeyFunc(object)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err))
		return
	}
	// Log the key only once it is known to be valid.
	klog.Info(key)
	c.workqueue.Add(key)
}
// requeueNADs is the informer callback for node add and delete events. The
// event object itself is ignored: every NetworkAttachmentDefinition known to
// the lister is added to the workqueue so its node slices are re-evaluated
// against the new node set.
//
// A NAD whose key cannot be derived is reported and skipped; the remaining
// NADs are still enqueued.
//
// TODO: we may want to require nodes to have an annotation similar to what pods have to receive a slice
// in this case we get all applicable NADs for the node rather than requeuing all
// same applies to other node event handlers
func (c *Controller) requeueNADs(obj interface{}) {
	klog.Infof("handling requeueNADs")
	nadlist, err := c.nadLister.List(labels.Everything())
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("couldn't get network-attachment-definition list from informer: %v", err))
		return
	}
	for _, nad := range nadlist {
		key, err := cache.MetaNamespaceKeyFunc(nad)
		if err != nil {
			utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", nad, err))
			// One bad object must not prevent the other NADs from being requeued.
			continue
		}
		c.workqueue.Add(key)
	}
}
// Run waits for the node, NodeSlicePool and NAD informer caches to sync,
// starts the requested number of worker goroutines and then blocks until ctx
// is cancelled. On return the workqueue is shut down, which lets the workers
// stop once they have finished their current item.
//
// It returns an error only when one of the caches fails to sync.
func (c *Controller) Run(ctx context.Context, workers int) error {
	defer utilruntime.HandleCrash()
	defer c.workqueue.ShutDown()

	logger := klog.FromContext(ctx)
	logger.Info("Starting node-slice controller")

	// Workers must not start before every cache they read from is populated.
	logger.Info("Waiting for informer caches to sync")
	switch done := ctx.Done(); {
	case !cache.WaitForCacheSync(done, c.nodesSynced):
		return fmt.Errorf("failed to wait for nodes caches to sync")
	case !cache.WaitForCacheSync(done, c.nodeSlicePoolSynced):
		return fmt.Errorf("failed to wait for nodeslices caches to sync")
	case !cache.WaitForCacheSync(done, c.nadSynced):
		return fmt.Errorf("failed to wait for nad caches to sync")
	}

	logger.Info("Starting workers", "count", workers)
	// Each worker is restarted one second after it returns, until ctx ends.
	for remaining := workers; remaining > 0; remaining-- {
		go wait.UntilWithContext(ctx, c.runWorker, time.Second)
	}
	logger.Info("Started workers")

	<-ctx.Done()
	logger.Info("Shutting down workers")
	return nil
}
// runWorker drains the workqueue: it keeps calling processNextWorkItem until
// that reports there is nothing more to process (the queue was shut down).
func (c *Controller) runWorker(ctx context.Context) {
	for {
		if more := c.processNextWorkItem(ctx); !more {
			return
		}
	}
}
// processNextWorkItem takes one item off the workqueue and runs syncHandler on
// it. It returns false only when the workqueue has been shut down; processing
// errors are reported through utilruntime.HandleError and still return true so
// the worker keeps going.
func (c *Controller) processNextWorkItem(ctx context.Context) bool {
	item, quit := c.workqueue.Get()
	logger := klog.FromContext(ctx)
	if quit {
		return false
	}

	// The work is wrapped in a closure so that Done is always called for the
	// item as soon as processing ends, before any resulting error is reported.
	process := func() error {
		defer c.workqueue.Done(item)

		// Keys are namespace/name strings. Anything else can never be
		// processed, so it is forgotten rather than retried forever.
		key, isString := item.(string)
		if !isString {
			c.workqueue.Forget(item)
			utilruntime.HandleError(fmt.Errorf("expected string in workqueue but got %#v", item))
			return nil
		}

		if syncErr := c.syncHandler(ctx, key); syncErr != nil {
			// Not forgotten: the key is retried after a back-off period to
			// ride out transient errors.
			c.workqueue.AddRateLimited(key)
			return fmt.Errorf("error syncing '%s': %s, requeuing", key, syncErr.Error())
		}

		// Success resets the back-off history for this item.
		c.workqueue.Forget(item)
		logger.Info("Successfully synced", "resourceName", key)
		return nil
	}

	if err := process(); err != nil {
		utilruntime.HandleError(err)
	}
	return true
}
// syncHandler reconciles the NodeSlicePool that backs the
// NetworkAttachmentDefinition (NAD) identified by key, a namespace/name string.
//
// Behaviour by case:
//   - malformed key: the problem is reported and nil is returned so the item
//     is not requeued.
//   - NAD no longer in the lister: every NodeSlicePool whose only owner
//     reference names this NAD is deleted.
//   - NAD without a node slice size or without any IP range: nothing to do.
//   - NodeSlicePool missing: it is created, with the range divided into
//     slices and the current nodes assigned to them.
//   - NodeSlicePool present: the NAD is added as an owner when missing; the
//     slices are recomputed if the range or slice size changed, otherwise
//     slices of vanished nodes are released and unassigned nodes get a free
//     slice.
//
// A non-nil return makes processNextWorkItem requeue the key with rate
// limiting.
func (c *Controller) syncHandler(ctx context.Context, key string) error {
	logger := klog.LoggerWithValues(klog.FromContext(ctx), "resourceName", key)

	// Convert the namespace/name string into a distinct namespace and name.
	namespace, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		// Retrying a malformed key cannot succeed, so do not return an error.
		utilruntime.HandleError(fmt.Errorf("invalid resource key: %s", key))
		return nil
	}

	err = c.checkForMultiNadMismatch(name, namespace)
	if err != nil {
		return err
	}

	nad, err := c.nadLister.NetworkAttachmentDefinitions(namespace).Get(name)
	if err != nil {
		if !errors.IsNotFound(err) {
			return err
		}
		// in this case the nad dne so it must've been deleted so we will cleanup nodeslicepools
		// if we are down during the delete this could be missed similar to endpoints see kubernetes #6877
		nodeSlices, err := c.nodeSlicePoolLister.List(labels.Everything())
		if err != nil {
			// Return the error so the cleanup is retried instead of being
			// silently skipped.
			return err
		}
		for _, nodeSlice := range nodeSlices {
			// Only delete pools for which this NAD is the last remaining owner.
			if !hasOwnerRef(nodeSlice, name) || len(nodeSlice.OwnerReferences) != 1 {
				continue
			}
			// Delete the pool that was actually matched. Its name comes from
			// getSliceName and therefore need not equal the NAD name.
			err = c.whereaboutsclientset.WhereaboutsV1alpha1().NodeSlicePools(nodeSlice.Namespace).Delete(ctx, nodeSlice.Name, metav1.DeleteOptions{})
			if err != nil && !errors.IsNotFound(err) {
				return err
			}
		}
		return nil
	}

	// nad does exist so did it change node_slice_range or slice_size
	ipamConf, err := ipamConfiguration(nad, "")
	if err != nil {
		return err
	}

	// This is to support several NADs and interfaces on the same network
	logger.Info(fmt.Sprintf("%v", ipamConf))
	logger.Info(fmt.Sprintf("slicesize: %v", ipamConf.NodeSliceSize))
	if ipamConf.NodeSliceSize == "" || len(ipamConf.IPRanges) == 0 {
		logger.Info("skipping update node slices for network-attachment-definition due missing node slice or range configurations",
			"network-attachment-definition", klog.KRef(namespace, name))
		return nil
	}

	logger.Info("About to update node slices for network-attachment-definition",
		"network-attachment-definition", klog.KRef(namespace, name))

	sliceName := getSliceName(ipamConf)
	currentNodeSlicePool, err := c.nodeSlicePoolLister.NodeSlicePools(c.whereaboutsNamespace).Get(sliceName)
	if err != nil {
		if !errors.IsNotFound(err) {
			return err
		}
		logger.Info("node slice pool does not exist, creating")

		// Create
		nodeslice := &v1alpha1.NodeSlicePool{
			TypeMeta: metav1.TypeMeta{
				Kind:       "NodeSlicePool",
				APIVersion: "whereabouts.cni.cncf.io/v1alpha1",
			},
			ObjectMeta: metav1.ObjectMeta{
				Name:      sliceName,
				Namespace: c.whereaboutsNamespace,
				OwnerReferences: []metav1.OwnerReference{
					*metav1.NewControllerRef(nad, cncfV1.SchemeGroupVersion.WithKind("NetworkAttachmentDefinition")),
				},
			},
			// only supports single range with node slice
			Spec: v1alpha1.NodeSlicePoolSpec{
				Range:     ipamConf.IPRanges[0].Range,
				SliceSize: ipamConf.NodeSliceSize,
			},
		}
		logger.Info(fmt.Sprintf("node slice: %v", nodeslice))

		//TODO: handle case when full, we could fire an event
		subnets, err := iphelpers.DivideRangeBySize(nodeslice.Spec.Range, ipamConf.NodeSliceSize)
		if err != nil {
			return err
		}
		logger.Info(fmt.Sprintf("subnets: %v", subnets))
		allocations := make([]v1alpha1.NodeSliceAllocation, 0, len(subnets))
		for _, subnet := range subnets {
			allocations = append(allocations, v1alpha1.NodeSliceAllocation{
				SliceRange: subnet,
			})
		}

		nodes, err := c.getNodeList()
		if err != nil {
			return err
		}
		for _, node := range nodes {
			logger.Info(fmt.Sprintf("assigning node to slice: %v", node.Name))
			assignNodeToSlice(allocations, node.Name)
		}
		nodeslice.Status = v1alpha1.NodeSlicePoolStatus{
			Allocations: allocations,
		}
		logger.Info(fmt.Sprintf("final allocations: %v", allocations))

		_, err = c.whereaboutsclientset.WhereaboutsV1alpha1().NodeSlicePools(c.whereaboutsNamespace).Create(ctx, nodeslice, metav1.CreateOptions{})
		if err != nil {
			logger.Error(err, "failed to create nodeslicepool")
			return err
		}
		return nil
	}

	// node slice currently exists; work on a copy so the informer cache is
	// never mutated.
	nodeslice := currentNodeSlicePool.DeepCopy()

	// make sure if multiple NADs act on this NodeSlicePool they are all listed as owners
	if !hasOwnerRef(nodeslice, name) {
		nodeslice.OwnerReferences = append(nodeslice.OwnerReferences, getAuxiliaryOwnerRef(nad))
	}

	if currentNodeSlicePool.Spec.SliceSize != ipamConf.NodeSliceSize ||
		currentNodeSlicePool.Spec.Range != ipamConf.IPRanges[0].Range {
		logger.Info("network-attachment-definition range or slice size changed, re-allocating node slices",
			"new range", ipamConf.IPRanges[0].Range, "new slice size", ipamConf.NodeSliceSize)

		// slices have changed so redo the slicing and reassign nodes
		subnets, err := iphelpers.DivideRangeBySize(ipamConf.IPRanges[0].Range, ipamConf.NodeSliceSize)
		if err != nil {
			return err
		}
		allocations := make([]v1alpha1.NodeSliceAllocation, 0, len(subnets))
		for _, subnet := range subnets {
			allocations = append(allocations, v1alpha1.NodeSliceAllocation{
				SliceRange: subnet,
			})
		}
		nodes, err := c.getNodeList()
		if err != nil {
			return err
		}
		for _, node := range nodes {
			assignNodeToSlice(allocations, node.Name)
		}

		nodeslice.Spec = v1alpha1.NodeSlicePoolSpec{
			Range:     ipamConf.IPRanges[0].Range,
			SliceSize: ipamConf.NodeSliceSize,
		}
		nodeslice.Status = v1alpha1.NodeSlicePoolStatus{
			Allocations: allocations,
		}
		_, err = c.whereaboutsclientset.WhereaboutsV1alpha1().NodeSlicePools(c.whereaboutsNamespace).Update(ctx, nodeslice, metav1.UpdateOptions{})
		if err != nil {
			return err
		}
		return nil
	}

	logger.Info("node slice exists and range configuration did not change, ensuring nodes assigned")
	// slices have not changed so only make sure all nodes are assigned
	allocations := nodeslice.Status.Allocations
	nodes, err := c.getNodeList()
	if err != nil {
		return err
	}
	// Release slices held by nodes that no longer exist BEFORE assigning, so
	// a new node can take a slice freed in this same sync even when the pool
	// was full.
	removeUnusedNodes(allocations, nodes)
	for _, node := range nodes {
		assignNodeToSlice(allocations, node.Name)
	}
	nodeslice.Status.Allocations = allocations

	_, err = c.whereaboutsclientset.WhereaboutsV1alpha1().NodeSlicePools(c.whereaboutsNamespace).Update(ctx, nodeslice, metav1.UpdateOptions{})
	if err != nil {
		logger.Info(fmt.Sprintf("Error updating NSP with no changes: %v", err))
		return err
	}
	return nil
}
// getNodeList returns every node known to the node lister. When the controller
// was built with sortResults set, the nodes are ordered by name so that slice
// assignment is deterministic.
func (c *Controller) getNodeList() ([]*corev1.Node, error) {
	nodeList, err := c.nodeLister.List(labels.Everything())
	if err != nil {
		return nil, err
	}
	if c.sortResults {
		byName := func(a, b int) bool { return nodeList[a].Name < nodeList[b].Name }
		sort.Slice(nodeList, byName)
	}
	return nodeList, nil
}
// checkForMultiNadMismatch verifies that the IPAM configuration of the NAD
// namespace/name agrees (per checkIpamConfMatch) with the IPAM configuration of
// every NAD known to the lister. Several NADs may be attached to the same
// network, in which case their settings have to match.
//
// It returns nil when the NAD is not found, and an error when a lookup fails,
// when any NAD's IPAM configuration cannot be loaded, or when a mismatch is
// detected.
//
// NOTE(review): a single NAD whose configuration fails to load aborts the check
// for every other NAD; confirm this is intended for clusters that also carry
// NADs unrelated to whereabouts.
func (c *Controller) checkForMultiNadMismatch(name, namespace string) error {
	target, err := c.nadLister.NetworkAttachmentDefinitions(namespace).Get(name)
	switch {
	case err == nil:
		// Found; continue with the comparison below.
	case errors.IsNotFound(err):
		return nil
	default:
		return err
	}

	targetConf, err := ipamConfiguration(target, "")
	if err != nil {
		return err
	}

	allNads, err := c.nadLister.List(labels.Everything())
	if err != nil {
		return err
	}
	for idx := range allNads {
		otherConf, confErr := ipamConfiguration(allNads[idx], "")
		if confErr != nil {
			return confErr
		}
		if checkIpamConfMatch(targetConf, otherConf) {
			continue
		}
		return fmt.Errorf("found IPAM conf mismatch for network-attachment-definitions with same network name")
	}
	return nil
}
// checkIpamConfMatch reports whether two IPAM configurations are compatible.
// Configurations with different network names are always compatible. With the
// same network name they must agree on the node slice size and on their first
// IP range.
//
// A configuration without any IP range is handled explicitly instead of being
// indexed: two range-less configurations agree on the range, while a range-less
// one never agrees with one that has a range.
func checkIpamConfMatch(conf1, conf2 *types.IPAMConfig) bool {
	if conf1.NetworkName != conf2.NetworkName {
		return true
	}

	hasRange1, hasRange2 := len(conf1.IPRanges) > 0, len(conf2.IPRanges) > 0
	if hasRange1 != hasRange2 {
		return false
	}
	// Only the first range is compared: node slices support a single range.
	if hasRange1 && conf1.IPRanges[0].Range != conf2.IPRanges[0].Range {
		return false
	}
	return conf1.NodeSliceSize == conf2.NodeSliceSize
}
// hasOwnerRef reports whether nodeSlice carries an owner reference whose Name
// equals name. Only the name is compared.
func hasOwnerRef(nodeSlice *v1alpha1.NodeSlicePool, name string) bool {
	refs := nodeSlice.OwnerReferences
	for idx := 0; idx < len(refs); idx++ {
		if refs[idx].Name == name {
			return true
		}
	}
	return false
}
// getSliceName returns the name used for the NodeSlicePool of ipamConf: the
// network name when one is configured, otherwise the configuration's Name.
func getSliceName(ipamConf *types.IPAMConfig) string {
	fallback := ipamConf.Name
	if networkName := ipamConf.NetworkName; networkName != "" {
		return networkName
	}
	return fallback
}
// getAuxiliaryOwnerRef builds a non-controller owner reference to nad. Since
// multiple NADs can share a NodeSlicePool, every NAD is listed as an owner but
// only one of them holds the controller owner reference.
//
// When the NAD's own APIVersion or Kind is empty, the reference falls back to
// the NetworkAttachmentDefinition group/version and kind, matching what is used
// for the controller reference when a pool is created, so that the reference
// never carries an empty APIVersion or Kind.
func getAuxiliaryOwnerRef(nad *cncfV1.NetworkAttachmentDefinition) metav1.OwnerReference {
	apiVersion := nad.APIVersion
	if apiVersion == "" {
		apiVersion = cncfV1.SchemeGroupVersion.String()
	}
	kind := nad.Kind
	if kind == "" {
		kind = "NetworkAttachmentDefinition"
	}
	return metav1.OwnerReference{
		APIVersion: apiVersion,
		Kind:       kind,
		Name:       nad.Name,
		UID:        nad.UID,
	}
}
// removeUnusedNodes frees, in place, every allocation whose node name is not
// among nodes. A freed entry keeps only its SliceRange.
func removeUnusedNodes(allocations []v1alpha1.NodeSliceAllocation, nodes []*corev1.Node) {
	// Set of live node names; empty struct values because only membership matters.
	liveNodes := make(map[string]struct{}, len(nodes))
	for _, n := range nodes {
		liveNodes[n.Name] = struct{}{}
	}

	for idx := range allocations {
		owner := allocations[idx].NodeName
		if owner == "" {
			// Already free.
			continue
		}
		if _, alive := liveNodes[owner]; alive {
			continue
		}
		allocations[idx] = v1alpha1.NodeSliceAllocation{
			SliceRange: allocations[idx].SliceRange,
		}
	}
}
// ipamConfiguration loads the IPAM configuration from the NAD's Spec.Config.
// mountPath is prepended to whereaboutsConfigPath to locate the flat-file
// configuration handed to config.LoadIPAMConfiguration. On failure the
// returned configuration is nil.
func ipamConfiguration(nad *cncfV1.NetworkAttachmentDefinition, mountPath string) (*types.IPAMConfig, error) {
	flatFilePath := mountPath + whereaboutsConfigPath
	rawConfig := []byte(nad.Spec.Config)
	conf, err := config.LoadIPAMConfiguration(rawConfig, "", flatFilePath)
	if err == nil {
		return conf, nil
	}
	return nil, err
}
// assignNodeToSlice gives nodeName the first free allocation (one with an
// empty NodeName), modifying allocations in place. Nothing changes when the
// node already holds an allocation or when no allocation is free.
func assignNodeToSlice(allocations []v1alpha1.NodeSliceAllocation, nodeName string) {
	firstFree := -1
	for idx := range allocations {
		switch owner := allocations[idx].NodeName; {
		case owner == nodeName:
			// The node already has a slice.
			return
		case owner == "" && firstFree < 0:
			firstFree = idx
		}
	}
	if firstFree < 0 {
		return
	}
	allocations[firstFree] = v1alpha1.NodeSliceAllocation{
		SliceRange: allocations[firstFree].SliceRange,
		NodeName:   nodeName,
	}
}
// nodeHasAllocation reports whether any entry of allocations has NodeName equal
// to nodeName.
func nodeHasAllocation(allocations []v1alpha1.NodeSliceAllocation, nodeName string) bool {
	for idx := range allocations {
		if allocations[idx].NodeName == nodeName {
			return true
		}
	}
	return false
}