Move size detection pod deletion to cleanup (#3553) (#3629)

* Correct check deciding whether to delete size detection pod

This is probably not the resolving many cases but it does look
like an oversight since the size detection pod's appearance
is triggered by the target clone request and not the source.



* Move size detection pod deletion to cleanup

Apparently it's possible that the size detection pod
will stick around even if the dv is succeeded/deleted.
I think this belongs in the cleanup loop anyway



* Fix datasource watch indexing

We slap on the index key regardless of source.PVC/source.Snapshot so
it was possible for a DV with PVC source to get queued for the snapshot controller
and trigger all sorts of fun nils.



* Add datasource watch in snapshot controller as well

This is also needed for snapshot clones, guess it just merged
around the time they were introduced.



* Add check for nil pointers post CI run

This should make it easier to spot obvious nil pointers
through CI runs.



---------

Signed-off-by: Alex Kalenyuk <akalenyu@redhat.com>
This commit is contained in:
Alex Kalenyuk 2025-02-11 12:02:45 +02:00 committed by GitHub
parent ada7496b1c
commit 4ad036d298
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 172 additions and 54 deletions

View File

@ -114,6 +114,9 @@ make cluster-sync
kubectl version
echo "Nil check --PRE-- test run"
kubectl get pods -n $CDI_NAMESPACE -o'custom-columns=NAME:.metadata.name,NAMESPACE:.metadata.namespace,RESTARTS:status.containerStatuses[*].restartCount' --no-headers
ginko_params="--test-args=--ginkgo.no-color --ginkgo.junit-report=${ARTIFACTS_PATH}/junit.functest.xml"
if [[ -n "$CDI_E2E_FOCUS" ]]; then
@ -130,3 +133,7 @@ fi
# Run functional tests
TEST_ARGS=$ginko_params make test-functional
echo "Nil check --POST-- test run"
kubectl get pods -n $CDI_NAMESPACE -o'custom-columns=NAME:.metadata.name,NAMESPACE:.metadata.namespace,RESTARTS:status.containerStatuses[*].restartCount' --no-headers
kubectl logs -n $CDI_NAMESPACE $(kubectl get pod -n $CDI_NAMESPACE -l=cdi.kubevirt.io=cdi-deployment --output=jsonpath='{$.items[0].metadata.name}') --previous || echo "this is fine"

View File

@ -576,3 +576,50 @@ func addCloneWithoutSourceWatch(mgr manager.Manager, datavolumeController contro
return nil
}
func addDataSourceWatch(mgr manager.Manager, c controller.Controller, indexingKey string, op dataVolumeOp) error {
getKey := func(namespace, name string) string {
return namespace + "/" + name
}
if err := mgr.GetFieldIndexer().IndexField(context.TODO(), &cdiv1.DataVolume{}, indexingKey, func(obj client.Object) []string {
dv := obj.(*cdiv1.DataVolume)
if sourceRef := dv.Spec.SourceRef; sourceRef != nil && sourceRef.Kind == cdiv1.DataVolumeDataSource {
ns := obj.GetNamespace()
if sourceRef.Namespace != nil && *sourceRef.Namespace != "" {
ns = *sourceRef.Namespace
}
if getDataVolumeOp(context.TODO(), mgr.GetLogger(), dv, mgr.GetClient()) == op && sourceRef.Name != "" {
return []string{getKey(ns, sourceRef.Name)}
}
}
return nil
}); err != nil {
return err
}
mapToDataVolume := func(ctx context.Context, obj client.Object) []reconcile.Request {
var dvs cdiv1.DataVolumeList
matchingFields := client.MatchingFields{indexingKey: getKey(obj.GetNamespace(), obj.GetName())}
if err := mgr.GetClient().List(ctx, &dvs, matchingFields); err != nil {
c.GetLogger().Error(err, "Unable to list DataVolumes", "matchingFields", matchingFields)
return nil
}
var reqs []reconcile.Request
for _, dv := range dvs.Items {
if getDataVolumeOp(ctx, c.GetLogger(), &dv, mgr.GetClient()) != op {
continue
}
reqs = append(reqs, reconcile.Request{NamespacedName: types.NamespacedName{Namespace: dv.Namespace, Name: dv.Name}})
}
return reqs
}
if err := c.Watch(source.Kind(mgr.GetCache(), &cdiv1.DataSource{}),
handler.EnqueueRequestsFromMapFunc(mapToDataVolume),
); err != nil {
return err
}
return nil
}

View File

@ -36,10 +36,8 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"
cdiv1 "kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1"
"kubevirt.io/containerized-data-importer/pkg/common"
@ -123,7 +121,7 @@ func (r *PvcCloneReconciler) addDataVolumeCloneControllerWatches(mgr manager.Man
return err
}
if err := addDataSourceWatch(mgr, datavolumeController); err != nil {
if err := addDataSourceWatch(mgr, datavolumeController, "pvcdatasource", dataVolumePvcClone); err != nil {
return err
}
@ -134,49 +132,6 @@ func (r *PvcCloneReconciler) addDataVolumeCloneControllerWatches(mgr manager.Man
return nil
}
func addDataSourceWatch(mgr manager.Manager, c controller.Controller) error {
const dvDataSourceField = "datasource"
getKey := func(namespace, name string) string {
return namespace + "/" + name
}
if err := mgr.GetFieldIndexer().IndexField(context.TODO(), &cdiv1.DataVolume{}, dvDataSourceField, func(obj client.Object) []string {
if sourceRef := obj.(*cdiv1.DataVolume).Spec.SourceRef; sourceRef != nil && sourceRef.Kind == cdiv1.DataVolumeDataSource {
ns := obj.GetNamespace()
if sourceRef.Namespace != nil && *sourceRef.Namespace != "" {
ns = *sourceRef.Namespace
}
return []string{getKey(ns, sourceRef.Name)}
}
return nil
}); err != nil {
return err
}
mapToDataVolume := func(ctx context.Context, obj client.Object) []reconcile.Request {
var dvs cdiv1.DataVolumeList
matchingFields := client.MatchingFields{dvDataSourceField: getKey(obj.GetNamespace(), obj.GetName())}
if err := mgr.GetClient().List(ctx, &dvs, matchingFields); err != nil {
c.GetLogger().Error(err, "Unable to list DataVolumes", "matchingFields", matchingFields)
return nil
}
var reqs []reconcile.Request
for _, dv := range dvs.Items {
reqs = append(reqs, reconcile.Request{NamespacedName: types.NamespacedName{Namespace: dv.Namespace, Name: dv.Name}})
}
return reqs
}
if err := c.Watch(source.Kind(mgr.GetCache(), &cdiv1.DataSource{}),
handler.EnqueueRequestsFromMapFunc(mapToDataVolume),
); err != nil {
return err
}
return nil
}
// Reconcile loop for the clone data volumes
func (r *PvcCloneReconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {
return r.reconcile(ctx, req, r)
@ -200,7 +155,75 @@ func (r *PvcCloneReconciler) cleanup(syncState *dvSyncState) error {
return nil
}
return r.reconcileVolumeCloneSourceCR(syncState)
r.log.V(3).Info("Cleanup initiated in dv pvc clone controller")
if err := r.reconcileVolumeCloneSourceCR(syncState); err != nil {
return err
}
if err := r.cleanupSizeDetectionPod(dv, syncState.pvc); err != nil {
return err
}
return nil
}
func (r *PvcCloneReconciler) cleanupSizeDetectionPod(dv *cdiv1.DataVolume, pvc *corev1.PersistentVolumeClaim) error {
if pvc != nil && !cc.ShouldDeletePod(pvc) {
return nil
}
nn := types.NamespacedName{Namespace: dv.Spec.Source.PVC.Namespace, Name: dv.Spec.Source.PVC.Name}
sourcePvc := &corev1.PersistentVolumeClaim{}
if err := r.client.Get(context.TODO(), nn, sourcePvc); err != nil {
if !k8serrors.IsNotFound(err) {
return err
}
return nil
}
pod := &corev1.Pod{
ObjectMeta: *makeSizeDetectionObjectMeta(sourcePvc),
}
if err := r.client.Get(context.TODO(), client.ObjectKeyFromObject(pod), pod); err != nil {
if !k8serrors.IsNotFound(err) {
return err
}
return nil
}
ownerRef := metav1.GetControllerOf(pod)
var hasDataVolumeOwner bool
var ownerNamespace, ownerName string
if ownerRef != nil && ownerRef.Kind == "DataVolume" {
hasDataVolumeOwner = true
ownerNamespace = pod.GetNamespace()
ownerName = ownerRef.Name
} else if hasAnnOwnedByDataVolume(pod) {
hasDataVolumeOwner = true
var err error
ownerNamespace, ownerName, err = getAnnOwnedByDataVolume(pod)
if err != nil {
return nil
}
}
if !hasDataVolumeOwner {
return nil
}
if ownerNamespace != dv.Namespace || ownerName != dv.Name {
return nil
}
if _, ok := pod.Labels[common.CDIComponentLabel]; !ok {
return nil
}
if err := r.client.Delete(context.TODO(), pod); err != nil {
if !k8serrors.IsNotFound(err) {
return err
}
}
return nil
}
func addCloneToken(dv *cdiv1.DataVolume, pvc *corev1.PersistentVolumeClaim) error {
@ -580,8 +603,8 @@ func (r *PvcCloneReconciler) getSizeFromPod(targetPvc, sourcePvc *corev1.Persist
if err := r.updateClonePVCAnnotations(sourcePvc, termMsg); err != nil {
return imgSize, err
}
// Finally, detelete the pod
if cc.ShouldDeletePod(sourcePvc) {
// Finally, delete the pod
if targetPvc != nil && cc.ShouldDeletePod(targetPvc) {
err = r.client.Delete(context.TODO(), pod)
if err != nil && !k8serrors.IsNotFound(err) {
return imgSize, err
@ -632,7 +655,7 @@ func (r *PvcCloneReconciler) makeSizeDetectionPodSpec(
return nil
}
// Generate individual specs
objectMeta := makeSizeDetectionObjectMeta(sourcePvc, dv)
objectMeta := makeSizeDetectionObjectMeta(sourcePvc)
volume := makeSizeDetectionVolumeSpec(sourcePvc.Name)
container := r.makeSizeDetectionContainerSpec(volume.Name)
if container == nil {
@ -683,7 +706,7 @@ func (r *PvcCloneReconciler) makeSizeDetectionPodSpec(
}
// makeSizeDetectionObjectMeta creates and returns the object metadata for the size-detection pod
func makeSizeDetectionObjectMeta(sourcePvc *corev1.PersistentVolumeClaim, dataVolume *cdiv1.DataVolume) *metav1.ObjectMeta {
func makeSizeDetectionObjectMeta(sourcePvc *corev1.PersistentVolumeClaim) *metav1.ObjectMeta {
return &metav1.ObjectMeta{
Name: sizeDetectionPodName(sourcePvc),
Namespace: sourcePvc.Namespace,

View File

@ -342,6 +342,43 @@ var _ = Describe("All DataVolume Tests", func() {
Expect(err).To(HaveOccurred())
Expect(k8serrors.IsNotFound(err)).To(BeTrue())
})
It("should delete size detection pod on success", func() {
dv := newCloneDataVolumeWithEmptyStorage("test-dv", "other")
dv.Status.Phase = cdiv1.Succeeded
anno := map[string]string{
AnnExtendedCloneToken: "test-token",
AnnCloneType: string(cdiv1.CloneStrategySnapshot),
populators.AnnClonePhase: clone.SucceededPhaseName,
AnnUsePopulator: "true",
}
annKubevirt := map[string]string{AnnContentType: "kubevirt"}
sourcePvc := CreatePvcInStorageClass("test", "other", &scName, annKubevirt, nil, corev1.ClaimBound)
pvc := CreatePvcInStorageClass("test-dv", metav1.NamespaceDefault, &scName, anno, nil, corev1.ClaimBound)
pvc.Spec.DataSourceRef = &corev1.TypedObjectReference{
Kind: cdiv1.VolumeCloneSourceRef,
Name: volumeCloneSourceName(dv),
}
pvc.OwnerReferences = append(pvc.OwnerReferences, metav1.OwnerReference{
Kind: "DataVolume",
Controller: ptr.To[bool](true),
Name: "test-dv",
UID: dv.UID,
})
pod := &corev1.Pod{
ObjectMeta: *makeSizeDetectionObjectMeta(sourcePvc),
}
err := setAnnOwnedByDataVolume(pod, dv)
Expect(err).ToNot(HaveOccurred())
reconciler = createCloneReconciler(storageClass, csiDriver, dv, sourcePvc, pvc, pod)
result, err := reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: types.NamespacedName{Name: "test-dv", Namespace: metav1.NamespaceDefault}})
Expect(err).ToNot(HaveOccurred())
Expect(result.Requeue).To(BeFalse())
Expect(result.RequeueAfter).To(BeZero())
err = reconciler.client.Get(context.TODO(), client.ObjectKeyFromObject(pod), pod)
Expect(err).To(HaveOccurred())
Expect(k8serrors.IsNotFound(err)).To(BeTrue())
})
})
})

View File

@ -132,6 +132,10 @@ func (r *SnapshotCloneReconciler) addDataVolumeSnapshotCloneControllerWatches(mg
return err
}
if err := addDataSourceWatch(mgr, datavolumeController, "snapshotdatasource", dataVolumeSnapshotClone); err != nil {
return err
}
if err := r.addVolumeCloneSourceWatch(mgr, datavolumeController); err != nil {
return err
}

View File

@ -1268,7 +1268,6 @@ var _ = Describe("all clone tests", func() {
It("[test_id:8498]Should only use size-detection pod when cloning a PVC for the first time", func() {
dataVolume := utils.NewDataVolumeWithHTTPImportAndStorageSpec(dataVolumeName, "200Mi", fmt.Sprintf(utils.TinyCoreIsoURL, f.CdiInstallNs))
dataVolume.Spec.Storage.VolumeMode = &volumeMode
controller.AddAnnotation(dataVolume, controller.AnnPodRetainAfterCompletion, "true")
dataVolume, err := utils.CreateDataVolumeFromDefinition(f.CdiClient, f.Namespace.Name, dataVolume)
Expect(err).ToNot(HaveOccurred())
f.ForceBindPvcIfDvIsWaitForFirstConsumer(dataVolume)
@ -1282,6 +1281,7 @@ var _ = Describe("all clone tests", func() {
// We attempt to create the sizeless clone
targetDataVolume := utils.NewDataVolumeForCloningWithEmptySize("target-dv", sourcePvc.Namespace, sourcePvc.Name, nil, &volumeMode)
controller.AddAnnotation(targetDataVolume, controller.AnnDeleteAfterCompletion, "false")
controller.AddAnnotation(targetDataVolume, controller.AnnPodRetainAfterCompletion, "true")
targetDataVolume, err = utils.CreateDataVolumeFromDefinition(f.CdiClient, f.Namespace.Name, targetDataVolume)
Expect(err).ToNot(HaveOccurred())
@ -1332,7 +1332,6 @@ var _ = Describe("all clone tests", func() {
It("[test_id:8762]Should use size-detection pod when cloning if the source PVC has changed its original capacity", func() {
dataVolume := utils.NewDataVolumeWithHTTPImportAndStorageSpec(dataVolumeName, "1Gi", fmt.Sprintf(utils.TinyCoreIsoURL, f.CdiInstallNs))
dataVolume.Spec.Storage.VolumeMode = &volumeMode
controller.AddAnnotation(dataVolume, controller.AnnPodRetainAfterCompletion, "true")
dataVolume, err := utils.CreateDataVolumeFromDefinition(f.CdiClient, f.Namespace.Name, dataVolume)
Expect(err).ToNot(HaveOccurred())
f.ForceBindPvcIfDvIsWaitForFirstConsumer(dataVolume)
@ -1346,6 +1345,7 @@ var _ = Describe("all clone tests", func() {
// We attempt to create the sizeless clone
targetDV := utils.NewDataVolumeForCloningWithEmptySize("target-dv", sourcePvc.Namespace, sourcePvc.Name, nil, &volumeMode)
controller.AddAnnotation(targetDV, controller.AnnDeleteAfterCompletion, "false")
controller.AddAnnotation(targetDV, controller.AnnPodRetainAfterCompletion, "true")
targetDataVolume, err := utils.CreateDataVolumeFromDefinition(f.CdiClient, f.Namespace.Name, targetDV)
Expect(err).ToNot(HaveOccurred())
@ -1406,7 +1406,6 @@ var _ = Describe("all clone tests", func() {
It("Should clone using size-detection pod across namespaces", func() {
dataVolume := utils.NewDataVolumeWithHTTPImportAndStorageSpec(dataVolumeName, "200Mi", fmt.Sprintf(utils.TinyCoreIsoURL, f.CdiInstallNs))
dataVolume.Spec.Storage.VolumeMode = &volumeMode
controller.AddAnnotation(dataVolume, controller.AnnPodRetainAfterCompletion, "true")
dataVolume, err := utils.CreateDataVolumeFromDefinition(f.CdiClient, f.Namespace.Name, dataVolume)
Expect(err).ToNot(HaveOccurred())
f.ForceBindPvcIfDvIsWaitForFirstConsumer(dataVolume)
@ -1427,6 +1426,7 @@ var _ = Describe("all clone tests", func() {
// We attempt to create the sizeless clone
targetDataVolume := utils.NewDataVolumeForCloningWithEmptySize("target-dv", f.Namespace.Name, sourcePvc.Name, nil, &volumeMode)
controller.AddAnnotation(targetDataVolume, controller.AnnDeleteAfterCompletion, "false")
controller.AddAnnotation(targetDataVolume, controller.AnnPodRetainAfterCompletion, "true")
targetDataVolume, err = utils.CreateDataVolumeFromDefinition(f.CdiClient, targetNs.Name, targetDataVolume)
Expect(err).ToNot(HaveOccurred())