mirror of
https://github.com/kubevirt/containerized-data-importer.git
synced 2025-06-03 06:30:22 +00:00
Add a field to DataVolume to track the number of retries/pod restarts (#1155)
* Add a field to DataVolume to track the number of retries/pod restarts Signed-off-by: Bartosz Rybacki <brybacki@redhat.com> * Add a field to DataVolume to track the number of retries/pod restarts Signed-off-by: Bartosz Rybacki <brybacki@redhat.com> * Make RESTARTS non-empty on DataVolume (shows as as 0 'zero') Signed-off-by: Bartosz Rybacki <brybacki@redhat.com> * Test reporting restarts on DataVolume when importing. Signed-off-by: Bartosz Rybacki <brybacki@redhat.com> * Fix tests Signed-off-by: Bartosz Rybacki <brybacki@redhat.com> * Code review fixes Signed-off-by: Bartosz Rybacki <brybacki@redhat.com> * Restart Count status test for upload and clone controller Signed-off-by: Bartosz Rybacki <brybacki@redhat.com>
This commit is contained in:
parent
84cf68d926
commit
4605cf1dc2
@ -3241,6 +3241,9 @@
|
||||
},
|
||||
"v1alpha1.DataVolumeStatus": {
|
||||
"description": "DataVolumeStatus provides the parameters to store the phase of the Data Volume",
|
||||
"required": [
|
||||
"restartCount"
|
||||
],
|
||||
"properties": {
|
||||
"phase": {
|
||||
"description": "Phase is the current phase of the data volume",
|
||||
@ -3248,6 +3251,10 @@
|
||||
},
|
||||
"progress": {
|
||||
"type": "string"
|
||||
},
|
||||
"restartCount": {
|
||||
"type": "integer",
|
||||
"format": "int32"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
@ -759,7 +759,14 @@ func schema_pkg_apis_core_v1alpha1_DataVolumeStatus(ref common.ReferenceCallback
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"restartCount": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Type: []string{"integer"},
|
||||
Format: "int32",
|
||||
},
|
||||
},
|
||||
},
|
||||
Required: []string{"restartCount"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
@ -126,8 +126,9 @@ type DataVolumeSourceImageIO struct {
|
||||
// DataVolumeStatus provides the parameters to store the phase of the Data Volume
|
||||
type DataVolumeStatus struct {
|
||||
//Phase is the current phase of the data volume
|
||||
Phase DataVolumePhase `json:"phase,omitempty"`
|
||||
Progress DataVolumeProgress `json:"progress,omitempty"`
|
||||
Phase DataVolumePhase `json:"phase,omitempty"`
|
||||
Progress DataVolumeProgress `json:"progress,omitempty"`
|
||||
RestartCount int32 `json:"restartCount"`
|
||||
}
|
||||
|
||||
//DataVolumeList provides the needed parameters to do request a list of Data Volumes from the system
|
||||
|
@ -217,6 +217,16 @@ func (r *CloneReconciler) updatePvcFromPod(sourcePod *corev1.Pod, pvc *corev1.Pe
|
||||
pvc.Annotations[AnnCloneOf] = "true"
|
||||
r.recorder.Event(pvc, corev1.EventTypeNormal, CloneSucceededPVC, "Clone Successful")
|
||||
}
|
||||
if sourcePod != nil && sourcePod.Status.ContainerStatuses != nil {
|
||||
// update pvc annotation tracking pod restarts only if the source pod restart count is greater
|
||||
// see the same in upload-controller
|
||||
annPodRestarts, _ := strconv.Atoi(pvc.Annotations[AnnPodRestarts])
|
||||
podRestarts := int(sourcePod.Status.ContainerStatuses[0].RestartCount)
|
||||
if podRestarts > annPodRestarts {
|
||||
pvc.Annotations[AnnPodRestarts] = strconv.Itoa(podRestarts)
|
||||
}
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(currentPvcCopy, pvc) {
|
||||
return r.updatePVC(pvc)
|
||||
}
|
||||
|
@ -366,6 +366,78 @@ var _ = Describe("CloneSourcePodName", func() {
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("Update PVC", func() {
|
||||
var (
|
||||
reconciler *CloneReconciler
|
||||
)
|
||||
AfterEach(func() {
|
||||
if reconciler != nil {
|
||||
close(reconciler.recorder.(*record.FakeRecorder).Events)
|
||||
reconciler = nil
|
||||
}
|
||||
})
|
||||
It("Should update AnnPodRestarts on pvc from source pod restarts", func() {
|
||||
testPvc := createPvc("testPvc1", "default", map[string]string{AnnCloneRequest: "default/test"}, nil)
|
||||
pod := createSourcePod(testPvc, string(testPvc.GetUID()))
|
||||
pod.Status = corev1.PodStatus{
|
||||
Phase: corev1.PodFailed,
|
||||
ContainerStatuses: []corev1.ContainerStatus{
|
||||
{
|
||||
RestartCount: 2,
|
||||
LastTerminationState: corev1.ContainerState{
|
||||
Terminated: &corev1.ContainerStateTerminated{
|
||||
ExitCode: 1,
|
||||
Message: "I went poof",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
reconciler = createCloneReconciler(testPvc, createPvc("source", "default", map[string]string{}, nil))
|
||||
|
||||
err := reconciler.updatePvcFromPod(pod, testPvc, reconciler.Log)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
By("Verifying the pvc has original restart count")
|
||||
actualPvc := &corev1.PersistentVolumeClaim{}
|
||||
err = reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: "testPvc1", Namespace: "default"}, actualPvc)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(actualPvc.Annotations[AnnPodRestarts]).To(Equal("2"))
|
||||
})
|
||||
|
||||
It("Should not update AnnPodRestarts on pvc from source pod if pod has lower restart count value", func() {
|
||||
testPvc := createPvc("testPvc1", "default", map[string]string{
|
||||
AnnCloneRequest: "default/test",
|
||||
AnnPodRestarts: "3"},
|
||||
nil)
|
||||
pod := createSourcePod(testPvc, string(testPvc.GetUID()))
|
||||
pod.Status = corev1.PodStatus{
|
||||
Phase: corev1.PodFailed,
|
||||
ContainerStatuses: []corev1.ContainerStatus{
|
||||
{
|
||||
RestartCount: 2,
|
||||
LastTerminationState: corev1.ContainerState{
|
||||
Terminated: &corev1.ContainerStateTerminated{
|
||||
ExitCode: 1,
|
||||
Message: "I went poof",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
reconciler = createCloneReconciler(testPvc, createPvc("source", "default", map[string]string{}, nil))
|
||||
|
||||
err := reconciler.updatePvcFromPod(pod, testPvc, reconciler.Log)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
By("Verifying the pvc has original restart count")
|
||||
actualPvc := &corev1.PersistentVolumeClaim{}
|
||||
err = reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: "testPvc1", Namespace: "default"}, actualPvc)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(actualPvc.Annotations[AnnPodRestarts]).To(Equal("3"))
|
||||
})
|
||||
})
|
||||
|
||||
func createCloneReconciler(objects ...runtime.Object) *CloneReconciler {
|
||||
objs := []runtime.Object{}
|
||||
objs = append(objs, objects...)
|
||||
|
@ -277,6 +277,7 @@ func (r *DatavolumeReconciler) reconcileProgressUpdate(datavolume *cdiv1.DataVol
|
||||
if datavolume.Status.Progress == "" {
|
||||
datavolume.Status.Progress = "N/A"
|
||||
}
|
||||
|
||||
if datavolume.Spec.Source.HTTP != nil {
|
||||
podNamespace = datavolume.Namespace
|
||||
} else if datavolume.Spec.Source.PVC != nil {
|
||||
@ -603,6 +604,11 @@ func (r *DatavolumeReconciler) reconcileDataVolumeStatus(dataVolume *cdiv1.DataV
|
||||
}
|
||||
}
|
||||
|
||||
if pvc != nil {
|
||||
if i, err := strconv.Atoi(pvc.Annotations[AnnPodRestarts]); err == nil && i >= 0 {
|
||||
dataVolumeCopy.Status.RestartCount = int32(i)
|
||||
}
|
||||
}
|
||||
result := reconcile.Result{}
|
||||
var err error
|
||||
if pvc != nil {
|
||||
@ -749,6 +755,7 @@ func newPersistentVolumeClaim(dataVolume *cdiv1.DataVolume) (*corev1.PersistentV
|
||||
annotations[k] = v
|
||||
}
|
||||
|
||||
annotations[AnnPodRestarts] = "0"
|
||||
if dataVolume.Spec.Source.HTTP != nil {
|
||||
annotations[AnnEndpoint] = dataVolume.Spec.Source.HTTP.URL
|
||||
annotations[AnnSource] = SourceHTTP
|
||||
|
@ -133,6 +133,33 @@ var _ = Describe("Datavolume controller reconcile loop", func() {
|
||||
Expect(dv.Status.Phase).To(Equal(cdiv1.Pending))
|
||||
})
|
||||
|
||||
It("Should follow the restarts of the PVC", func() {
|
||||
reconciler = createDatavolumeReconciler(newImportDataVolume("test-dv"))
|
||||
_, err := reconciler.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "test-dv", Namespace: metav1.NamespaceDefault}})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
pvc := &corev1.PersistentVolumeClaim{}
|
||||
err = reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: "test-dv", Namespace: metav1.NamespaceDefault}, pvc)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(pvc.Name).To(Equal("test-dv"))
|
||||
|
||||
dv := &cdiv1.DataVolume{}
|
||||
err = reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: "test-dv", Namespace: metav1.NamespaceDefault}, dv)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(dv.Status.RestartCount).To(Equal(int32(0)))
|
||||
|
||||
pvc.Annotations[AnnPodRestarts] = "2"
|
||||
err = reconciler.Client.Update(context.TODO(), pvc)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
_, err = reconciler.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "test-dv", Namespace: metav1.NamespaceDefault}})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
dv = &cdiv1.DataVolume{}
|
||||
err = reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: "test-dv", Namespace: metav1.NamespaceDefault}, dv)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(dv.Status.RestartCount).To(Equal(int32(2)))
|
||||
})
|
||||
|
||||
It("Should error if a PVC with same name already exists that is not owned by us", func() {
|
||||
reconciler = createDatavolumeReconciler(createPvc("test-dv", metav1.NamespaceDefault, map[string]string{}, nil), newImportDataVolume("test-dv"))
|
||||
_, err := reconciler.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "test-dv", Namespace: metav1.NamespaceDefault}})
|
||||
|
@ -219,7 +219,8 @@ func (r *ImportReconciler) updatePvcFromPod(pvc *corev1.PersistentVolumeClaim, p
|
||||
log.V(1).Info("Updating PVC from pod")
|
||||
anno := pvc.GetAnnotations()
|
||||
scratchExitCode := false
|
||||
if pod.Status.ContainerStatuses != nil && pod.Status.ContainerStatuses[0].LastTerminationState.Terminated != nil &&
|
||||
if pod.Status.ContainerStatuses != nil &&
|
||||
pod.Status.ContainerStatuses[0].LastTerminationState.Terminated != nil &&
|
||||
pod.Status.ContainerStatuses[0].LastTerminationState.Terminated.ExitCode > 0 {
|
||||
log.Info("Pod termination code", "pod.Name", pod.Name, "ExitCode", pod.Status.ContainerStatuses[0].LastTerminationState.Terminated.ExitCode)
|
||||
if pod.Status.ContainerStatuses[0].LastTerminationState.Terminated.ExitCode == common.ScratchSpaceNeededExitCode {
|
||||
@ -231,6 +232,9 @@ func (r *ImportReconciler) updatePvcFromPod(pvc *corev1.PersistentVolumeClaim, p
|
||||
}
|
||||
}
|
||||
|
||||
if pod.Status.ContainerStatuses != nil {
|
||||
anno[AnnPodRestarts] = strconv.Itoa(int(pod.Status.ContainerStatuses[0].RestartCount))
|
||||
}
|
||||
anno[AnnImportPod] = string(pod.Name)
|
||||
// Even if scratch space is needed, the pod state will still remain running, until the new pod is started.
|
||||
anno[AnnPodPhase] = string(pod.Status.Phase)
|
||||
@ -254,7 +258,7 @@ func (r *ImportReconciler) updatePvcFromPod(pvc *corev1.PersistentVolumeClaim, p
|
||||
if err := r.updatePVC(pvc, log); err != nil {
|
||||
return err
|
||||
}
|
||||
log.V(1).Info("Updated PVC", "pvc.anno.Phase", anno[AnnPodPhase])
|
||||
log.V(1).Info("Updated PVC", "pvc.anno.Phase", anno[AnnPodPhase], "pvc.anno.Restarts", anno[AnnPodRestarts])
|
||||
}
|
||||
|
||||
if isPVCComplete(pvc) || scratchExitCode {
|
||||
@ -271,6 +275,7 @@ func (r *ImportReconciler) updatePvcFromPod(pvc *corev1.PersistentVolumeClaim, p
|
||||
|
||||
func (r *ImportReconciler) updatePVC(pvc *corev1.PersistentVolumeClaim, log logr.Logger) error {
|
||||
log.V(1).Info("Phase is now", "pvc.anno.Phase", pvc.GetAnnotations()[AnnPodPhase])
|
||||
log.V(1).Info("Restarts is now", "pvc.anno.Restarts", pvc.GetAnnotations()[AnnPodRestarts])
|
||||
if err := r.Client.Update(context.TODO(), pvc); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -291,6 +291,7 @@ var _ = Describe("Update PVC from POD", func() {
|
||||
Phase: corev1.PodFailed,
|
||||
ContainerStatuses: []corev1.ContainerStatus{
|
||||
{
|
||||
RestartCount: 2,
|
||||
LastTerminationState: corev1.ContainerState{
|
||||
Terminated: &corev1.ContainerStateTerminated{
|
||||
ExitCode: 1,
|
||||
@ -309,6 +310,7 @@ var _ = Describe("Update PVC from POD", func() {
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(resPvc.GetAnnotations()[AnnPodPhase]).To(BeEquivalentTo(corev1.PodFailed))
|
||||
Expect(resPvc.GetAnnotations()[AnnImportPod]).To(Equal(pod.Name))
|
||||
Expect(resPvc.GetAnnotations()[AnnPodRestarts]).To(Equal("2"))
|
||||
By("Checking error event recorded")
|
||||
event := <-reconciler.recorder.(*record.FakeRecorder).Events
|
||||
Expect(event).To(ContainSubstring("I went poof"))
|
||||
@ -340,6 +342,7 @@ var _ = Describe("Update PVC from POD", func() {
|
||||
By("Verifying that the phase hasn't changed")
|
||||
Expect(resPvc.GetAnnotations()[AnnPodPhase]).To(BeEquivalentTo(corev1.PodRunning))
|
||||
Expect(resPvc.GetAnnotations()[AnnImportPod]).To(Equal(pod.Name))
|
||||
Expect(resPvc.GetAnnotations()[AnnPodRestarts]).To(Equal("0"))
|
||||
// No scratch space because the pod is not in pending.
|
||||
})
|
||||
})
|
||||
|
@ -164,6 +164,16 @@ func (r *UploadReconciler) reconcilePVC(log logr.Logger, pvc *corev1.PersistentV
|
||||
pvcCopy.Annotations[AnnPodPhase] = string(podPhase)
|
||||
pvcCopy.Annotations[AnnPodReady] = strconv.FormatBool(isPodReady(pod))
|
||||
|
||||
if pod.Status.ContainerStatuses != nil {
|
||||
// update pvc annotation tracking pod restarts only if the source pod restart count is greater
|
||||
// see the same in clone-controller
|
||||
pvcAnnPodRestarts, _ := strconv.Atoi(pvcCopy.Annotations[AnnPodRestarts])
|
||||
podRestarts := int(pod.Status.ContainerStatuses[0].RestartCount)
|
||||
if podRestarts > pvcAnnPodRestarts {
|
||||
pvcCopy.Annotations[AnnPodRestarts] = strconv.Itoa(podRestarts)
|
||||
}
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(pvc, pvcCopy) {
|
||||
if err := r.updatePVC(pvcCopy); err != nil {
|
||||
return reconcile.Result{}, err
|
||||
|
@ -304,6 +304,76 @@ var _ = Describe("reconcilePVC loop", func() {
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("Update PVC", func() {
|
||||
|
||||
It("Should update AnnPodRestarts on pvc from upload pod restarts", func() {
|
||||
testPvc := createPvc("testPvc1", "default",
|
||||
map[string]string{
|
||||
AnnUploadRequest: "",
|
||||
AnnPodPhase: string(corev1.PodPending),
|
||||
AnnPodRestarts: "1"}, nil)
|
||||
pod := createUploadPod(testPvc)
|
||||
pod.Status = corev1.PodStatus{
|
||||
Phase: corev1.PodFailed,
|
||||
ContainerStatuses: []corev1.ContainerStatus{
|
||||
{
|
||||
RestartCount: 2,
|
||||
LastTerminationState: corev1.ContainerState{
|
||||
Terminated: &corev1.ContainerStateTerminated{
|
||||
ExitCode: 1,
|
||||
Message: "I went poof",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
reconciler := createUploadReconciler(testPvc, pod, createUploadService(testPvc))
|
||||
|
||||
_, err := reconciler.reconcilePVC(reconciler.Log, testPvc, false)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
By("Verifying the pvc has restarts updated")
|
||||
actualPvc := &corev1.PersistentVolumeClaim{}
|
||||
err = reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: "testPvc1", Namespace: "default"}, actualPvc)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(actualPvc.Annotations[AnnPodRestarts]).To(Equal("2"))
|
||||
})
|
||||
|
||||
It("Should not update AnnPodRestarts on pvc from pod if pod has lower restart count value ", func() {
|
||||
testPvc := createPvc("testPvc1", "default",
|
||||
map[string]string{
|
||||
AnnUploadRequest: "",
|
||||
AnnPodPhase: string(corev1.PodPending),
|
||||
AnnPodRestarts: "3"},
|
||||
nil)
|
||||
pod := createUploadPod(testPvc)
|
||||
pod.Status = corev1.PodStatus{
|
||||
Phase: corev1.PodFailed,
|
||||
ContainerStatuses: []corev1.ContainerStatus{
|
||||
{
|
||||
RestartCount: 2,
|
||||
LastTerminationState: corev1.ContainerState{
|
||||
Terminated: &corev1.ContainerStateTerminated{
|
||||
ExitCode: 1,
|
||||
Message: "I went poof",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
reconciler := createUploadReconciler(testPvc, pod, createUploadService(testPvc))
|
||||
|
||||
_, err := reconciler.reconcilePVC(reconciler.Log, testPvc, false)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
By("Verifying the pvc has original restart count")
|
||||
actualPvc := &corev1.PersistentVolumeClaim{}
|
||||
err = reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: "testPvc1", Namespace: "default"}, actualPvc)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(actualPvc.Annotations[AnnPodRestarts]).To(Equal("3"))
|
||||
})
|
||||
})
|
||||
|
||||
func createUploadReconciler(objects ...runtime.Object) *UploadReconciler {
|
||||
objs := []runtime.Object{}
|
||||
objs = append(objs, objects...)
|
||||
|
@ -63,6 +63,8 @@ const (
|
||||
AnnPodReady = AnnAPIGroup + "/storage.pod.ready"
|
||||
// AnnOwnerRef is used when owner is in a different namespace
|
||||
AnnOwnerRef = AnnAPIGroup + "/storage.ownerRef"
|
||||
// AnnPodRestarts is a PVC annotation that tells how many times a related pod was restarted
|
||||
AnnPodRestarts = AnnAPIGroup + "/storage.pod.restarts"
|
||||
// SourceImageio is the source type ovirt-imageio
|
||||
SourceImageio = "imageio"
|
||||
)
|
||||
|
@ -165,6 +165,12 @@ func createDataVolumeCRD() *extv1beta1.CustomResourceDefinition {
|
||||
Description: "Transfer progress in percentage if known, N/A otherwise",
|
||||
JSONPath: ".status.progress",
|
||||
},
|
||||
{
|
||||
Name: "Restarts",
|
||||
Type: "integer",
|
||||
Description: "The number of times the transfer has been restarted.",
|
||||
JSONPath: ".status.restartCount",
|
||||
},
|
||||
{
|
||||
Name: "Age",
|
||||
Type: "date",
|
||||
|
@ -358,4 +358,23 @@ var _ = Describe("[vendor:cnv-qe@redhat.com][level:component]DataVolume tests",
|
||||
}, timeout, pollingInterval).Should(BeTrue())
|
||||
})
|
||||
})
|
||||
|
||||
Describe("Restarts reporting on import datavolume", func() {
|
||||
It("Should report restarts on failed import", func() {
|
||||
dataVolume := utils.NewDataVolumeWithHTTPImport(dataVolumeName, "1Gi", InvalidQcowImagesURL)
|
||||
By(fmt.Sprintf("creating new datavolume %s", dataVolume.Name))
|
||||
dataVolume, err := utils.CreateDataVolumeFromDefinition(f.CdiClient, f.Namespace.Name, dataVolume)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
By("Verify dv")
|
||||
Eventually(func() int32 {
|
||||
dv, err := f.CdiClient.CdiV1alpha1().DataVolumes(f.Namespace.Name).Get(dataVolume.Name, metav1.GetOptions{})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
restarts := dv.Status.RestartCount
|
||||
return restarts
|
||||
}, timeout, pollingInterval).Should(BeNumerically(">=", 1))
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
})
|
||||
})
|
||||
})
|
||||
|
Loading…
Reference in New Issue
Block a user