Add a field to DataVolume to track the number of retries/pod restarts (#1155)

* Add a field to DataVolume to track the number of retries/pod restarts

Signed-off-by: Bartosz Rybacki <brybacki@redhat.com>

* Add a field to DataVolume to track the number of retries/pod restarts

Signed-off-by: Bartosz Rybacki <brybacki@redhat.com>

* Make RESTARTS non-empty on DataVolume (shows as as 0 'zero')

Signed-off-by: Bartosz Rybacki <brybacki@redhat.com>

* Test reporting restarts on DataVolume when importing.

Signed-off-by: Bartosz Rybacki <brybacki@redhat.com>

* Fix tests

Signed-off-by: Bartosz Rybacki <brybacki@redhat.com>

* Code review fixes

Signed-off-by: Bartosz Rybacki <brybacki@redhat.com>

* Restart Count status test for upload and clone controller

Signed-off-by: Bartosz Rybacki <brybacki@redhat.com>
This commit is contained in:
Bartosz Rybacki 2020-03-30 23:17:49 +02:00 committed by GitHub
parent 84cf68d926
commit 4605cf1dc2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 250 additions and 4 deletions

View File

@ -3241,6 +3241,9 @@
},
"v1alpha1.DataVolumeStatus": {
"description": "DataVolumeStatus provides the parameters to store the phase of the Data Volume",
"required": [
"restartCount"
],
"properties": {
"phase": {
"description": "Phase is the current phase of the data volume",
@ -3248,6 +3251,10 @@
},
"progress": {
"type": "string"
},
"restartCount": {
"type": "integer",
"format": "int32"
}
}
},

View File

@ -759,7 +759,14 @@ func schema_pkg_apis_core_v1alpha1_DataVolumeStatus(ref common.ReferenceCallback
Format: "",
},
},
"restartCount": {
SchemaProps: spec.SchemaProps{
Type: []string{"integer"},
Format: "int32",
},
},
},
Required: []string{"restartCount"},
},
},
}

View File

@ -126,8 +126,9 @@ type DataVolumeSourceImageIO struct {
// DataVolumeStatus provides the parameters to store the phase of the Data Volume
type DataVolumeStatus struct {
//Phase is the current phase of the data volume
Phase DataVolumePhase `json:"phase,omitempty"`
Progress DataVolumeProgress `json:"progress,omitempty"`
Phase DataVolumePhase `json:"phase,omitempty"`
Progress DataVolumeProgress `json:"progress,omitempty"`
RestartCount int32 `json:"restartCount"`
}
//DataVolumeList provides the needed parameters to do request a list of Data Volumes from the system

View File

@ -217,6 +217,16 @@ func (r *CloneReconciler) updatePvcFromPod(sourcePod *corev1.Pod, pvc *corev1.Pe
pvc.Annotations[AnnCloneOf] = "true"
r.recorder.Event(pvc, corev1.EventTypeNormal, CloneSucceededPVC, "Clone Successful")
}
if sourcePod != nil && sourcePod.Status.ContainerStatuses != nil {
// update pvc annotation tracking pod restarts only if the source pod restart count is greater
// see the same in upload-controller
annPodRestarts, _ := strconv.Atoi(pvc.Annotations[AnnPodRestarts])
podRestarts := int(sourcePod.Status.ContainerStatuses[0].RestartCount)
if podRestarts > annPodRestarts {
pvc.Annotations[AnnPodRestarts] = strconv.Itoa(podRestarts)
}
}
if !reflect.DeepEqual(currentPvcCopy, pvc) {
return r.updatePVC(pvc)
}

View File

@ -366,6 +366,78 @@ var _ = Describe("CloneSourcePodName", func() {
})
})
var _ = Describe("Update PVC", func() {
var (
reconciler *CloneReconciler
)
AfterEach(func() {
if reconciler != nil {
close(reconciler.recorder.(*record.FakeRecorder).Events)
reconciler = nil
}
})
It("Should update AnnPodRestarts on pvc from source pod restarts", func() {
testPvc := createPvc("testPvc1", "default", map[string]string{AnnCloneRequest: "default/test"}, nil)
pod := createSourcePod(testPvc, string(testPvc.GetUID()))
pod.Status = corev1.PodStatus{
Phase: corev1.PodFailed,
ContainerStatuses: []corev1.ContainerStatus{
{
RestartCount: 2,
LastTerminationState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{
ExitCode: 1,
Message: "I went poof",
},
},
},
},
}
reconciler = createCloneReconciler(testPvc, createPvc("source", "default", map[string]string{}, nil))
err := reconciler.updatePvcFromPod(pod, testPvc, reconciler.Log)
Expect(err).ToNot(HaveOccurred())
By("Verifying the pvc has original restart count")
actualPvc := &corev1.PersistentVolumeClaim{}
err = reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: "testPvc1", Namespace: "default"}, actualPvc)
Expect(err).ToNot(HaveOccurred())
Expect(actualPvc.Annotations[AnnPodRestarts]).To(Equal("2"))
})
It("Should not update AnnPodRestarts on pvc from source pod if pod has lower restart count value", func() {
testPvc := createPvc("testPvc1", "default", map[string]string{
AnnCloneRequest: "default/test",
AnnPodRestarts: "3"},
nil)
pod := createSourcePod(testPvc, string(testPvc.GetUID()))
pod.Status = corev1.PodStatus{
Phase: corev1.PodFailed,
ContainerStatuses: []corev1.ContainerStatus{
{
RestartCount: 2,
LastTerminationState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{
ExitCode: 1,
Message: "I went poof",
},
},
},
},
}
reconciler = createCloneReconciler(testPvc, createPvc("source", "default", map[string]string{}, nil))
err := reconciler.updatePvcFromPod(pod, testPvc, reconciler.Log)
Expect(err).ToNot(HaveOccurred())
By("Verifying the pvc has original restart count")
actualPvc := &corev1.PersistentVolumeClaim{}
err = reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: "testPvc1", Namespace: "default"}, actualPvc)
Expect(err).ToNot(HaveOccurred())
Expect(actualPvc.Annotations[AnnPodRestarts]).To(Equal("3"))
})
})
func createCloneReconciler(objects ...runtime.Object) *CloneReconciler {
objs := []runtime.Object{}
objs = append(objs, objects...)

View File

@ -277,6 +277,7 @@ func (r *DatavolumeReconciler) reconcileProgressUpdate(datavolume *cdiv1.DataVol
if datavolume.Status.Progress == "" {
datavolume.Status.Progress = "N/A"
}
if datavolume.Spec.Source.HTTP != nil {
podNamespace = datavolume.Namespace
} else if datavolume.Spec.Source.PVC != nil {
@ -603,6 +604,11 @@ func (r *DatavolumeReconciler) reconcileDataVolumeStatus(dataVolume *cdiv1.DataV
}
}
if pvc != nil {
if i, err := strconv.Atoi(pvc.Annotations[AnnPodRestarts]); err == nil && i >= 0 {
dataVolumeCopy.Status.RestartCount = int32(i)
}
}
result := reconcile.Result{}
var err error
if pvc != nil {
@ -749,6 +755,7 @@ func newPersistentVolumeClaim(dataVolume *cdiv1.DataVolume) (*corev1.PersistentV
annotations[k] = v
}
annotations[AnnPodRestarts] = "0"
if dataVolume.Spec.Source.HTTP != nil {
annotations[AnnEndpoint] = dataVolume.Spec.Source.HTTP.URL
annotations[AnnSource] = SourceHTTP

View File

@ -133,6 +133,33 @@ var _ = Describe("Datavolume controller reconcile loop", func() {
Expect(dv.Status.Phase).To(Equal(cdiv1.Pending))
})
It("Should follow the restarts of the PVC", func() {
reconciler = createDatavolumeReconciler(newImportDataVolume("test-dv"))
_, err := reconciler.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "test-dv", Namespace: metav1.NamespaceDefault}})
Expect(err).ToNot(HaveOccurred())
pvc := &corev1.PersistentVolumeClaim{}
err = reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: "test-dv", Namespace: metav1.NamespaceDefault}, pvc)
Expect(err).ToNot(HaveOccurred())
Expect(pvc.Name).To(Equal("test-dv"))
dv := &cdiv1.DataVolume{}
err = reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: "test-dv", Namespace: metav1.NamespaceDefault}, dv)
Expect(err).ToNot(HaveOccurred())
Expect(dv.Status.RestartCount).To(Equal(int32(0)))
pvc.Annotations[AnnPodRestarts] = "2"
err = reconciler.Client.Update(context.TODO(), pvc)
Expect(err).ToNot(HaveOccurred())
_, err = reconciler.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "test-dv", Namespace: metav1.NamespaceDefault}})
Expect(err).ToNot(HaveOccurred())
dv = &cdiv1.DataVolume{}
err = reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: "test-dv", Namespace: metav1.NamespaceDefault}, dv)
Expect(err).ToNot(HaveOccurred())
Expect(dv.Status.RestartCount).To(Equal(int32(2)))
})
It("Should error if a PVC with same name already exists that is not owned by us", func() {
reconciler = createDatavolumeReconciler(createPvc("test-dv", metav1.NamespaceDefault, map[string]string{}, nil), newImportDataVolume("test-dv"))
_, err := reconciler.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "test-dv", Namespace: metav1.NamespaceDefault}})

View File

@ -219,7 +219,8 @@ func (r *ImportReconciler) updatePvcFromPod(pvc *corev1.PersistentVolumeClaim, p
log.V(1).Info("Updating PVC from pod")
anno := pvc.GetAnnotations()
scratchExitCode := false
if pod.Status.ContainerStatuses != nil && pod.Status.ContainerStatuses[0].LastTerminationState.Terminated != nil &&
if pod.Status.ContainerStatuses != nil &&
pod.Status.ContainerStatuses[0].LastTerminationState.Terminated != nil &&
pod.Status.ContainerStatuses[0].LastTerminationState.Terminated.ExitCode > 0 {
log.Info("Pod termination code", "pod.Name", pod.Name, "ExitCode", pod.Status.ContainerStatuses[0].LastTerminationState.Terminated.ExitCode)
if pod.Status.ContainerStatuses[0].LastTerminationState.Terminated.ExitCode == common.ScratchSpaceNeededExitCode {
@ -231,6 +232,9 @@ func (r *ImportReconciler) updatePvcFromPod(pvc *corev1.PersistentVolumeClaim, p
}
}
if pod.Status.ContainerStatuses != nil {
anno[AnnPodRestarts] = strconv.Itoa(int(pod.Status.ContainerStatuses[0].RestartCount))
}
anno[AnnImportPod] = string(pod.Name)
// Even if scratch space is needed, the pod state will still remain running, until the new pod is started.
anno[AnnPodPhase] = string(pod.Status.Phase)
@ -254,7 +258,7 @@ func (r *ImportReconciler) updatePvcFromPod(pvc *corev1.PersistentVolumeClaim, p
if err := r.updatePVC(pvc, log); err != nil {
return err
}
log.V(1).Info("Updated PVC", "pvc.anno.Phase", anno[AnnPodPhase])
log.V(1).Info("Updated PVC", "pvc.anno.Phase", anno[AnnPodPhase], "pvc.anno.Restarts", anno[AnnPodRestarts])
}
if isPVCComplete(pvc) || scratchExitCode {
@ -271,6 +275,7 @@ func (r *ImportReconciler) updatePvcFromPod(pvc *corev1.PersistentVolumeClaim, p
func (r *ImportReconciler) updatePVC(pvc *corev1.PersistentVolumeClaim, log logr.Logger) error {
log.V(1).Info("Phase is now", "pvc.anno.Phase", pvc.GetAnnotations()[AnnPodPhase])
log.V(1).Info("Restarts is now", "pvc.anno.Restarts", pvc.GetAnnotations()[AnnPodRestarts])
if err := r.Client.Update(context.TODO(), pvc); err != nil {
return err
}

View File

@ -291,6 +291,7 @@ var _ = Describe("Update PVC from POD", func() {
Phase: corev1.PodFailed,
ContainerStatuses: []corev1.ContainerStatus{
{
RestartCount: 2,
LastTerminationState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{
ExitCode: 1,
@ -309,6 +310,7 @@ var _ = Describe("Update PVC from POD", func() {
Expect(err).ToNot(HaveOccurred())
Expect(resPvc.GetAnnotations()[AnnPodPhase]).To(BeEquivalentTo(corev1.PodFailed))
Expect(resPvc.GetAnnotations()[AnnImportPod]).To(Equal(pod.Name))
Expect(resPvc.GetAnnotations()[AnnPodRestarts]).To(Equal("2"))
By("Checking error event recorded")
event := <-reconciler.recorder.(*record.FakeRecorder).Events
Expect(event).To(ContainSubstring("I went poof"))
@ -340,6 +342,7 @@ var _ = Describe("Update PVC from POD", func() {
By("Verifying that the phase hasn't changed")
Expect(resPvc.GetAnnotations()[AnnPodPhase]).To(BeEquivalentTo(corev1.PodRunning))
Expect(resPvc.GetAnnotations()[AnnImportPod]).To(Equal(pod.Name))
Expect(resPvc.GetAnnotations()[AnnPodRestarts]).To(Equal("0"))
// No scratch space because the pod is not in pending.
})
})

View File

@ -164,6 +164,16 @@ func (r *UploadReconciler) reconcilePVC(log logr.Logger, pvc *corev1.PersistentV
pvcCopy.Annotations[AnnPodPhase] = string(podPhase)
pvcCopy.Annotations[AnnPodReady] = strconv.FormatBool(isPodReady(pod))
if pod.Status.ContainerStatuses != nil {
// update pvc annotation tracking pod restarts only if the source pod restart count is greater
// see the same in clone-controller
pvcAnnPodRestarts, _ := strconv.Atoi(pvcCopy.Annotations[AnnPodRestarts])
podRestarts := int(pod.Status.ContainerStatuses[0].RestartCount)
if podRestarts > pvcAnnPodRestarts {
pvcCopy.Annotations[AnnPodRestarts] = strconv.Itoa(podRestarts)
}
}
if !reflect.DeepEqual(pvc, pvcCopy) {
if err := r.updatePVC(pvcCopy); err != nil {
return reconcile.Result{}, err

View File

@ -304,6 +304,76 @@ var _ = Describe("reconcilePVC loop", func() {
})
})
var _ = Describe("Update PVC", func() {
It("Should update AnnPodRestarts on pvc from upload pod restarts", func() {
testPvc := createPvc("testPvc1", "default",
map[string]string{
AnnUploadRequest: "",
AnnPodPhase: string(corev1.PodPending),
AnnPodRestarts: "1"}, nil)
pod := createUploadPod(testPvc)
pod.Status = corev1.PodStatus{
Phase: corev1.PodFailed,
ContainerStatuses: []corev1.ContainerStatus{
{
RestartCount: 2,
LastTerminationState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{
ExitCode: 1,
Message: "I went poof",
},
},
},
},
}
reconciler := createUploadReconciler(testPvc, pod, createUploadService(testPvc))
_, err := reconciler.reconcilePVC(reconciler.Log, testPvc, false)
Expect(err).ToNot(HaveOccurred())
By("Verifying the pvc has restarts updated")
actualPvc := &corev1.PersistentVolumeClaim{}
err = reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: "testPvc1", Namespace: "default"}, actualPvc)
Expect(err).ToNot(HaveOccurred())
Expect(actualPvc.Annotations[AnnPodRestarts]).To(Equal("2"))
})
It("Should not update AnnPodRestarts on pvc from pod if pod has lower restart count value ", func() {
testPvc := createPvc("testPvc1", "default",
map[string]string{
AnnUploadRequest: "",
AnnPodPhase: string(corev1.PodPending),
AnnPodRestarts: "3"},
nil)
pod := createUploadPod(testPvc)
pod.Status = corev1.PodStatus{
Phase: corev1.PodFailed,
ContainerStatuses: []corev1.ContainerStatus{
{
RestartCount: 2,
LastTerminationState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{
ExitCode: 1,
Message: "I went poof",
},
},
},
},
}
reconciler := createUploadReconciler(testPvc, pod, createUploadService(testPvc))
_, err := reconciler.reconcilePVC(reconciler.Log, testPvc, false)
Expect(err).ToNot(HaveOccurred())
By("Verifying the pvc has original restart count")
actualPvc := &corev1.PersistentVolumeClaim{}
err = reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: "testPvc1", Namespace: "default"}, actualPvc)
Expect(err).ToNot(HaveOccurred())
Expect(actualPvc.Annotations[AnnPodRestarts]).To(Equal("3"))
})
})
func createUploadReconciler(objects ...runtime.Object) *UploadReconciler {
objs := []runtime.Object{}
objs = append(objs, objects...)

View File

@ -63,6 +63,8 @@ const (
AnnPodReady = AnnAPIGroup + "/storage.pod.ready"
// AnnOwnerRef is used when owner is in a different namespace
AnnOwnerRef = AnnAPIGroup + "/storage.ownerRef"
// AnnPodRestarts is a PVC annotation that tells how many times a related pod was restarted
AnnPodRestarts = AnnAPIGroup + "/storage.pod.restarts"
// SourceImageio is the source type ovirt-imageio
SourceImageio = "imageio"
)

View File

@ -165,6 +165,12 @@ func createDataVolumeCRD() *extv1beta1.CustomResourceDefinition {
Description: "Transfer progress in percentage if known, N/A otherwise",
JSONPath: ".status.progress",
},
{
Name: "Restarts",
Type: "integer",
Description: "The number of times the transfer has been restarted.",
JSONPath: ".status.restartCount",
},
{
Name: "Age",
Type: "date",

View File

@ -358,4 +358,23 @@ var _ = Describe("[vendor:cnv-qe@redhat.com][level:component]DataVolume tests",
}, timeout, pollingInterval).Should(BeTrue())
})
})
Describe("Restarts reporting on import datavolume", func() {
It("Should report restarts on failed import", func() {
dataVolume := utils.NewDataVolumeWithHTTPImport(dataVolumeName, "1Gi", InvalidQcowImagesURL)
By(fmt.Sprintf("creating new datavolume %s", dataVolume.Name))
dataVolume, err := utils.CreateDataVolumeFromDefinition(f.CdiClient, f.Namespace.Name, dataVolume)
Expect(err).ToNot(HaveOccurred())
By("Verify dv")
Eventually(func() int32 {
dv, err := f.CdiClient.CdiV1alpha1().DataVolumes(f.Namespace.Name).Get(dataVolume.Name, metav1.GetOptions{})
Expect(err).NotTo(HaveOccurred())
restarts := dv.Status.RestartCount
return restarts
}, timeout, pollingInterval).Should(BeNumerically(">=", 1))
Expect(err).ToNot(HaveOccurred())
})
})
})