mirror of
https://github.com/kubevirt/containerized-data-importer.git
synced 2025-06-03 06:30:22 +00:00

Signed-off-by: Aviv Litman <alitman@redhat.com> Co-authored-by: Aviv Litman <alitman@redhat.com>
453 lines
17 KiB
Go
453 lines
17 KiB
Go
package tests_test
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"strconv"
|
|
"time"
|
|
|
|
. "github.com/onsi/ginkgo"
|
|
. "github.com/onsi/gomega"
|
|
|
|
corev1 "k8s.io/api/core/v1"
|
|
storagev1 "k8s.io/api/storage/v1"
|
|
"k8s.io/apimachinery/pkg/api/errors"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/apimachinery/pkg/types"
|
|
crclient "sigs.k8s.io/controller-runtime/pkg/client"
|
|
|
|
promv1 "github.com/coreos/prometheus-operator/pkg/apis/monitoring/v1"
|
|
|
|
cdiv1 "kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1"
|
|
|
|
"kubevirt.io/containerized-data-importer/pkg/controller"
|
|
"kubevirt.io/containerized-data-importer/pkg/storagecapabilities"
|
|
"kubevirt.io/containerized-data-importer/tests/framework"
|
|
"kubevirt.io/containerized-data-importer/tests/utils"
|
|
)
|
|
|
|
// Polling parameters shared by the metric assertions in this file.
const (
	// metricPollingInterval is the delay between two consecutive metric reads.
	metricPollingInterval time.Duration = 5 * time.Second
	// metricPollingTimeout is the overall deadline for a metric to reach
	// (or keep) the expected value.
	metricPollingTimeout time.Duration = 5 * time.Minute
)
|
|
|
|
var _ = Describe("[Destructive] Monitoring Tests", func() {
|
|
f := framework.NewFramework("monitoring-test")
|
|
|
|
var (
|
|
cr *cdiv1.CDI
|
|
crModified bool
|
|
cdiPods *corev1.PodList
|
|
numAddedStorageClasses int
|
|
originalMetricVal int
|
|
)
|
|
|
|
waitForIncompleteMetricInitialization := func() {
|
|
Eventually(func() int {
|
|
return getMetricValue(f, "kubevirt_cdi_incomplete_storageprofiles")
|
|
}, 2*time.Minute, 1*time.Second).ShouldNot(Equal(-1))
|
|
}
|
|
|
|
BeforeEach(func() {
|
|
if !f.IsPrometheusAvailable() {
|
|
Skip("This test depends on prometheus infra being available")
|
|
}
|
|
|
|
cr = getCDI(f)
|
|
cdiPods = getCDIPods(f)
|
|
|
|
waitForIncompleteMetricInitialization()
|
|
originalMetricVal = getMetricValue(f, "kubevirt_cdi_incomplete_storageprofiles")
|
|
})
|
|
|
|
AfterEach(func() {
|
|
By("Delete unknown storage classes")
|
|
for i := 0; i < numAddedStorageClasses; i++ {
|
|
name := fmt.Sprintf("unknown-sc-%d", i)
|
|
_, err := f.K8sClient.StorageV1().StorageClasses().Get(context.TODO(), name, metav1.GetOptions{})
|
|
if err != nil && errors.IsNotFound(err) {
|
|
continue
|
|
}
|
|
err = f.K8sClient.StorageV1().StorageClasses().Delete(context.TODO(), name, metav1.DeleteOptions{})
|
|
Expect(err).ToNot(HaveOccurred())
|
|
}
|
|
|
|
if crModified {
|
|
removeCDI(f, cr)
|
|
ensureCDI(f, cr, cdiPods)
|
|
crModified = false
|
|
}
|
|
|
|
Eventually(func() int {
|
|
return getMetricValue(f, "kubevirt_cdi_incomplete_storageprofiles")
|
|
}, 5*time.Minute, 5*time.Second).Should(BeNumerically("==", originalMetricVal))
|
|
})
|
|
|
|
Context("[rfe_id:7101][crit:medium][vendor:cnv-qe@redhat.com][level:component] Metrics and Alert tests", func() {
|
|
|
|
It("[test_id:9656] Metric kubevirt_cdi_cr_ready is 0 when CDI is not ready", func() {
|
|
Eventually(func() int {
|
|
return getMetricValue(f, "kubevirt_cdi_cr_ready")
|
|
}, metricPollingTimeout, metricPollingInterval).Should(BeNumerically("==", 1))
|
|
|
|
crModified = true
|
|
removeCDI(f, cr)
|
|
|
|
By("Creating new CDI with wrong NodeSelector")
|
|
cdi := &cdiv1.CDI{
|
|
ObjectMeta: metav1.ObjectMeta{
|
|
Name: cr.Name,
|
|
},
|
|
Spec: cr.Spec,
|
|
}
|
|
cdi.Spec.Infra.NodeSelector = map[string]string{"wrong": "wrong"}
|
|
_, err := f.CdiClient.CdiV1beta1().CDIs().Create(context.TODO(), cdi, metav1.CreateOptions{})
|
|
Expect(err).ToNot(HaveOccurred())
|
|
|
|
By("Wait for kubevirt_cdi_cr_ready == 0")
|
|
Eventually(func() int {
|
|
return getMetricValue(f, "kubevirt_cdi_cr_ready")
|
|
}, metricPollingTimeout, metricPollingInterval).Should(BeNumerically("==", 0))
|
|
|
|
By("Check that the CDINotReady alert is triggered")
|
|
waitForPrometheusAlert(f, "CDINotReady")
|
|
|
|
By("Revert CDI CR changes")
|
|
cdi, err = f.CdiClient.CdiV1beta1().CDIs().Get(context.TODO(), cr.Name, metav1.GetOptions{})
|
|
Expect(err).ToNot(HaveOccurred())
|
|
cdi.Spec = cr.Spec
|
|
_, err = f.CdiClient.CdiV1beta1().CDIs().Update(context.TODO(), cdi, metav1.UpdateOptions{})
|
|
Expect(err).ToNot(HaveOccurred())
|
|
waitCDI(f, cr, cdiPods)
|
|
crModified = false
|
|
|
|
By("Wait for kubevirt_cdi_cr_ready == 1")
|
|
Eventually(func() int {
|
|
return getMetricValue(f, "kubevirt_cdi_cr_ready")
|
|
}, metricPollingTimeout, metricPollingInterval).Should(BeNumerically("==", 1))
|
|
})
|
|
|
|
It("[test_id:7963] CDI ready metric value as expected when ready to use", func() {
|
|
Eventually(func() int {
|
|
return getMetricValue(f, "kubevirt_cdi_cr_ready")
|
|
}, metricPollingTimeout, metricPollingInterval).Should(BeNumerically("==", 1))
|
|
})
|
|
|
|
It("[test_id:7965] StorageProfile incomplete metric expected value when creating an incomplete profile", func() {
|
|
defaultStorageClass := utils.GetDefaultStorageClass(f.K8sClient)
|
|
defaultStorageClassProfile := &cdiv1.StorageProfile{}
|
|
err := f.CrClient.Get(context.TODO(), types.NamespacedName{Name: defaultStorageClass.Name}, defaultStorageClassProfile)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
|
|
numAddedStorageClasses = 2
|
|
for i := 0; i < numAddedStorageClasses; i++ {
|
|
_, err = f.K8sClient.StorageV1().StorageClasses().Create(context.TODO(), createUnknownStorageClass(fmt.Sprintf("unknown-sc-%d", i), "kubernetes.io/non-existent-provisioner"), metav1.CreateOptions{})
|
|
Expect(err).ToNot(HaveOccurred())
|
|
}
|
|
|
|
expectedIncomplete := originalMetricVal + numAddedStorageClasses
|
|
Eventually(func() int {
|
|
return getMetricValue(f, "kubevirt_cdi_incomplete_storageprofiles")
|
|
}, metricPollingTimeout, metricPollingInterval).Should(BeNumerically("==", expectedIncomplete))
|
|
|
|
By("Check that the CDIStorageProfilesIncomplete alert is triggered")
|
|
waitForPrometheusAlert(f, "CDIStorageProfilesIncomplete")
|
|
|
|
By("Fix profiles to be complete and test metric value equals original")
|
|
for i := 0; i < numAddedStorageClasses; i++ {
|
|
profile := &cdiv1.StorageProfile{}
|
|
err = f.CrClient.Get(context.TODO(), types.NamespacedName{Name: fmt.Sprintf("unknown-sc-%d", i)}, profile)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
// These might be wrong values, but at least the profile is no longer incomplete
|
|
profile.Spec.ClaimPropertySets = defaultStorageClassProfile.Status.ClaimPropertySets
|
|
err = f.CrClient.Update(context.TODO(), profile)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
expectedIncomplete--
|
|
Eventually(func() int {
|
|
return getMetricValue(f, "kubevirt_cdi_incomplete_storageprofiles")
|
|
}, metricPollingTimeout, metricPollingInterval).Should(BeNumerically("==", expectedIncomplete))
|
|
}
|
|
})
|
|
|
|
It("[test_id:9659] StorageProfile incomplete metric expected value remains unchanged for provisioner known to not work", func() {
|
|
sc, err := f.K8sClient.StorageV1().StorageClasses().Create(context.TODO(), createUnknownStorageClass("unsupported-provisioner", storagecapabilities.ProvisionerNoobaa), metav1.CreateOptions{})
|
|
Expect(err).ToNot(HaveOccurred())
|
|
|
|
By("Profile created and is incomplete")
|
|
profile := &cdiv1.StorageProfile{}
|
|
Eventually(func() error {
|
|
return f.CrClient.Get(context.TODO(), types.NamespacedName{Name: sc.Name}, profile)
|
|
}, 20*time.Second, 1*time.Second).Should(Succeed())
|
|
Expect(profile.Status.ClaimPropertySets).To(BeNil())
|
|
|
|
By("Metric stays the same because we don't support this provisioner")
|
|
Consistently(func() int {
|
|
return getMetricValue(f, "kubevirt_cdi_incomplete_storageprofiles")
|
|
}, metricPollingTimeout, metricPollingInterval).Should(Equal(originalMetricVal))
|
|
})
|
|
|
|
It("[test_id:7964] DataImportCron failing metric expected value when patching DesiredDigest annotation with junk sha256 value", func() {
|
|
numCrons := 2
|
|
originalCronMetricVal := getMetricValue(f, "kubevirt_cdi_dataimportcron_outdated_aggregated")
|
|
expectedFailingCrons := originalCronMetricVal + numCrons
|
|
|
|
reg, err := getDataVolumeSourceRegistry(f)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
defer func() {
|
|
if err := utils.RemoveInsecureRegistry(f.CrClient, *reg.URL); err != nil {
|
|
_, _ = fmt.Fprintf(GinkgoWriter, "failed to remove registry; %v", err)
|
|
}
|
|
}()
|
|
|
|
for i := 1; i < numCrons+1; i++ {
|
|
cron := utils.NewDataImportCron(fmt.Sprintf("cron-test-%d", i), "5Gi", scheduleOnceAYear, fmt.Sprintf("datasource-test-%d", i), 1, *reg)
|
|
By(fmt.Sprintf("Create new DataImportCron %s", *reg.URL))
|
|
cron, err = f.CdiClient.CdiV1beta1().DataImportCrons(f.Namespace.Name).Create(context.TODO(), cron, metav1.CreateOptions{})
|
|
Expect(err).ToNot(HaveOccurred())
|
|
By("Wait for condition UpToDate=true on DataImportCron")
|
|
Eventually(func() bool {
|
|
cron, err = f.CdiClient.CdiV1beta1().DataImportCrons(f.Namespace.Name).Get(context.TODO(), cron.Name, metav1.GetOptions{})
|
|
Expect(err).ToNot(HaveOccurred())
|
|
upToDateCondition := controller.FindDataImportCronConditionByType(cron, cdiv1.DataImportCronUpToDate)
|
|
return upToDateCondition != nil && upToDateCondition.ConditionState.Status == corev1.ConditionTrue
|
|
}, dataImportCronTimeout, pollingInterval).Should(BeTrue())
|
|
Eventually(func() error {
|
|
cron, err = f.CdiClient.CdiV1beta1().DataImportCrons(f.Namespace.Name).Get(context.TODO(), cron.Name, metav1.GetOptions{})
|
|
Expect(err).ToNot(HaveOccurred())
|
|
if cron.Annotations == nil {
|
|
cron.Annotations = make(map[string]string)
|
|
}
|
|
// notarealsha
|
|
cron.Annotations[controller.AnnSourceDesiredDigest] = "sha256:c616e1c85568b1f1d528da2b3dbc257fd2035ada441e286a8c42606491442a5d"
|
|
cron, err = f.CdiClient.CdiV1beta1().DataImportCrons(f.Namespace.Name).Update(context.TODO(), cron, metav1.UpdateOptions{})
|
|
return err
|
|
}, dataImportCronTimeout, pollingInterval).Should(BeNil())
|
|
By(fmt.Sprintf("Ensuring metric value incremented to %d", originalCronMetricVal+i))
|
|
Eventually(func() int {
|
|
return getMetricValue(f, "kubevirt_cdi_dataimportcron_outdated_aggregated")
|
|
}, metricPollingTimeout, metricPollingInterval).Should(BeNumerically("==", originalCronMetricVal+i))
|
|
}
|
|
By("Ensure metric value decrements when crons are cleaned up")
|
|
for i := 1; i < numCrons+1; i++ {
|
|
err = f.CdiClient.CdiV1beta1().DataImportCrons(f.Namespace.Name).Delete(context.TODO(), fmt.Sprintf("cron-test-%d", i), metav1.DeleteOptions{})
|
|
Expect(err).ToNot(HaveOccurred())
|
|
Eventually(func() int {
|
|
return getMetricValue(f, "kubevirt_cdi_dataimportcron_outdated_aggregated")
|
|
}, metricPollingTimeout, metricPollingInterval).Should(BeNumerically("==", expectedFailingCrons-i))
|
|
}
|
|
})
|
|
|
|
It("[test_id:7962] CDIOperatorDown alert firing when operator scaled down", func() {
|
|
deploymentName := "cdi-operator"
|
|
By("Scale down operator so alert will trigger")
|
|
originalReplicas := scaleDeployment(f, deploymentName, 0)
|
|
Eventually(func() bool {
|
|
dep, err := f.K8sClient.AppsV1().Deployments(f.CdiInstallNs).Get(context.TODO(), deploymentName, metav1.GetOptions{})
|
|
Expect(err).ToNot(HaveOccurred())
|
|
return dep.Status.Replicas == 0
|
|
}, 20*time.Second, 1*time.Second).Should(BeTrue())
|
|
|
|
By("Waiting for kubevirt_cdi_operator_up metric to be 0")
|
|
Eventually(func() int {
|
|
return getMetricValue(f, "kubevirt_cdi_operator_up")
|
|
}, metricPollingTimeout, metricPollingInterval).Should(BeNumerically("==", 0))
|
|
|
|
By("Waiting for CDIOperatorDown alert to be triggered")
|
|
waitForPrometheusAlert(f, "CDIOperatorDown")
|
|
|
|
By("Ensuring original value of replicas restored")
|
|
scaleDeployment(f, deploymentName, originalReplicas)
|
|
err := utils.WaitForDeploymentReplicasReady(f.K8sClient, f.CdiInstallNs, deploymentName)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
})
|
|
})
|
|
|
|
Context("Prometheus Rule configuration", func() {
|
|
It("[test_id:8259] Alerts should have all the required annotations", func() {
|
|
promRule := getPrometheusRule(f)
|
|
for _, group := range promRule.Spec.Groups {
|
|
if group.Name == "cdi.rules" {
|
|
for _, rule := range group.Rules {
|
|
if rule.Alert != "" {
|
|
Expect(rule.Annotations).ToNot(BeNil())
|
|
checkForRunbookURL(rule)
|
|
checkForSummary(rule)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
})
|
|
|
|
It("[test_id:8812] Alerts should have all the required labels", func() {
|
|
promRule := getPrometheusRule(f)
|
|
for _, group := range promRule.Spec.Groups {
|
|
if group.Name == "cdi.rules" {
|
|
for _, rule := range group.Rules {
|
|
if rule.Alert != "" {
|
|
Expect(rule.Labels).ToNot(BeNil())
|
|
checkForSeverityLabel(rule)
|
|
checkForHealthImpactLabel(rule)
|
|
checkForPartOfLabel(rule)
|
|
checkForComponentLabel(rule)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
})
|
|
})
|
|
})
|
|
|
|
func dataVolumeUnusualRestartTest(f *framework.Framework) {
|
|
By("Test metric for unusual restart count")
|
|
Eventually(func() bool {
|
|
return getMetricValue(f, "kubevirt_cdi_import_pods_high_restart") == 1
|
|
}, 2*time.Minute, 1*time.Second).Should(BeTrue())
|
|
|
|
By("checking that the CDIDataVolumeUnusualRestartCount alert is triggered")
|
|
waitForPrometheusAlert(f, "CDIDataVolumeUnusualRestartCount")
|
|
}
|
|
|
|
// Helper functions
|
|
|
|
func getMetricValue(f *framework.Framework, endpoint string) int {
|
|
var returnVal string
|
|
|
|
Eventually(func() bool {
|
|
var result map[string]interface{}
|
|
resp := f.MakePrometheusHTTPRequest("query?query=" + endpoint)
|
|
defer resp.Body.Close()
|
|
bodyBytes, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
err = json.Unmarshal(bodyBytes, &result)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
if len(result["data"].(map[string]interface{})["result"].([]interface{})) == 0 {
|
|
return false
|
|
}
|
|
values := result["data"].(map[string]interface{})["result"].([]interface{})[0].(map[string]interface{})["value"].([]interface{})
|
|
for _, v := range values {
|
|
if s, ok := v.(string); ok {
|
|
returnVal = s
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}, 1*time.Minute, 1*time.Second).Should(BeTrue())
|
|
|
|
i, err := strconv.Atoi(returnVal)
|
|
Expect(err).ToNot(HaveOccurred())
|
|
return i
|
|
}
|
|
|
|
func createUnknownStorageClass(name, provisioner string) *storagev1.StorageClass {
|
|
immediateBinding := storagev1.VolumeBindingImmediate
|
|
|
|
return &storagev1.StorageClass{
|
|
ObjectMeta: metav1.ObjectMeta{
|
|
Name: name,
|
|
Labels: map[string]string{
|
|
"cdi.kubevirt.io/testing": "",
|
|
},
|
|
},
|
|
Provisioner: provisioner,
|
|
VolumeBindingMode: &immediateBinding,
|
|
}
|
|
}
|
|
|
|
func getPrometheusRule(f *framework.Framework) *promv1.PrometheusRule {
|
|
By("Wait for prometheus-cdi-rules")
|
|
promRule := &promv1.PrometheusRule{
|
|
ObjectMeta: metav1.ObjectMeta{
|
|
Name: "prometheus-cdi-rules",
|
|
Namespace: f.CdiInstallNs,
|
|
},
|
|
}
|
|
Eventually(func() error {
|
|
return f.CrClient.Get(context.TODO(), crclient.ObjectKeyFromObject(promRule), promRule)
|
|
}, 5*time.Minute, 1*time.Second).Should(BeNil())
|
|
return promRule
|
|
}
|
|
|
|
func waitForPrometheusAlert(f *framework.Framework, alertName string) {
|
|
By("Wait for alert to be triggered")
|
|
Eventually(func() bool {
|
|
result, err := getPrometheusAlerts(f)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
|
|
alerts := result["data"].(map[string]interface{})["alerts"].([]interface{})
|
|
for _, alert := range alerts {
|
|
name := alert.(map[string]interface{})["labels"].(map[string]interface{})["alertname"].(string)
|
|
if name == alertName {
|
|
state := alert.(map[string]interface{})["state"].(string)
|
|
By(fmt.Sprintf("Alert %s state %s", name, state))
|
|
return state == "pending" || state == "firing"
|
|
}
|
|
}
|
|
|
|
return false
|
|
}, 10*time.Minute, 1*time.Second).Should(BeTrue())
|
|
}
|
|
|
|
func getPrometheusAlerts(f *framework.Framework) (alerts map[string]interface{}, err error) {
|
|
resp := f.MakePrometheusHTTPRequest("alerts")
|
|
defer resp.Body.Close()
|
|
|
|
bodyBytes, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return alerts, err
|
|
}
|
|
|
|
err = json.Unmarshal(bodyBytes, &alerts)
|
|
if err != nil {
|
|
return alerts, err
|
|
}
|
|
|
|
return alerts, nil
|
|
}
|
|
|
|
func checkForRunbookURL(rule promv1.Rule) {
|
|
url, ok := rule.Annotations["runbook_url"]
|
|
ExpectWithOffset(1, ok).To(BeTrue(), fmt.Sprintf("%s does not have runbook_url annotation", rule.Alert))
|
|
resp, err := http.Head(url)
|
|
ExpectWithOffset(1, err).ToNot(HaveOccurred(), fmt.Sprintf("%s runbook is not available", rule.Alert))
|
|
ExpectWithOffset(1, resp.StatusCode).Should(Equal(http.StatusOK), fmt.Sprintf("%s runbook is not available", rule.Alert))
|
|
}
|
|
|
|
func checkForSummary(rule promv1.Rule) {
|
|
summary, ok := rule.Annotations["summary"]
|
|
ExpectWithOffset(1, ok).To(BeTrue(), fmt.Sprintf("%s does not have summary annotation", rule.Alert))
|
|
ExpectWithOffset(1, summary).ToNot(BeEmpty(), fmt.Sprintf("%s has an empty summary", rule.Alert))
|
|
}
|
|
|
|
func checkForSeverityLabel(rule promv1.Rule) {
|
|
severity, ok := rule.Labels["severity"]
|
|
ExpectWithOffset(1, ok).To(BeTrue(), fmt.Sprintf("%s does not have severity label", rule.Alert))
|
|
ExpectWithOffset(1, severity).To(BeElementOf("info", "warning", "critical"), fmt.Sprintf("%s severity label is not valid", rule.Alert))
|
|
}
|
|
|
|
func checkForHealthImpactLabel(rule promv1.Rule) {
|
|
operatorHealthImpact, ok := rule.Labels["operator_health_impact"]
|
|
ExpectWithOffset(1, ok).To(BeTrue(), fmt.Sprintf("%s does not have operator_health_impact label", rule.Alert))
|
|
ExpectWithOffset(1, operatorHealthImpact).To(BeElementOf("none", "warning", "critical"), fmt.Sprintf("%s operator_health_impact label is not valid", rule.Alert))
|
|
}
|
|
|
|
func checkForPartOfLabel(rule promv1.Rule) {
|
|
kubernetesOperatorPartOf, ok := rule.Labels["kubernetes_operator_part_of"]
|
|
ExpectWithOffset(1, ok).To(BeTrue(), fmt.Sprintf("%s does not have kubernetes_operator_part_of label", rule.Alert))
|
|
ExpectWithOffset(1, kubernetesOperatorPartOf).To(Equal("kubevirt"), fmt.Sprintf("%s kubernetes_operator_part_of label is not valid", rule.Alert))
|
|
}
|
|
|
|
func checkForComponentLabel(rule promv1.Rule) {
|
|
kubernetesOperatorComponent, ok := rule.Labels["kubernetes_operator_component"]
|
|
ExpectWithOffset(1, ok).To(BeTrue(), fmt.Sprintf("%s does not have kubernetes_operator_component label", rule.Alert))
|
|
ExpectWithOffset(1, kubernetesOperatorComponent).To(Equal("containerized-data-importer"), fmt.Sprintf("%s kubernetes_operator_component label is not valid", rule.Alert))
|
|
}
|