mirror of
https://github.com/kubevirt/containerized-data-importer.git
synced 2025-06-03 06:30:22 +00:00
Refactor recording-rules and alerts code (#3068)
* Refactor recording-rules and alerts code

  Signed-off-by: avlitman <alitman@redhat.com>

* Remove promv1 from schema

  Signed-off-by: avlitman <alitman@redhat.com>

---------

Signed-off-by: avlitman <alitman@redhat.com>
This commit is contained in:
parent
24c9eb5706
commit
42ec627e35
2
Makefile
2
Makefile
@ -156,7 +156,7 @@ openshift-ci-image-push:
|
|||||||
./hack/build/osci-image-builder.sh
|
./hack/build/osci-image-builder.sh
|
||||||
|
|
||||||
generate-doc: build-docgen
|
generate-doc: build-docgen
|
||||||
_out/pkg/monitoring/tools/metricsdocs/metricsdocs > doc/metrics.md
|
_out/tools/metricsdocs/metricsdocs > doc/metrics.md
|
||||||
|
|
||||||
bootstrap-ginkgo:
|
bootstrap-ginkgo:
|
||||||
${DO_BAZ} ./hack/build/bootstrap-ginkgo.sh
|
${DO_BAZ} ./hack/build/bootstrap-ginkgo.sh
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
# Containerized Data Importer metrics
|
# Containerized Data Importer metrics
|
||||||
|
|
||||||
|
### kubevirt_cdi_clone_pods_high_restart
|
||||||
|
The number of CDI clone pods with high restart count. Type: Gauge.
|
||||||
|
|
||||||
### kubevirt_cdi_cr_ready
|
### kubevirt_cdi_cr_ready
|
||||||
CDI install ready. Type: Gauge.
|
CDI install ready. Type: Gauge.
|
||||||
|
|
||||||
@ -9,18 +12,15 @@ DataImportCron has an outdated import. Type: Gauge.
|
|||||||
### kubevirt_cdi_datavolume_pending
|
### kubevirt_cdi_datavolume_pending
|
||||||
Number of DataVolumes pending for default storage class to be configured. Type: Gauge.
|
Number of DataVolumes pending for default storage class to be configured. Type: Gauge.
|
||||||
|
|
||||||
### kubevirt_cdi_storageprofile_info
|
|
||||||
`StorageProfiles` info labels: `storageclass`, `provisioner`, `complete` indicates if all storage profiles recommended PVC settings are complete, `default` indicates if it's the Kubernetes default storage class, `virtdefault` indicates if it's the default virtualization storage class, `rwx` indicates if the storage class supports `ReadWriteMany`, `smartclone` indicates if it supports snapshot or CSI based clone. Type: Gauge.
|
|
||||||
|
|
||||||
### kubevirt_cdi_clone_pods_high_restart
|
|
||||||
The number of CDI clone pods with high restart count. Type: Gauge.
|
|
||||||
|
|
||||||
### kubevirt_cdi_import_pods_high_restart
|
### kubevirt_cdi_import_pods_high_restart
|
||||||
The number of CDI import pods with high restart count. Type: Gauge.
|
The number of CDI import pods with high restart count. Type: Gauge.
|
||||||
|
|
||||||
### kubevirt_cdi_operator_up
|
### kubevirt_cdi_operator_up
|
||||||
CDI operator status. Type: Gauge.
|
CDI operator status. Type: Gauge.
|
||||||
|
|
||||||
|
### kubevirt_cdi_storageprofile_info
|
||||||
|
`StorageProfiles` info labels: `storageclass`, `provisioner`, `complete` indicates if all storage profiles recommended PVC settings are complete, `default` indicates if it's the Kubernetes default storage class, `virtdefault` indicates if it's the default virtualization storage class, `rwx` indicates if the storage class supports `ReadWriteMany`, `smartclone` indicates if it supports snapshot or CSI based clone. Type: Gauge.
|
||||||
|
|
||||||
### kubevirt_cdi_upload_pods_high_restart
|
### kubevirt_cdi_upload_pods_high_restart
|
||||||
The number of CDI upload server pods with high restart count. Type: Gauge.
|
The number of CDI upload server pods with high restart count. Type: Gauge.
|
||||||
|
|
||||||
|
2
go.mod
2
go.mod
@ -23,7 +23,7 @@ require (
|
|||||||
github.com/kubernetes-csi/external-snapshotter/client/v6 v6.0.1
|
github.com/kubernetes-csi/external-snapshotter/client/v6 v6.0.1
|
||||||
github.com/kubernetes-csi/lib-volume-populator v1.2.1-0.20230316163120-b62a0eee2c56
|
github.com/kubernetes-csi/lib-volume-populator v1.2.1-0.20230316163120-b62a0eee2c56
|
||||||
github.com/kubevirt/monitoring/pkg/metrics/parser v0.0.0-20230627123556-81a891d4462a
|
github.com/kubevirt/monitoring/pkg/metrics/parser v0.0.0-20230627123556-81a891d4462a
|
||||||
github.com/machadovilaca/operator-observability v0.0.9
|
github.com/machadovilaca/operator-observability v0.0.13
|
||||||
github.com/onsi/ginkgo/v2 v2.12.0
|
github.com/onsi/ginkgo/v2 v2.12.0
|
||||||
github.com/onsi/gomega v1.27.10
|
github.com/onsi/gomega v1.27.10
|
||||||
github.com/openshift/api v0.0.0-20240116035456-11ed2fbcb805
|
github.com/openshift/api v0.0.0-20240116035456-11ed2fbcb805
|
||||||
|
4
go.sum
4
go.sum
@ -1099,8 +1099,8 @@ github.com/kubevirt/monitoring/pkg/metrics/parser v0.0.0-20230627123556-81a891d4
|
|||||||
github.com/kubevirt/monitoring/pkg/metrics/parser v0.0.0-20230627123556-81a891d4462a/go.mod h1:qGj2agzgwQ27nYhP3xhLs+IBzE5+ALNUg8bDfMcwPqo=
|
github.com/kubevirt/monitoring/pkg/metrics/parser v0.0.0-20230627123556-81a891d4462a/go.mod h1:qGj2agzgwQ27nYhP3xhLs+IBzE5+ALNUg8bDfMcwPqo=
|
||||||
github.com/lyft/protoc-gen-star v0.6.0/go.mod h1:TGAoBVkt8w7MPG72TrKIu85MIdXwDuzJYeZuUPFPNwA=
|
github.com/lyft/protoc-gen-star v0.6.0/go.mod h1:TGAoBVkt8w7MPG72TrKIu85MIdXwDuzJYeZuUPFPNwA=
|
||||||
github.com/lyft/protoc-gen-star v0.6.1/go.mod h1:TGAoBVkt8w7MPG72TrKIu85MIdXwDuzJYeZuUPFPNwA=
|
github.com/lyft/protoc-gen-star v0.6.1/go.mod h1:TGAoBVkt8w7MPG72TrKIu85MIdXwDuzJYeZuUPFPNwA=
|
||||||
github.com/machadovilaca/operator-observability v0.0.9 h1:jL2jVh0YJNA3nSX216X74RDZiEPPvsgqXollYmMOQkg=
|
github.com/machadovilaca/operator-observability v0.0.13 h1:9mhxEjkdE6pcl3ke8chbbAWxx25+K1m4Gq31yo+r2JU=
|
||||||
github.com/machadovilaca/operator-observability v0.0.9/go.mod h1:NGkaR3HEYLScVQf6kQAyxWOSN1ltHcsEvHU/8iIJ8cE=
|
github.com/machadovilaca/operator-observability v0.0.13/go.mod h1:e4Z3VhOXb9InkmSh00JjqBBijE+iD+YMzynBpKB3+gE=
|
||||||
github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
|
github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
|
||||||
github.com/mailru/easyjson v0.0.0-20180823135443-60711f1a8329/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
|
github.com/mailru/easyjson v0.0.0-20180823135443-60711f1a8329/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
|
||||||
github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
|
github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
|
||||||
|
@ -29,10 +29,10 @@ mkdir -p ${CMD_OUT_DIR}/dump
|
|||||||
bazel build \
|
bazel build \
|
||||||
--verbose_failures \
|
--verbose_failures \
|
||||||
--config=${ARCHITECTURE} \
|
--config=${ARCHITECTURE} \
|
||||||
//pkg/monitoring/tools/metricsdocs/...
|
//tools/metricsdocs/...
|
||||||
|
|
||||||
rm -rf _out/pkg/monitoring/tools/metricsdocs
|
rm -rf _out/tools/metricsdocs
|
||||||
mkdir -p _out/pkg/monitoring/tools/metricsdocs
|
mkdir -p _out/tools/metricsdocs
|
||||||
cp ./bazel-bin/pkg/monitoring/tools/metricsdocs/metricsdocs_/metricsdocs _out/pkg/monitoring/tools/metricsdocs/
|
cp ./bazel-bin/tools/metricsdocs/metricsdocs_/metricsdocs _out/tools/metricsdocs/
|
||||||
|
|
||||||
bazel clean
|
bazel clean
|
||||||
|
@ -23,7 +23,7 @@ set -e
|
|||||||
linter_image_tag="v0.0.1"
|
linter_image_tag="v0.0.1"
|
||||||
|
|
||||||
PROJECT_ROOT="$(readlink -e "$(dirname "${BASH_SOURCE[0]}")"/../../)"
|
PROJECT_ROOT="$(readlink -e "$(dirname "${BASH_SOURCE[0]}")"/../../)"
|
||||||
export METRICS_COLLECTOR_PATH="${METRICS_COLLECTOR_PATH:-${PROJECT_ROOT}/pkg/monitoring/tools/prom-metrics-collector}"
|
export METRICS_COLLECTOR_PATH="${METRICS_COLLECTOR_PATH:-${PROJECT_ROOT}/tools/prom-metrics-collector}"
|
||||||
|
|
||||||
if [[ ! -d "$METRICS_COLLECTOR_PATH" ]]; then
|
if [[ ! -d "$METRICS_COLLECTOR_PATH" ]]; then
|
||||||
echo "Invalid METRICS_COLLECTOR_PATH: $METRICS_COLLECTOR_PATH is not a valid directory path"
|
echo "Invalid METRICS_COLLECTOR_PATH: $METRICS_COLLECTOR_PATH is not a valid directory path"
|
||||||
|
@ -24,7 +24,7 @@ import (
|
|||||||
"k8s.io/apimachinery/pkg/types"
|
"k8s.io/apimachinery/pkg/types"
|
||||||
|
|
||||||
cdiv1 "kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1"
|
cdiv1 "kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1"
|
||||||
"kubevirt.io/containerized-data-importer/pkg/monitoring/metrics/cdi-controller"
|
metrics "kubevirt.io/containerized-data-importer/pkg/monitoring/metrics/cdi-controller"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
package metrics
|
package cdicontroller
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
package metrics
|
package cdicontroller
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/machadovilaca/operator-observability/pkg/operatormetrics"
|
"github.com/machadovilaca/operator-observability/pkg/operatormetrics"
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
package metrics
|
package cdicontroller
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/machadovilaca/operator-observability/pkg/operatormetrics"
|
"github.com/machadovilaca/operator-observability/pkg/operatormetrics"
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
package metrics
|
package cdicontroller
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/machadovilaca/operator-observability/pkg/operatormetrics"
|
"github.com/machadovilaca/operator-observability/pkg/operatormetrics"
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
package metrics
|
package operatorcontroller
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/machadovilaca/operator-observability/pkg/operatormetrics"
|
"github.com/machadovilaca/operator-observability/pkg/operatormetrics"
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
package metrics
|
package operatorcontroller
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/machadovilaca/operator-observability/pkg/operatormetrics"
|
"github.com/machadovilaca/operator-observability/pkg/operatormetrics"
|
||||||
|
15
pkg/monitoring/rules/BUILD.bazel
Normal file
15
pkg/monitoring/rules/BUILD.bazel
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||||
|
|
||||||
|
go_library(
|
||||||
|
name = "go_default_library",
|
||||||
|
srcs = ["rules.go"],
|
||||||
|
importpath = "kubevirt.io/containerized-data-importer/pkg/monitoring/rules",
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
deps = [
|
||||||
|
"//pkg/common:go_default_library",
|
||||||
|
"//pkg/monitoring/rules/alerts:go_default_library",
|
||||||
|
"//pkg/monitoring/rules/recordingrules:go_default_library",
|
||||||
|
"//vendor/github.com/machadovilaca/operator-observability/pkg/operatorrules:go_default_library",
|
||||||
|
"//vendor/github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1:go_default_library",
|
||||||
|
],
|
||||||
|
)
|
18
pkg/monitoring/rules/alerts/BUILD.bazel
Normal file
18
pkg/monitoring/rules/alerts/BUILD.bazel
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||||
|
|
||||||
|
go_library(
|
||||||
|
name = "go_default_library",
|
||||||
|
srcs = [
|
||||||
|
"operator.go",
|
||||||
|
"prometheus.go",
|
||||||
|
],
|
||||||
|
importpath = "kubevirt.io/containerized-data-importer/pkg/monitoring/rules/alerts",
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
deps = [
|
||||||
|
"//pkg/common:go_default_library",
|
||||||
|
"//vendor/github.com/machadovilaca/operator-observability/pkg/operatorrules:go_default_library",
|
||||||
|
"//vendor/github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1:go_default_library",
|
||||||
|
"//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
|
||||||
|
"//vendor/k8s.io/utils/ptr:go_default_library",
|
||||||
|
],
|
||||||
|
)
|
109
pkg/monitoring/rules/alerts/operator.go
Normal file
109
pkg/monitoring/rules/alerts/operator.go
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
package alerts
|
||||||
|
|
||||||
|
import (
|
||||||
|
promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
|
||||||
|
"k8s.io/apimachinery/pkg/util/intstr"
|
||||||
|
"k8s.io/utils/ptr"
|
||||||
|
)
|
||||||
|
|
||||||
|
var operatorAlerts = []promv1.Rule{
|
||||||
|
{
|
||||||
|
Alert: "CDIOperatorDown",
|
||||||
|
Expr: intstr.FromString("kubevirt_cdi_operator_up == 0"),
|
||||||
|
For: (*promv1.Duration)(ptr.To("5m")),
|
||||||
|
Annotations: map[string]string{
|
||||||
|
"summary": "CDI operator is down",
|
||||||
|
},
|
||||||
|
Labels: map[string]string{
|
||||||
|
severityAlertLabelKey: "warning",
|
||||||
|
operatorHealthImpactLabelKey: "critical",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Alert: "CDINotReady",
|
||||||
|
Expr: intstr.FromString("kubevirt_cdi_cr_ready == 0"),
|
||||||
|
For: (*promv1.Duration)(ptr.To("5m")),
|
||||||
|
Annotations: map[string]string{
|
||||||
|
"summary": "CDI is not available to use",
|
||||||
|
},
|
||||||
|
Labels: map[string]string{
|
||||||
|
severityAlertLabelKey: "warning",
|
||||||
|
operatorHealthImpactLabelKey: "critical",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Alert: "CDIDataVolumeUnusualRestartCount",
|
||||||
|
Expr: intstr.FromString("kubevirt_cdi_import_pods_high_restart > 0 or kubevirt_cdi_upload_pods_high_restart > 0 or kubevirt_cdi_clone_pods_high_restart > 0"),
|
||||||
|
For: (*promv1.Duration)(ptr.To("5m")),
|
||||||
|
Annotations: map[string]string{
|
||||||
|
"summary": "Some CDI population workloads have an unusual restart count, meaning they are probably failing and need to be investigated",
|
||||||
|
},
|
||||||
|
Labels: map[string]string{
|
||||||
|
severityAlertLabelKey: "warning",
|
||||||
|
operatorHealthImpactLabelKey: "warning",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Alert: "CDIStorageProfilesIncomplete",
|
||||||
|
Expr: intstr.FromString(`sum by(storageclass,provisioner) ((kubevirt_cdi_storageprofile_info{complete="false"}>0))`),
|
||||||
|
For: (*promv1.Duration)(ptr.To("5m")),
|
||||||
|
Annotations: map[string]string{
|
||||||
|
"summary": "Incomplete StorageProfile {{ $labels.storageclass }}, accessMode/volumeMode cannot be inferred by CDI for PVC population request",
|
||||||
|
},
|
||||||
|
Labels: map[string]string{
|
||||||
|
severityAlertLabelKey: "info",
|
||||||
|
operatorHealthImpactLabelKey: "warning",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Alert: "CDIDataImportCronOutdated",
|
||||||
|
Expr: intstr.FromString("sum by(ns,cron_name) (kubevirt_cdi_dataimportcron_outdated) > 0"),
|
||||||
|
For: (*promv1.Duration)(ptr.To("15m")),
|
||||||
|
Annotations: map[string]string{
|
||||||
|
"summary": "DataImportCron (recurring polling of VM templates disk image sources, also known as golden images) PVCs are not being updated on the defined schedule",
|
||||||
|
},
|
||||||
|
Labels: map[string]string{
|
||||||
|
severityAlertLabelKey: "info",
|
||||||
|
operatorHealthImpactLabelKey: "warning",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Alert: "CDINoDefaultStorageClass",
|
||||||
|
Expr: intstr.FromString(`sum(kubevirt_cdi_storageprofile_info{default="true"} or on() vector(0)) +
|
||||||
|
sum(kubevirt_cdi_storageprofile_info{virtdefault="true"} or on() vector(0)) +
|
||||||
|
(count(kubevirt_cdi_datavolume_pending == 0) or on() vector(0)) == 0`),
|
||||||
|
For: (*promv1.Duration)(ptr.To("5m")),
|
||||||
|
Annotations: map[string]string{
|
||||||
|
"summary": "No default StorageClass or virtualization StorageClass, and a DataVolume is pending for one",
|
||||||
|
},
|
||||||
|
Labels: map[string]string{
|
||||||
|
severityAlertLabelKey: "warning",
|
||||||
|
operatorHealthImpactLabelKey: "none",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Alert: "CDIMultipleDefaultVirtStorageClasses",
|
||||||
|
Expr: intstr.FromString(`sum(kubevirt_cdi_storageprofile_info{virtdefault="true"} or on() vector(0)) > 1`),
|
||||||
|
For: (*promv1.Duration)(ptr.To("5m")),
|
||||||
|
Annotations: map[string]string{
|
||||||
|
"summary": "More than one default virtualization StorageClass detected",
|
||||||
|
},
|
||||||
|
Labels: map[string]string{
|
||||||
|
severityAlertLabelKey: "warning",
|
||||||
|
operatorHealthImpactLabelKey: "none",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Alert: "CDIDefaultStorageClassDegraded",
|
||||||
|
Expr: intstr.FromString(`sum(kubevirt_cdi_storageprofile_info{default="true",rwx="true",smartclone="true"} or on() vector(0)) +
|
||||||
|
sum(kubevirt_cdi_storageprofile_info{virtdefault="true",rwx="true",smartclone="true"} or on() vector(0)) == 0`),
|
||||||
|
For: (*promv1.Duration)(ptr.To("5m")),
|
||||||
|
Annotations: map[string]string{
|
||||||
|
"summary": "Default storage class has no smart clone or ReadWriteMany",
|
||||||
|
},
|
||||||
|
Labels: map[string]string{
|
||||||
|
severityAlertLabelKey: "warning",
|
||||||
|
operatorHealthImpactLabelKey: "none",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
58
pkg/monitoring/rules/alerts/prometheus.go
Normal file
58
pkg/monitoring/rules/alerts/prometheus.go
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
package alerts
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/machadovilaca/operator-observability/pkg/operatorrules"
|
||||||
|
promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
|
||||||
|
"kubevirt.io/containerized-data-importer/pkg/common"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
prometheusRunbookAnnotationKey = "runbook_url"
|
||||||
|
defaultRunbookURLTemplate = "https://kubevirt.io/monitoring/runbooks/%s"
|
||||||
|
runbookURLTemplateEnv = "RUNBOOK_URL_TEMPLATE"
|
||||||
|
|
||||||
|
severityAlertLabelKey = "severity"
|
||||||
|
operatorHealthImpactLabelKey = "operator_health_impact"
|
||||||
|
partOfAlertLabelKey = "kubernetes_operator_part_of"
|
||||||
|
componentAlertLabelKey = "kubernetes_operator_component"
|
||||||
|
partOfAlertLabelValue = "kubevirt"
|
||||||
|
|
||||||
|
componentAlertLabelValue = common.CDILabelValue
|
||||||
|
)
|
||||||
|
|
||||||
|
// Register sets up alert rules in the given namespace.
|
||||||
|
func Register(namespace string) error {
|
||||||
|
alerts := [][]promv1.Rule{
|
||||||
|
operatorAlerts,
|
||||||
|
}
|
||||||
|
|
||||||
|
runbookURLTemplate := GetRunbookURLTemplate()
|
||||||
|
for _, alertGroup := range alerts {
|
||||||
|
for _, alert := range alertGroup {
|
||||||
|
alert.Labels[partOfAlertLabelKey] = partOfAlertLabelValue
|
||||||
|
alert.Labels[componentAlertLabelKey] = componentAlertLabelValue
|
||||||
|
alert.Annotations[prometheusRunbookAnnotationKey] = fmt.Sprintf(runbookURLTemplate, alert.Alert)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return operatorrules.RegisterAlerts(alerts...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetRunbookURLTemplate fetches or defaults the runbook URL template.
|
||||||
|
func GetRunbookURLTemplate() string {
|
||||||
|
runbookURLTemplate, exists := os.LookupEnv(runbookURLTemplateEnv)
|
||||||
|
if !exists {
|
||||||
|
runbookURLTemplate = defaultRunbookURLTemplate
|
||||||
|
}
|
||||||
|
|
||||||
|
if strings.Count(runbookURLTemplate, "%s") != 1 {
|
||||||
|
panic(errors.New("runbook URL template must have exactly 1 %s substring"))
|
||||||
|
}
|
||||||
|
|
||||||
|
return runbookURLTemplate
|
||||||
|
}
|
@ -2,8 +2,17 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
|||||||
|
|
||||||
go_library(
|
go_library(
|
||||||
name = "go_default_library",
|
name = "go_default_library",
|
||||||
srcs = ["recordingrules.go"],
|
srcs = [
|
||||||
|
"operator.go",
|
||||||
|
"pods.go",
|
||||||
|
"recordingrules.go",
|
||||||
|
],
|
||||||
importpath = "kubevirt.io/containerized-data-importer/pkg/monitoring/rules/recordingrules",
|
importpath = "kubevirt.io/containerized-data-importer/pkg/monitoring/rules/recordingrules",
|
||||||
visibility = ["//visibility:public"],
|
visibility = ["//visibility:public"],
|
||||||
deps = ["//pkg/common:go_default_library"],
|
deps = [
|
||||||
|
"//pkg/common:go_default_library",
|
||||||
|
"//vendor/github.com/machadovilaca/operator-observability/pkg/operatormetrics:go_default_library",
|
||||||
|
"//vendor/github.com/machadovilaca/operator-observability/pkg/operatorrules:go_default_library",
|
||||||
|
"//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
|
||||||
|
],
|
||||||
)
|
)
|
||||||
|
24
pkg/monitoring/rules/recordingrules/operator.go
Normal file
24
pkg/monitoring/rules/recordingrules/operator.go
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
package recordingrules
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/machadovilaca/operator-observability/pkg/operatormetrics"
|
||||||
|
"github.com/machadovilaca/operator-observability/pkg/operatorrules"
|
||||||
|
"k8s.io/apimachinery/pkg/util/intstr"
|
||||||
|
)
|
||||||
|
|
||||||
|
func operatorRecordingRules(namespace string) []operatorrules.RecordingRule {
|
||||||
|
return []operatorrules.RecordingRule{
|
||||||
|
{
|
||||||
|
MetricsOpts: operatormetrics.MetricOpts{
|
||||||
|
Name: "kubevirt_cdi_operator_up",
|
||||||
|
Help: "CDI operator status",
|
||||||
|
},
|
||||||
|
MetricType: operatormetrics.GaugeType,
|
||||||
|
Expr: intstr.FromString(
|
||||||
|
fmt.Sprintf("sum(up{namespace='%s', pod=~'cdi-operator-.*'} or vector(0))", namespace),
|
||||||
|
),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
45
pkg/monitoring/rules/recordingrules/pods.go
Normal file
45
pkg/monitoring/rules/recordingrules/pods.go
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
package recordingrules
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"github.com/machadovilaca/operator-observability/pkg/operatormetrics"
|
||||||
|
"github.com/machadovilaca/operator-observability/pkg/operatorrules"
|
||||||
|
"k8s.io/apimachinery/pkg/util/intstr"
|
||||||
|
|
||||||
|
"kubevirt.io/containerized-data-importer/pkg/common"
|
||||||
|
)
|
||||||
|
|
||||||
|
var podsRecordingRules = []operatorrules.RecordingRule{
|
||||||
|
{
|
||||||
|
MetricsOpts: operatormetrics.MetricOpts{
|
||||||
|
Name: "kubevirt_cdi_import_pods_high_restart",
|
||||||
|
Help: "The number of CDI import pods with high restart count",
|
||||||
|
},
|
||||||
|
MetricType: operatormetrics.GaugeType,
|
||||||
|
Expr: intstr.FromString(
|
||||||
|
fmt.Sprintf("count(kube_pod_container_status_restarts_total{pod=~'%s-.*', container='%s'} > %s) or on() vector(0)", common.ImporterPodName, common.ImporterPodName, strconv.Itoa(common.UnusualRestartCountThreshold)),
|
||||||
|
),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
MetricsOpts: operatormetrics.MetricOpts{
|
||||||
|
Name: "kubevirt_cdi_upload_pods_high_restart",
|
||||||
|
Help: "The number of CDI upload server pods with high restart count",
|
||||||
|
},
|
||||||
|
MetricType: operatormetrics.GaugeType,
|
||||||
|
Expr: intstr.FromString(
|
||||||
|
fmt.Sprintf("count(kube_pod_container_status_restarts_total{pod=~'%s-.*', container='%s'} > %s) or on() vector(0)", common.UploadPodName, common.UploadServerPodname, strconv.Itoa(common.UnusualRestartCountThreshold)),
|
||||||
|
),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
MetricsOpts: operatormetrics.MetricOpts{
|
||||||
|
Name: "kubevirt_cdi_clone_pods_high_restart",
|
||||||
|
Help: "The number of CDI clone pods with high restart count",
|
||||||
|
},
|
||||||
|
MetricType: operatormetrics.GaugeType,
|
||||||
|
Expr: intstr.FromString(
|
||||||
|
fmt.Sprintf("count(kube_pod_container_status_restarts_total{pod=~'.*%s', container='%s'} > %s) or on() vector(0)", common.ClonerSourcePodNameSuffix, common.ClonerSourcePodName, strconv.Itoa(common.UnusualRestartCountThreshold)),
|
||||||
|
),
|
||||||
|
},
|
||||||
|
}
|
@ -1,59 +1,11 @@
|
|||||||
package recordingrules
|
package recordingrules
|
||||||
|
|
||||||
import (
|
import "github.com/machadovilaca/operator-observability/pkg/operatorrules"
|
||||||
"fmt"
|
|
||||||
"strconv"
|
|
||||||
|
|
||||||
"kubevirt.io/containerized-data-importer/pkg/common"
|
// Register sets up recording rules in the given namespace.
|
||||||
)
|
func Register(namespace string) error {
|
||||||
|
return operatorrules.RegisterRecordingRules(
|
||||||
// MetricOpts represent CDI Prometheus Metrics
|
operatorRecordingRules(namespace),
|
||||||
type MetricOpts struct {
|
podsRecordingRules,
|
||||||
Name string
|
)
|
||||||
Help string
|
|
||||||
Type string
|
|
||||||
}
|
|
||||||
|
|
||||||
// RecordRulesDesc represent CDI Prometheus Record Rules
|
|
||||||
type RecordRulesDesc struct {
|
|
||||||
Opts MetricOpts
|
|
||||||
Expr string
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetRecordRulesDesc returns CDI Prometheus Record Rules
|
|
||||||
func GetRecordRulesDesc(namespace string) []RecordRulesDesc {
|
|
||||||
return []RecordRulesDesc{
|
|
||||||
{
|
|
||||||
MetricOpts{
|
|
||||||
"kubevirt_cdi_operator_up",
|
|
||||||
"CDI operator status",
|
|
||||||
"Gauge",
|
|
||||||
},
|
|
||||||
fmt.Sprintf("sum(up{namespace='%s', pod=~'cdi-operator-.*'} or vector(0))", namespace),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
MetricOpts{
|
|
||||||
"kubevirt_cdi_import_pods_high_restart",
|
|
||||||
"The number of CDI import pods with high restart count",
|
|
||||||
"Gauge",
|
|
||||||
},
|
|
||||||
fmt.Sprintf("count(kube_pod_container_status_restarts_total{pod=~'%s-.*', container='%s'} > %s) or on() vector(0)", common.ImporterPodName, common.ImporterPodName, strconv.Itoa(common.UnusualRestartCountThreshold)),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
MetricOpts{
|
|
||||||
"kubevirt_cdi_upload_pods_high_restart",
|
|
||||||
"The number of CDI upload server pods with high restart count",
|
|
||||||
"Gauge",
|
|
||||||
},
|
|
||||||
fmt.Sprintf("count(kube_pod_container_status_restarts_total{pod=~'%s-.*', container='%s'} > %s) or on() vector(0)", common.UploadPodName, common.UploadServerPodname, strconv.Itoa(common.UnusualRestartCountThreshold)),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
MetricOpts{
|
|
||||||
"kubevirt_cdi_clone_pods_high_restart",
|
|
||||||
"The number of CDI clone pods with high restart count",
|
|
||||||
"Gauge",
|
|
||||||
},
|
|
||||||
fmt.Sprintf("count(kube_pod_container_status_restarts_total{pod=~'.*%s', container='%s'} > %s) or on() vector(0)", common.ClonerSourcePodNameSuffix, common.ClonerSourcePodName, strconv.Itoa(common.UnusualRestartCountThreshold)),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
49
pkg/monitoring/rules/rules.go
Normal file
49
pkg/monitoring/rules/rules.go
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
package rules
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/machadovilaca/operator-observability/pkg/operatorrules"
|
||||||
|
promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
|
||||||
|
|
||||||
|
"kubevirt.io/containerized-data-importer/pkg/common"
|
||||||
|
"kubevirt.io/containerized-data-importer/pkg/monitoring/rules/alerts"
|
||||||
|
"kubevirt.io/containerized-data-importer/pkg/monitoring/rules/recordingrules"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
ruleName = "prometheus-cdi-rules"
|
||||||
|
)
|
||||||
|
|
||||||
|
// SetupRules initializes recording and alert rules in a namespace.
|
||||||
|
func SetupRules(namespace string) error {
|
||||||
|
if err := recordingrules.Register(namespace); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := alerts.Register(namespace); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildPrometheusRule creates a PrometheusRule in a namespace.
|
||||||
|
func BuildPrometheusRule(namespace string) (*promv1.PrometheusRule, error) {
|
||||||
|
return operatorrules.BuildPrometheusRule(
|
||||||
|
ruleName,
|
||||||
|
namespace,
|
||||||
|
map[string]string{
|
||||||
|
common.CDIComponentLabel: "",
|
||||||
|
common.PrometheusLabelKey: common.PrometheusLabelValue,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListRecordingRules returns all configured recording rules.
|
||||||
|
func ListRecordingRules() []operatorrules.RecordingRule {
|
||||||
|
return operatorrules.ListRecordingRules()
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListAlerts returns all configured alert rules.
|
||||||
|
func ListAlerts() []promv1.Rule {
|
||||||
|
return operatorrules.ListAlerts()
|
||||||
|
}
|
@ -1,81 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
|
|
||||||
"github.com/machadovilaca/operator-observability/pkg/docs"
|
|
||||||
"github.com/machadovilaca/operator-observability/pkg/operatormetrics"
|
|
||||||
"github.com/machadovilaca/operator-observability/pkg/operatorrules"
|
|
||||||
cdiMetrics "kubevirt.io/containerized-data-importer/pkg/monitoring/metrics/cdi-controller"
|
|
||||||
operatorMetrics "kubevirt.io/containerized-data-importer/pkg/monitoring/metrics/operator-controller"
|
|
||||||
"kubevirt.io/containerized-data-importer/pkg/monitoring/rules/recordingrules"
|
|
||||||
)
|
|
||||||
|
|
||||||
const tpl = `# Containerized Data Importer metrics
|
|
||||||
{{- range . }}
|
|
||||||
|
|
||||||
{{ $deprecatedVersion := "" -}}
|
|
||||||
{{- with index .ExtraFields "DeprecatedVersion" -}}
|
|
||||||
{{- $deprecatedVersion = printf " in %s" . -}}
|
|
||||||
{{- end -}}
|
|
||||||
|
|
||||||
{{- $stabilityLevel := "" -}}
|
|
||||||
{{- if and (.ExtraFields.StabilityLevel) (ne .ExtraFields.StabilityLevel "STABLE") -}}
|
|
||||||
{{- $stabilityLevel = printf "[%s%s] " .ExtraFields.StabilityLevel $deprecatedVersion -}}
|
|
||||||
{{- end -}}
|
|
||||||
|
|
||||||
### {{ .Name }}
|
|
||||||
{{ print $stabilityLevel }}{{ .Help }}. Type: {{ .Type -}}.
|
|
||||||
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
## Developing new metrics
|
|
||||||
|
|
||||||
All metrics documented here are auto-generated and reflect exactly what is being
|
|
||||||
exposed. After developing new metrics or changing old ones please regenerate
|
|
||||||
this document.
|
|
||||||
`
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
err := operatorMetrics.SetupMetrics()
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
err = cdiMetrics.SetupMetrics()
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
metricsList := operatorMetrics.ListMetrics()
|
|
||||||
recordingRulesList := convertToRecordingRules(recordingrules.GetRecordRulesDesc(""))
|
|
||||||
|
|
||||||
docsString := docs.BuildMetricsDocsWithCustomTemplate(metricsList, recordingRulesList, tpl)
|
|
||||||
fmt.Print(docsString)
|
|
||||||
}
|
|
||||||
|
|
||||||
func convertToRecordingRules(recordRulesDesc []recordingrules.RecordRulesDesc) []operatorrules.RecordingRule {
|
|
||||||
var recordingRules []operatorrules.RecordingRule
|
|
||||||
|
|
||||||
for _, ruleDesc := range recordRulesDesc {
|
|
||||||
recordingRule := operatorrules.RecordingRule{
|
|
||||||
MetricsOpts: operatormetrics.MetricOpts{
|
|
||||||
Name: ruleDesc.Opts.Name,
|
|
||||||
Help: ruleDesc.Opts.Help,
|
|
||||||
// Assuming the rest of the fields are correctly mapped
|
|
||||||
},
|
|
||||||
MetricType: convertRulesType(ruleDesc.Opts.Type),
|
|
||||||
}
|
|
||||||
recordingRules = append(recordingRules, recordingRule)
|
|
||||||
}
|
|
||||||
return recordingRules
|
|
||||||
}
|
|
||||||
|
|
||||||
// when adding new recording rule please note that
|
|
||||||
func convertRulesType(metricType string) operatormetrics.MetricType {
|
|
||||||
if metricType == "Gauge" {
|
|
||||||
return operatormetrics.GaugeType
|
|
||||||
// ... other cases ...
|
|
||||||
}
|
|
||||||
return ""
|
|
||||||
}
|
|
@ -24,7 +24,7 @@ go_library(
|
|||||||
"//pkg/controller/common:go_default_library",
|
"//pkg/controller/common:go_default_library",
|
||||||
"//pkg/feature-gates:go_default_library",
|
"//pkg/feature-gates:go_default_library",
|
||||||
"//pkg/monitoring/metrics/operator-controller:go_default_library",
|
"//pkg/monitoring/metrics/operator-controller:go_default_library",
|
||||||
"//pkg/monitoring/rules/recordingrules:go_default_library",
|
"//pkg/monitoring/rules:go_default_library",
|
||||||
"//pkg/operator:go_default_library",
|
"//pkg/operator:go_default_library",
|
||||||
"//pkg/operator/resources/cert:go_default_library",
|
"//pkg/operator/resources/cert:go_default_library",
|
||||||
"//pkg/operator/resources/cluster:go_default_library",
|
"//pkg/operator/resources/cluster:go_default_library",
|
||||||
@ -59,7 +59,6 @@ go_library(
|
|||||||
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
|
|
||||||
"//vendor/k8s.io/apimachinery/pkg/util/strategicpatch:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/util/strategicpatch:go_default_library",
|
||||||
"//vendor/k8s.io/apiserver/pkg/authentication/user:go_default_library",
|
"//vendor/k8s.io/apiserver/pkg/authentication/user:go_default_library",
|
||||||
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
|
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
|
||||||
@ -94,6 +93,8 @@ go_test(
|
|||||||
embed = [":go_default_library"],
|
embed = [":go_default_library"],
|
||||||
deps = [
|
deps = [
|
||||||
"//pkg/common:go_default_library",
|
"//pkg/common:go_default_library",
|
||||||
|
"//pkg/monitoring/rules:go_default_library",
|
||||||
|
"//pkg/monitoring/rules/alerts:go_default_library",
|
||||||
"//pkg/operator/resources/cert:go_default_library",
|
"//pkg/operator/resources/cert:go_default_library",
|
||||||
"//pkg/operator/resources/cluster:go_default_library",
|
"//pkg/operator/resources/cluster:go_default_library",
|
||||||
"//pkg/operator/resources/namespaced:go_default_library",
|
"//pkg/operator/resources/namespaced:go_default_library",
|
||||||
|
@ -40,6 +40,7 @@ import (
|
|||||||
|
|
||||||
cdiv1 "kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1"
|
cdiv1 "kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1"
|
||||||
metrics "kubevirt.io/containerized-data-importer/pkg/monitoring/metrics/operator-controller"
|
metrics "kubevirt.io/containerized-data-importer/pkg/monitoring/metrics/operator-controller"
|
||||||
|
"kubevirt.io/containerized-data-importer/pkg/monitoring/rules"
|
||||||
"kubevirt.io/containerized-data-importer/pkg/operator"
|
"kubevirt.io/containerized-data-importer/pkg/operator"
|
||||||
cdicerts "kubevirt.io/containerized-data-importer/pkg/operator/resources/cert"
|
cdicerts "kubevirt.io/containerized-data-importer/pkg/operator/resources/cert"
|
||||||
cdicluster "kubevirt.io/containerized-data-importer/pkg/operator/resources/cluster"
|
cdicluster "kubevirt.io/containerized-data-importer/pkg/operator/resources/cluster"
|
||||||
@ -129,6 +130,11 @@ func newReconciler(mgr manager.Manager) (*ReconcileCDI, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
err = rules.SetupRules(namespace)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
recorder := mgr.GetEventRecorderFor("operator-controller")
|
recorder := mgr.GetEventRecorderFor("operator-controller")
|
||||||
|
|
||||||
r := &ReconcileCDI{
|
r := &ReconcileCDI{
|
||||||
|
@ -25,9 +25,8 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"kubevirt.io/controller-lifecycle-operator-sdk/pkg/sdk/callbacks"
|
|
||||||
|
|
||||||
sdkapi "kubevirt.io/controller-lifecycle-operator-sdk/api"
|
sdkapi "kubevirt.io/controller-lifecycle-operator-sdk/api"
|
||||||
|
"kubevirt.io/controller-lifecycle-operator-sdk/pkg/sdk/callbacks"
|
||||||
sdkr "kubevirt.io/controller-lifecycle-operator-sdk/pkg/sdk/reconciler"
|
sdkr "kubevirt.io/controller-lifecycle-operator-sdk/pkg/sdk/reconciler"
|
||||||
|
|
||||||
. "github.com/onsi/ginkgo/v2"
|
. "github.com/onsi/ginkgo/v2"
|
||||||
@ -57,6 +56,8 @@ import (
|
|||||||
|
|
||||||
cdiv1 "kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1"
|
cdiv1 "kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1"
|
||||||
"kubevirt.io/containerized-data-importer/pkg/common"
|
"kubevirt.io/containerized-data-importer/pkg/common"
|
||||||
|
"kubevirt.io/containerized-data-importer/pkg/monitoring/rules"
|
||||||
|
"kubevirt.io/containerized-data-importer/pkg/monitoring/rules/alerts"
|
||||||
clusterResources "kubevirt.io/containerized-data-importer/pkg/operator/resources/cluster"
|
clusterResources "kubevirt.io/containerized-data-importer/pkg/operator/resources/cluster"
|
||||||
namespaceResources "kubevirt.io/containerized-data-importer/pkg/operator/resources/namespaced"
|
namespaceResources "kubevirt.io/containerized-data-importer/pkg/operator/resources/namespaced"
|
||||||
utils "kubevirt.io/containerized-data-importer/pkg/operator/resources/utils"
|
utils "kubevirt.io/containerized-data-importer/pkg/operator/resources/utils"
|
||||||
@ -285,7 +286,7 @@ var _ = Describe("Controller", func() {
|
|||||||
doReconcile(args)
|
doReconcile(args)
|
||||||
Expect(setDeploymentsReady(args)).To(BeTrue())
|
Expect(setDeploymentsReady(args)).To(BeTrue())
|
||||||
|
|
||||||
runbookURLTemplate := getRunbookURLTemplate()
|
runbookURLTemplate := alerts.GetRunbookURLTemplate()
|
||||||
|
|
||||||
rule := &promv1.PrometheusRule{
|
rule := &promv1.PrometheusRule{
|
||||||
ObjectMeta: metav1.ObjectMeta{
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
@ -313,7 +314,7 @@ var _ = Describe("Controller", func() {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
Expect(rule.Spec.Groups[0].Rules).To(ContainElement(cdiDownAlert))
|
Expect(rule.Spec.Groups[1].Rules).To(ContainElement(cdiDownAlert))
|
||||||
Expect(rule.Labels[common.AppKubernetesPartOfLabel]).To(Equal("testing"))
|
Expect(rule.Labels[common.AppKubernetesPartOfLabel]).To(Equal("testing"))
|
||||||
validateEvents(args.reconciler, createReadyEventValidationMap())
|
validateEvents(args.reconciler, createReadyEventValidationMap())
|
||||||
})
|
})
|
||||||
@ -1737,6 +1738,11 @@ func createReconciler(client client.Client) *ReconcileCDI {
|
|||||||
Namespace: namespace,
|
Namespace: namespace,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
err := rules.SetupRules(namespace)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
recorder := record.NewFakeRecorder(250)
|
recorder := record.NewFakeRecorder(250)
|
||||||
r := &ReconcileCDI{
|
r := &ReconcileCDI{
|
||||||
client: client,
|
client: client,
|
||||||
|
@ -18,11 +18,9 @@ package controller
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"reflect"
|
"reflect"
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/go-logr/logr"
|
"github.com/go-logr/logr"
|
||||||
promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
|
promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
|
||||||
@ -31,14 +29,13 @@ import (
|
|||||||
"k8s.io/apimachinery/pkg/api/meta"
|
"k8s.io/apimachinery/pkg/api/meta"
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
"k8s.io/apimachinery/pkg/runtime"
|
"k8s.io/apimachinery/pkg/runtime"
|
||||||
"k8s.io/apimachinery/pkg/util/intstr"
|
|
||||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||||
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
|
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
|
||||||
"sigs.k8s.io/controller-runtime/pkg/source"
|
"sigs.k8s.io/controller-runtime/pkg/source"
|
||||||
|
|
||||||
"kubevirt.io/containerized-data-importer/pkg/common"
|
"kubevirt.io/containerized-data-importer/pkg/common"
|
||||||
cc "kubevirt.io/containerized-data-importer/pkg/controller/common"
|
cc "kubevirt.io/containerized-data-importer/pkg/controller/common"
|
||||||
"kubevirt.io/containerized-data-importer/pkg/monitoring/rules/recordingrules"
|
"kubevirt.io/containerized-data-importer/pkg/monitoring/rules"
|
||||||
cdinamespaced "kubevirt.io/containerized-data-importer/pkg/operator/resources/namespaced"
|
cdinamespaced "kubevirt.io/containerized-data-importer/pkg/operator/resources/namespaced"
|
||||||
"kubevirt.io/containerized-data-importer/pkg/util"
|
"kubevirt.io/containerized-data-importer/pkg/util"
|
||||||
|
|
||||||
@ -52,12 +49,6 @@ const (
|
|||||||
defaultMonitoringNs = "monitoring"
|
defaultMonitoringNs = "monitoring"
|
||||||
defaultRunbookURLTemplate = "https://kubevirt.io/monitoring/runbooks/%s"
|
defaultRunbookURLTemplate = "https://kubevirt.io/monitoring/runbooks/%s"
|
||||||
runbookURLTemplateEnv = "RUNBOOK_URL_TEMPLATE"
|
runbookURLTemplateEnv = "RUNBOOK_URL_TEMPLATE"
|
||||||
severityAlertLabelKey = "severity"
|
|
||||||
healthImpactAlertLabelKey = "operator_health_impact"
|
|
||||||
partOfAlertLabelKey = "kubernetes_operator_part_of"
|
|
||||||
partOfAlertLabelValue = "kubevirt"
|
|
||||||
componentAlertLabelKey = "kubernetes_operator_component"
|
|
||||||
componentAlertLabelValue = common.CDILabelValue
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func ensurePrometheusResourcesExist(ctx context.Context, c client.Client, scheme *runtime.Scheme, owner metav1.Object) error {
|
func ensurePrometheusResourcesExist(ctx context.Context, c client.Client, scheme *runtime.Scheme, owner metav1.Object) error {
|
||||||
@ -134,166 +125,15 @@ func isPrometheusDeployed(logger logr.Logger, c client.Client, namespace string)
|
|||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getRecordRules(namespace string) []promv1.Rule {
|
|
||||||
var recordRules []promv1.Rule
|
|
||||||
|
|
||||||
for _, rrd := range recordingrules.GetRecordRulesDesc(namespace) {
|
|
||||||
recordRules = append(recordRules, generateRecordRule(rrd.Opts.Name, rrd.Expr))
|
|
||||||
}
|
|
||||||
|
|
||||||
return recordRules
|
|
||||||
}
|
|
||||||
|
|
||||||
func getAlertRules(runbookURLTemplate string) []promv1.Rule {
|
|
||||||
return []promv1.Rule{
|
|
||||||
generateAlertRule(
|
|
||||||
"CDIOperatorDown",
|
|
||||||
"kubevirt_cdi_operator_up == 0",
|
|
||||||
promv1.Duration("5m"),
|
|
||||||
map[string]string{
|
|
||||||
"summary": "CDI operator is down",
|
|
||||||
"runbook_url": fmt.Sprintf(runbookURLTemplate, "CDIOperatorDown"),
|
|
||||||
},
|
|
||||||
map[string]string{
|
|
||||||
severityAlertLabelKey: "warning",
|
|
||||||
healthImpactAlertLabelKey: "critical",
|
|
||||||
partOfAlertLabelKey: partOfAlertLabelValue,
|
|
||||||
componentAlertLabelKey: componentAlertLabelValue,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
generateAlertRule(
|
|
||||||
"CDINotReady",
|
|
||||||
"kubevirt_cdi_cr_ready == 0",
|
|
||||||
promv1.Duration("5m"),
|
|
||||||
map[string]string{
|
|
||||||
"summary": "CDI is not available to use",
|
|
||||||
"runbook_url": fmt.Sprintf(runbookURLTemplate, "CDINotReady"),
|
|
||||||
},
|
|
||||||
map[string]string{
|
|
||||||
severityAlertLabelKey: "warning",
|
|
||||||
healthImpactAlertLabelKey: "critical",
|
|
||||||
partOfAlertLabelKey: partOfAlertLabelValue,
|
|
||||||
componentAlertLabelKey: componentAlertLabelValue,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
generateAlertRule(
|
|
||||||
"CDIDataVolumeUnusualRestartCount",
|
|
||||||
`kubevirt_cdi_import_pods_high_restart > 0 or
|
|
||||||
kubevirt_cdi_upload_pods_high_restart > 0 or
|
|
||||||
kubevirt_cdi_clone_pods_high_restart > 0`,
|
|
||||||
promv1.Duration("5m"),
|
|
||||||
map[string]string{
|
|
||||||
"summary": "Some CDI population workloads have an unusual restart count, meaning they are probably failing and need to be investigated",
|
|
||||||
"runbook_url": fmt.Sprintf(runbookURLTemplate, "CDIDataVolumeUnusualRestartCount"),
|
|
||||||
},
|
|
||||||
map[string]string{
|
|
||||||
severityAlertLabelKey: "warning",
|
|
||||||
healthImpactAlertLabelKey: "warning",
|
|
||||||
partOfAlertLabelKey: partOfAlertLabelValue,
|
|
||||||
componentAlertLabelKey: componentAlertLabelValue,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
generateAlertRule(
|
|
||||||
"CDIStorageProfilesIncomplete",
|
|
||||||
`sum by(storageclass,provisioner) ((kubevirt_cdi_storageprofile_info{complete="false"}>0))`,
|
|
||||||
promv1.Duration("5m"),
|
|
||||||
map[string]string{
|
|
||||||
"summary": "Incomplete StorageProfile {{ $labels.storageclass }}, accessMode/volumeMode cannot be inferred by CDI for PVC population request",
|
|
||||||
"runbook_url": fmt.Sprintf(runbookURLTemplate, "CDIStorageProfilesIncomplete"),
|
|
||||||
},
|
|
||||||
map[string]string{
|
|
||||||
severityAlertLabelKey: "info",
|
|
||||||
healthImpactAlertLabelKey: "warning",
|
|
||||||
partOfAlertLabelKey: partOfAlertLabelValue,
|
|
||||||
componentAlertLabelKey: componentAlertLabelValue,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
generateAlertRule(
|
|
||||||
"CDIDataImportCronOutdated",
|
|
||||||
`sum by(ns,cron_name) (kubevirt_cdi_dataimportcron_outdated) > 0`,
|
|
||||||
promv1.Duration("15m"),
|
|
||||||
map[string]string{
|
|
||||||
"summary": "DataImportCron (recurring polling of VM templates disk image sources, also known as golden images) PVCs are not being updated on the defined schedule",
|
|
||||||
"runbook_url": fmt.Sprintf(runbookURLTemplate, "CDIDataImportCronOutdated"),
|
|
||||||
},
|
|
||||||
map[string]string{
|
|
||||||
severityAlertLabelKey: "info",
|
|
||||||
healthImpactAlertLabelKey: "warning",
|
|
||||||
partOfAlertLabelKey: partOfAlertLabelValue,
|
|
||||||
componentAlertLabelKey: componentAlertLabelValue,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
generateAlertRule(
|
|
||||||
"CDINoDefaultStorageClass",
|
|
||||||
`sum(kubevirt_cdi_storageprofile_info{default="true"} or on() vector(0)) +
|
|
||||||
sum(kubevirt_cdi_storageprofile_info{virtdefault="true"} or on() vector(0)) +
|
|
||||||
(count(kubevirt_cdi_datavolume_pending == 0) or on() vector(0)) == 0`,
|
|
||||||
promv1.Duration("5m"),
|
|
||||||
map[string]string{
|
|
||||||
"summary": "No default StorageClass or virtualization StorageClass, and a DataVolume is pending for one",
|
|
||||||
"runbook_url": fmt.Sprintf(runbookURLTemplate, "CDINoDefaultStorageClass"),
|
|
||||||
},
|
|
||||||
map[string]string{
|
|
||||||
severityAlertLabelKey: "warning",
|
|
||||||
healthImpactAlertLabelKey: "none",
|
|
||||||
partOfAlertLabelKey: partOfAlertLabelValue,
|
|
||||||
componentAlertLabelKey: componentAlertLabelValue,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
generateAlertRule(
|
|
||||||
"CDIMultipleDefaultVirtStorageClasses",
|
|
||||||
`sum(kubevirt_cdi_storageprofile_info{virtdefault="true"} or on() vector(0)) > 1`,
|
|
||||||
promv1.Duration("5m"),
|
|
||||||
map[string]string{
|
|
||||||
"summary": "More than one default virtualization StorageClass detected",
|
|
||||||
"runbook_url": fmt.Sprintf(runbookURLTemplate, "CDIMultipleDefaultVirtStorageClasses"),
|
|
||||||
},
|
|
||||||
map[string]string{
|
|
||||||
severityAlertLabelKey: "warning",
|
|
||||||
healthImpactAlertLabelKey: "none",
|
|
||||||
partOfAlertLabelKey: partOfAlertLabelValue,
|
|
||||||
componentAlertLabelKey: componentAlertLabelValue,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
generateAlertRule(
|
|
||||||
"CDIDefaultStorageClassDegraded",
|
|
||||||
`sum(kubevirt_cdi_storageprofile_info{default="true",rwx="true",smartclone="true"} or on() vector(0)) +
|
|
||||||
sum(kubevirt_cdi_storageprofile_info{virtdefault="true",rwx="true",smartclone="true"} or on() vector(0)) == 0`,
|
|
||||||
promv1.Duration("5m"),
|
|
||||||
map[string]string{
|
|
||||||
"summary": "Default storage class has no smart clone or ReadWriteMany",
|
|
||||||
"runbook_url": fmt.Sprintf(runbookURLTemplate, "CDIDefaultStorageClassDegraded"),
|
|
||||||
},
|
|
||||||
map[string]string{
|
|
||||||
severityAlertLabelKey: "warning",
|
|
||||||
healthImpactAlertLabelKey: "none",
|
|
||||||
partOfAlertLabelKey: partOfAlertLabelValue,
|
|
||||||
componentAlertLabelKey: componentAlertLabelValue,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func newPrometheusRule(namespace string) *promv1.PrometheusRule {
|
func newPrometheusRule(namespace string) *promv1.PrometheusRule {
|
||||||
runbookURLTemplate := getRunbookURLTemplate()
|
promRule, err := rules.BuildPrometheusRule(namespace)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
return &promv1.PrometheusRule{
|
return &promv1.PrometheusRule{
|
||||||
ObjectMeta: metav1.ObjectMeta{
|
ObjectMeta: promRule.ObjectMeta,
|
||||||
Name: ruleName,
|
Spec: promRule.Spec,
|
||||||
Namespace: namespace,
|
|
||||||
Labels: map[string]string{
|
|
||||||
common.CDIComponentLabel: "",
|
|
||||||
common.PrometheusLabelKey: common.PrometheusLabelValue,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Spec: promv1.PrometheusRuleSpec{
|
|
||||||
Groups: []promv1.RuleGroup{
|
|
||||||
{
|
|
||||||
Name: "cdi.rules",
|
|
||||||
Rules: append(getRecordRules(namespace), getAlertRules(runbookURLTemplate)...),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -381,23 +221,6 @@ func newPrometheusServiceMonitor(namespace string) *promv1.ServiceMonitor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func generateAlertRule(alert, expr string, duration promv1.Duration, annotations, labels map[string]string) promv1.Rule {
|
|
||||||
return promv1.Rule{
|
|
||||||
Alert: alert,
|
|
||||||
Expr: intstr.FromString(expr),
|
|
||||||
For: &duration,
|
|
||||||
Annotations: annotations,
|
|
||||||
Labels: labels,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func generateRecordRule(record, expr string) promv1.Rule {
|
|
||||||
return promv1.Rule{
|
|
||||||
Record: record,
|
|
||||||
Expr: intstr.FromString(expr),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *ReconcileCDI) watchPrometheusResources() error {
|
func (r *ReconcileCDI) watchPrometheusResources() error {
|
||||||
listObjs := []client.ObjectList{
|
listObjs := []client.ObjectList{
|
||||||
&promv1.PrometheusRuleList{},
|
&promv1.PrometheusRuleList{},
|
||||||
@ -439,16 +262,3 @@ func (r *ReconcileCDI) watchPrometheusResources() error {
|
|||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getRunbookURLTemplate() string {
|
|
||||||
runbookURLTemplate, exists := os.LookupEnv(runbookURLTemplateEnv)
|
|
||||||
if !exists {
|
|
||||||
runbookURLTemplate = defaultRunbookURLTemplate
|
|
||||||
}
|
|
||||||
|
|
||||||
if strings.Count(runbookURLTemplate, "%s") != 1 {
|
|
||||||
panic(errors.New("runbook URL template must have exactly 1 %s substring"))
|
|
||||||
}
|
|
||||||
|
|
||||||
return runbookURLTemplate
|
|
||||||
}
|
|
||||||
|
@ -3,15 +3,13 @@ load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
|
|||||||
go_library(
|
go_library(
|
||||||
name = "go_default_library",
|
name = "go_default_library",
|
||||||
srcs = ["metricsdocs.go"],
|
srcs = ["metricsdocs.go"],
|
||||||
importpath = "kubevirt.io/containerized-data-importer/pkg/monitoring/tools/metricsdocs",
|
importpath = "kubevirt.io/containerized-data-importer/tools/metricsdocs",
|
||||||
visibility = ["//visibility:private"],
|
visibility = ["//visibility:private"],
|
||||||
deps = [
|
deps = [
|
||||||
"//pkg/monitoring/metrics/cdi-controller:go_default_library",
|
"//pkg/monitoring/metrics/cdi-controller:go_default_library",
|
||||||
"//pkg/monitoring/metrics/operator-controller:go_default_library",
|
"//pkg/monitoring/metrics/operator-controller:go_default_library",
|
||||||
"//pkg/monitoring/rules/recordingrules:go_default_library",
|
"//pkg/monitoring/rules:go_default_library",
|
||||||
"//vendor/github.com/machadovilaca/operator-observability/pkg/docs:go_default_library",
|
"//vendor/github.com/machadovilaca/operator-observability/pkg/docs:go_default_library",
|
||||||
"//vendor/github.com/machadovilaca/operator-observability/pkg/operatormetrics:go_default_library",
|
|
||||||
"//vendor/github.com/machadovilaca/operator-observability/pkg/operatorrules:go_default_library",
|
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
56
tools/metricsdocs/metricsdocs.go
Normal file
56
tools/metricsdocs/metricsdocs.go
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/machadovilaca/operator-observability/pkg/docs"
|
||||||
|
|
||||||
|
cdiMetrics "kubevirt.io/containerized-data-importer/pkg/monitoring/metrics/cdi-controller"
|
||||||
|
operatorMetrics "kubevirt.io/containerized-data-importer/pkg/monitoring/metrics/operator-controller"
|
||||||
|
"kubevirt.io/containerized-data-importer/pkg/monitoring/rules"
|
||||||
|
)
|
||||||
|
|
||||||
|
const tpl = `# Containerized Data Importer metrics
|
||||||
|
{{- range . }}
|
||||||
|
|
||||||
|
{{ $deprecatedVersion := "" -}}
|
||||||
|
{{- with index .ExtraFields "DeprecatedVersion" -}}
|
||||||
|
{{- $deprecatedVersion = printf " in %s" . -}}
|
||||||
|
{{- end -}}
|
||||||
|
|
||||||
|
{{- $stabilityLevel := "" -}}
|
||||||
|
{{- if and (.ExtraFields.StabilityLevel) (ne .ExtraFields.StabilityLevel "STABLE") -}}
|
||||||
|
{{- $stabilityLevel = printf "[%s%s] " .ExtraFields.StabilityLevel $deprecatedVersion -}}
|
||||||
|
{{- end -}}
|
||||||
|
|
||||||
|
### {{ .Name }}
|
||||||
|
{{ print $stabilityLevel }}{{ .Help }}. Type: {{ .Type -}}.
|
||||||
|
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
## Developing new metrics
|
||||||
|
|
||||||
|
All metrics documented here are auto-generated and reflect exactly what is being
|
||||||
|
exposed. After developing new metrics or changing old ones please regenerate
|
||||||
|
this document.
|
||||||
|
`
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
err := operatorMetrics.SetupMetrics()
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = cdiMetrics.SetupMetrics()
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := rules.SetupRules("test"); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
docsString := docs.BuildMetricsDocsWithCustomTemplate(operatorMetrics.ListMetrics(), rules.ListRecordingRules(), tpl)
|
||||||
|
|
||||||
|
fmt.Print(docsString)
|
||||||
|
}
|
@ -3,12 +3,12 @@ load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
|
|||||||
go_library(
|
go_library(
|
||||||
name = "go_default_library",
|
name = "go_default_library",
|
||||||
srcs = ["metrics_json_generator.go"],
|
srcs = ["metrics_json_generator.go"],
|
||||||
importpath = "kubevirt.io/containerized-data-importer/pkg/monitoring/tools/prom-metrics-collector",
|
importpath = "kubevirt.io/containerized-data-importer/tools/prom-metrics-collector",
|
||||||
visibility = ["//visibility:private"],
|
visibility = ["//visibility:private"],
|
||||||
deps = [
|
deps = [
|
||||||
"//pkg/monitoring/metrics/cdi-controller:go_default_library",
|
"//pkg/monitoring/metrics/cdi-controller:go_default_library",
|
||||||
"//pkg/monitoring/metrics/operator-controller:go_default_library",
|
"//pkg/monitoring/metrics/operator-controller:go_default_library",
|
||||||
"//pkg/monitoring/rules/recordingrules:go_default_library",
|
"//pkg/monitoring/rules:go_default_library",
|
||||||
"//vendor/github.com/kubevirt/monitoring/pkg/metrics/parser:go_default_library",
|
"//vendor/github.com/kubevirt/monitoring/pkg/metrics/parser:go_default_library",
|
||||||
],
|
],
|
||||||
)
|
)
|
@ -8,7 +8,7 @@ import (
|
|||||||
"github.com/kubevirt/monitoring/pkg/metrics/parser"
|
"github.com/kubevirt/monitoring/pkg/metrics/parser"
|
||||||
cdiMetrics "kubevirt.io/containerized-data-importer/pkg/monitoring/metrics/cdi-controller"
|
cdiMetrics "kubevirt.io/containerized-data-importer/pkg/monitoring/metrics/cdi-controller"
|
||||||
operatorMetrics "kubevirt.io/containerized-data-importer/pkg/monitoring/metrics/operator-controller"
|
operatorMetrics "kubevirt.io/containerized-data-importer/pkg/monitoring/metrics/operator-controller"
|
||||||
"kubevirt.io/containerized-data-importer/pkg/monitoring/rules/recordingrules"
|
"kubevirt.io/containerized-data-importer/pkg/monitoring/rules"
|
||||||
)
|
)
|
||||||
|
|
||||||
// This should be used only for very rare cases where the naming conventions that are explained in the best practices:
|
// This should be used only for very rare cases where the naming conventions that are explained in the best practices:
|
||||||
@ -27,6 +27,10 @@ func main() {
|
|||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := rules.SetupRules("test"); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
var metricFamilies []parser.Metric
|
var metricFamilies []parser.Metric
|
||||||
|
|
||||||
metricsList := operatorMetrics.ListMetrics()
|
metricsList := operatorMetrics.ListMetrics()
|
||||||
@ -40,13 +44,15 @@ func main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
recordingRules := recordingrules.GetRecordRulesDesc("")
|
rulesList := rules.ListRecordingRules()
|
||||||
for _, r := range recordingRules {
|
for _, r := range rulesList {
|
||||||
metricFamilies = append(metricFamilies, parser.Metric{
|
if _, isExcludedMetric := excludedMetrics[r.GetOpts().Name]; !isExcludedMetric {
|
||||||
Name: r.Opts.Name,
|
metricFamilies = append(metricFamilies, parser.Metric{
|
||||||
Help: r.Opts.Help,
|
Name: r.GetOpts().Name,
|
||||||
Type: strings.ToUpper(r.Opts.Type),
|
Help: r.GetOpts().Help,
|
||||||
})
|
Type: strings.ToUpper(string(r.GetType())),
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
jsonBytes, err := json.Marshal(metricFamilies)
|
jsonBytes, err := json.Marshal(metricFamilies)
|
3
vendor/github.com/machadovilaca/operator-observability/pkg/docs/metrics.go
generated
vendored
3
vendor/github.com/machadovilaca/operator-observability/pkg/docs/metrics.go
generated
vendored
@ -63,6 +63,8 @@ func BuildMetricsDocsWithCustomTemplate(
|
|||||||
allDocs = append(allDocs, buildMetricsDocs(recordingRules)...)
|
allDocs = append(allDocs, buildMetricsDocs(recordingRules)...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sortMetricsDocs(allDocs)
|
||||||
|
|
||||||
buf := bytes.NewBufferString("")
|
buf := bytes.NewBufferString("")
|
||||||
err = tpl.Execute(buf, allDocs)
|
err = tpl.Execute(buf, allDocs)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -89,7 +91,6 @@ func buildMetricsDocs[T docOptions](items []T) []metricDocs {
|
|||||||
ExtraFields: metricOpts.ExtraFields,
|
ExtraFields: metricOpts.ExtraFields,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
sortMetricsDocs(metricsDocs)
|
|
||||||
|
|
||||||
return metricsDocs
|
return metricsDocs
|
||||||
}
|
}
|
||||||
|
11
vendor/github.com/machadovilaca/operator-observability/pkg/operatormetrics/collector.go
generated
vendored
11
vendor/github.com/machadovilaca/operator-observability/pkg/operatormetrics/collector.go
generated
vendored
@ -2,6 +2,7 @@ package operatormetrics
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
)
|
)
|
||||||
@ -24,6 +25,16 @@ type CollectorResult struct {
|
|||||||
Value float64
|
Value float64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c Collector) hash() string {
|
||||||
|
var sb strings.Builder
|
||||||
|
|
||||||
|
for _, cm := range c.Metrics {
|
||||||
|
sb.WriteString(cm.GetOpts().Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
|
|
||||||
func (c Collector) Describe(ch chan<- *prometheus.Desc) {
|
func (c Collector) Describe(ch chan<- *prometheus.Desc) {
|
||||||
for _, cm := range c.Metrics {
|
for _, cm := range c.Metrics {
|
||||||
cm.getCollector().Describe(ch)
|
cm.getCollector().Describe(ch)
|
||||||
|
@ -5,6 +5,10 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type RegistryFunc func(c prometheus.Collector) error
|
type RegistryFunc func(c prometheus.Collector) error
|
||||||
|
type UnregisterFunc func(c prometheus.Collector) bool
|
||||||
|
|
||||||
// Register is the function used to register metrics and collectors by this package.
|
// Register is the function used to register metrics and collectors by this package.
|
||||||
var Register RegistryFunc = prometheus.Register
|
var Register RegistryFunc = prometheus.Register
|
||||||
|
|
||||||
|
// Unregister is the function used to unregister metrics and collectors by this package.
|
||||||
|
var Unregister UnregisterFunc = prometheus.Unregister
|
||||||
|
@ -1,15 +1,24 @@
|
|||||||
package operatormetrics
|
package operatormetrics
|
||||||
|
|
||||||
|
import (
|
||||||
|
"cmp"
|
||||||
|
"fmt"
|
||||||
|
"slices"
|
||||||
|
)
|
||||||
|
|
||||||
var operatorRegistry = newRegistry()
|
var operatorRegistry = newRegistry()
|
||||||
|
|
||||||
type operatorRegisterer struct {
|
type operatorRegisterer struct {
|
||||||
registeredMetrics map[string]Metric
|
registeredMetrics map[string]Metric
|
||||||
|
|
||||||
|
registeredCollectors map[string]Collector
|
||||||
registeredCollectorMetrics map[string]Metric
|
registeredCollectorMetrics map[string]Metric
|
||||||
}
|
}
|
||||||
|
|
||||||
func newRegistry() operatorRegisterer {
|
func newRegistry() operatorRegisterer {
|
||||||
return operatorRegisterer{
|
return operatorRegisterer{
|
||||||
registeredMetrics: map[string]Metric{},
|
registeredMetrics: map[string]Metric{},
|
||||||
|
registeredCollectors: map[string]Collector{},
|
||||||
registeredCollectorMetrics: map[string]Metric{},
|
registeredCollectorMetrics: map[string]Metric{},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -18,11 +27,17 @@ func newRegistry() operatorRegisterer {
|
|||||||
func RegisterMetrics(allMetrics ...[]Metric) error {
|
func RegisterMetrics(allMetrics ...[]Metric) error {
|
||||||
for _, metricList := range allMetrics {
|
for _, metricList := range allMetrics {
|
||||||
for _, metric := range metricList {
|
for _, metric := range metricList {
|
||||||
err := Register(metric.getCollector())
|
if metricExists(metric) {
|
||||||
|
err := unregisterMetric(metric)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
err := registerMetric(metric)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
operatorRegistry.registeredMetrics[metric.GetOpts().Name] = metric
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -32,13 +47,16 @@ func RegisterMetrics(allMetrics ...[]Metric) error {
|
|||||||
// RegisterCollector registers the collector with the Prometheus registry.
|
// RegisterCollector registers the collector with the Prometheus registry.
|
||||||
func RegisterCollector(collectors ...Collector) error {
|
func RegisterCollector(collectors ...Collector) error {
|
||||||
for _, collector := range collectors {
|
for _, collector := range collectors {
|
||||||
err := Register(collector)
|
if collectorExists(collector) {
|
||||||
if err != nil {
|
err := unregisterCollector(collector)
|
||||||
return err
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, cm := range collector.Metrics {
|
err := registerCollector(collector)
|
||||||
operatorRegistry.registeredCollectorMetrics[cm.GetOpts().Name] = cm
|
if err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -57,5 +75,92 @@ func ListMetrics() []Metric {
|
|||||||
result = append(result, rc)
|
result = append(result, rc)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
slices.SortFunc(result, func(a, b Metric) int {
|
||||||
|
return cmp.Compare(a.GetOpts().Name, b.GetOpts().Name)
|
||||||
|
})
|
||||||
|
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CleanRegistry removes all registered metrics.
|
||||||
|
func CleanRegistry() error {
|
||||||
|
for _, metric := range operatorRegistry.registeredMetrics {
|
||||||
|
err := unregisterMetric(metric)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, collector := range operatorRegistry.registeredCollectors {
|
||||||
|
err := unregisterCollector(collector)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func metricExists(metric Metric) bool {
|
||||||
|
_, ok := operatorRegistry.registeredMetrics[metric.GetOpts().Name]
|
||||||
|
return ok
|
||||||
|
}
|
||||||
|
|
||||||
|
func unregisterMetric(metric Metric) error {
|
||||||
|
if succeeded := Unregister(metric.getCollector()); succeeded {
|
||||||
|
delete(operatorRegistry.registeredMetrics, metric.GetOpts().Name)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return fmt.Errorf("failed to unregister from Prometheus client metric %s", metric.GetOpts().Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
func registerMetric(metric Metric) error {
|
||||||
|
err := Register(metric.getCollector())
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
operatorRegistry.registeredMetrics[metric.GetOpts().Name] = metric
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func collectorExists(collector Collector) bool {
|
||||||
|
_, ok := operatorRegistry.registeredCollectors[collector.hash()]
|
||||||
|
return ok
|
||||||
|
}
|
||||||
|
|
||||||
|
func unregisterCollector(collector Collector) error {
|
||||||
|
if succeeded := Unregister(collector); succeeded {
|
||||||
|
delete(operatorRegistry.registeredCollectors, collector.hash())
|
||||||
|
for _, metric := range collector.Metrics {
|
||||||
|
delete(operatorRegistry.registeredCollectorMetrics, metric.GetOpts().Name)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return fmt.Errorf("failed to unregister from Prometheus client collector with metrics: %s", buildCollectorMetricListString(collector))
|
||||||
|
}
|
||||||
|
|
||||||
|
func registerCollector(collector Collector) error {
|
||||||
|
err := Register(collector)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
operatorRegistry.registeredCollectors[collector.hash()] = collector
|
||||||
|
for _, cm := range collector.Metrics {
|
||||||
|
operatorRegistry.registeredCollectorMetrics[cm.GetOpts().Name] = cm
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildCollectorMetricListString(collector Collector) string {
|
||||||
|
metricsList := ""
|
||||||
|
for _, metric := range collector.Metrics {
|
||||||
|
metricsList += metric.GetOpts().Name + ", "
|
||||||
|
}
|
||||||
|
metricsList = metricsList[:len(metricsList)-2]
|
||||||
|
return metricsList
|
||||||
|
}
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
package operatorrules
|
package operatorrules
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"cmp"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"slices"
|
||||||
|
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
|
||||||
@ -42,7 +44,7 @@ func buildPrometheusRuleSpec() (*promv1.PrometheusRuleSpec, error) {
|
|||||||
if len(operatorRegistry.registeredAlerts) != 0 {
|
if len(operatorRegistry.registeredAlerts) != 0 {
|
||||||
groups = append(groups, promv1.RuleGroup{
|
groups = append(groups, promv1.RuleGroup{
|
||||||
Name: "alerts.rules",
|
Name: "alerts.rules",
|
||||||
Rules: buildAlertsRules(),
|
Rules: ListAlerts(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -63,11 +65,9 @@ func buildRecordingRulesRules() []promv1.Rule {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
return rules
|
slices.SortFunc(rules, func(a, b promv1.Rule) int {
|
||||||
}
|
return cmp.Compare(a.Record, b.Record)
|
||||||
|
})
|
||||||
|
|
||||||
func buildAlertsRules() []promv1.Rule {
|
|
||||||
var rules []promv1.Rule
|
|
||||||
rules = append(rules, operatorRegistry.registeredAlerts...)
|
|
||||||
return rules
|
return rules
|
||||||
}
|
}
|
||||||
|
46
vendor/github.com/machadovilaca/operator-observability/pkg/operatorrules/registry.go
generated
vendored
46
vendor/github.com/machadovilaca/operator-observability/pkg/operatorrules/registry.go
generated
vendored
@ -1,26 +1,32 @@
|
|||||||
package operatorrules
|
package operatorrules
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"cmp"
|
||||||
|
"slices"
|
||||||
|
|
||||||
promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
|
promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
|
||||||
)
|
)
|
||||||
|
|
||||||
var operatorRegistry = newRegistry()
|
var operatorRegistry = newRegistry()
|
||||||
|
|
||||||
type operatorRegisterer struct {
|
type operatorRegisterer struct {
|
||||||
registeredRecordingRules []RecordingRule
|
registeredRecordingRules map[string]RecordingRule
|
||||||
registeredAlerts []promv1.Rule
|
registeredAlerts map[string]promv1.Rule
|
||||||
}
|
}
|
||||||
|
|
||||||
func newRegistry() operatorRegisterer {
|
func newRegistry() operatorRegisterer {
|
||||||
return operatorRegisterer{
|
return operatorRegisterer{
|
||||||
registeredRecordingRules: []RecordingRule{},
|
registeredRecordingRules: map[string]RecordingRule{},
|
||||||
|
registeredAlerts: map[string]promv1.Rule{},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// RegisterRecordingRules registers the given recording rules.
|
// RegisterRecordingRules registers the given recording rules.
|
||||||
func RegisterRecordingRules(recordingRules ...[]RecordingRule) error {
|
func RegisterRecordingRules(recordingRules ...[]RecordingRule) error {
|
||||||
for _, recordingRuleList := range recordingRules {
|
for _, recordingRuleList := range recordingRules {
|
||||||
operatorRegistry.registeredRecordingRules = append(operatorRegistry.registeredRecordingRules, recordingRuleList...)
|
for _, recordingRule := range recordingRuleList {
|
||||||
|
operatorRegistry.registeredRecordingRules[recordingRule.MetricsOpts.Name] = recordingRule
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
@ -29,7 +35,9 @@ func RegisterRecordingRules(recordingRules ...[]RecordingRule) error {
|
|||||||
// RegisterAlerts registers the given alerts.
|
// RegisterAlerts registers the given alerts.
|
||||||
func RegisterAlerts(alerts ...[]promv1.Rule) error {
|
func RegisterAlerts(alerts ...[]promv1.Rule) error {
|
||||||
for _, alertList := range alerts {
|
for _, alertList := range alerts {
|
||||||
operatorRegistry.registeredAlerts = append(operatorRegistry.registeredAlerts, alertList...)
|
for _, alert := range alertList {
|
||||||
|
operatorRegistry.registeredAlerts[alert.Alert] = alert
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
@ -37,10 +45,34 @@ func RegisterAlerts(alerts ...[]promv1.Rule) error {
|
|||||||
|
|
||||||
// ListRecordingRules returns the registered recording rules.
|
// ListRecordingRules returns the registered recording rules.
|
||||||
func ListRecordingRules() []RecordingRule {
|
func ListRecordingRules() []RecordingRule {
|
||||||
return operatorRegistry.registeredRecordingRules
|
var rules []RecordingRule
|
||||||
|
for _, rule := range operatorRegistry.registeredRecordingRules {
|
||||||
|
rules = append(rules, rule)
|
||||||
|
}
|
||||||
|
|
||||||
|
slices.SortFunc(rules, func(a, b RecordingRule) int {
|
||||||
|
return cmp.Compare(a.GetOpts().Name, b.GetOpts().Name)
|
||||||
|
})
|
||||||
|
|
||||||
|
return rules
|
||||||
}
|
}
|
||||||
|
|
||||||
// ListAlerts returns the registered alerts.
|
// ListAlerts returns the registered alerts.
|
||||||
func ListAlerts() []promv1.Rule {
|
func ListAlerts() []promv1.Rule {
|
||||||
return operatorRegistry.registeredAlerts
|
var alerts []promv1.Rule
|
||||||
|
for _, alert := range operatorRegistry.registeredAlerts {
|
||||||
|
alerts = append(alerts, alert)
|
||||||
|
}
|
||||||
|
|
||||||
|
slices.SortFunc(alerts, func(a, b promv1.Rule) int {
|
||||||
|
return cmp.Compare(a.Alert, b.Alert)
|
||||||
|
})
|
||||||
|
|
||||||
|
return alerts
|
||||||
|
}
|
||||||
|
|
||||||
|
// CleanRegistry removes all registered rules and alerts.
|
||||||
|
func CleanRegistry() error {
|
||||||
|
operatorRegistry = newRegistry()
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
4
vendor/modules.txt
vendored
4
vendor/modules.txt
vendored
@ -357,8 +357,8 @@ github.com/kubernetes-csi/lib-volume-populator/populator-machinery
|
|||||||
# github.com/kubevirt/monitoring/pkg/metrics/parser v0.0.0-20230627123556-81a891d4462a
|
# github.com/kubevirt/monitoring/pkg/metrics/parser v0.0.0-20230627123556-81a891d4462a
|
||||||
## explicit; go 1.20
|
## explicit; go 1.20
|
||||||
github.com/kubevirt/monitoring/pkg/metrics/parser
|
github.com/kubevirt/monitoring/pkg/metrics/parser
|
||||||
# github.com/machadovilaca/operator-observability v0.0.9
|
# github.com/machadovilaca/operator-observability v0.0.13
|
||||||
## explicit; go 1.20
|
## explicit; go 1.21
|
||||||
github.com/machadovilaca/operator-observability/pkg/docs
|
github.com/machadovilaca/operator-observability/pkg/docs
|
||||||
github.com/machadovilaca/operator-observability/pkg/operatormetrics
|
github.com/machadovilaca/operator-observability/pkg/operatormetrics
|
||||||
github.com/machadovilaca/operator-observability/pkg/operatorrules
|
github.com/machadovilaca/operator-observability/pkg/operatorrules
|
||||||
|
Loading…
Reference in New Issue
Block a user