diff --git a/docs/metrics.md b/docs/metrics.md index 722bbbb02908..3502a88c98ed 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -12,6 +12,9 @@ All metrics documented here are auto-generated by the utility tool `tools/doc-ge ### kubevirt_info Version information. +### kubevirt_abnormal_state +Monitors resources for potential problems. Type: Gauge. + ### kubevirt_allocatable_nodes The number of allocatable nodes in the cluster. Type: Gauge. @@ -30,6 +33,12 @@ The number of nodes in the cluster that have the devices.kubevirt.io/kvm resourc ### kubevirt_number_of_vms The number of VMs in the cluster by namespace. Type: Gauge. +### kubevirt_over_subscribed_container_rss_memory_bytes +The pod with the highest exceeded memory for each container based on the rss. Type: Gauge. + +### kubevirt_over_subscribed_container_working_set_memory_bytes +The pod with the highest exceeded memory for each container based on the working set. Type: Gauge. + ### kubevirt_portforward_active_tunnels Amount of active portforward tunnels, broken down by namespace and vmi name. Type: Gauge. 
diff --git a/go.mod b/go.mod index 5b3831af33f3..c20fbbf94b4f 100644 --- a/go.mod +++ b/go.mod @@ -32,7 +32,7 @@ require ( github.com/krolaw/dhcp4 v0.0.0-20180925202202-7cead472c414 github.com/kubernetes-csi/external-snapshotter/client/v4 v4.2.0 github.com/kubevirt/monitoring/pkg/metrics/parser v0.0.0-20230627123556-81a891d4462a - github.com/machadovilaca/operator-observability v0.0.14 + github.com/machadovilaca/operator-observability v0.0.17 github.com/mdlayher/vsock v1.2.1 github.com/mitchellh/go-ps v0.0.0-20190716172923-621e5597135b github.com/mitchellh/go-vnc v0.0.0-20150629162542-723ed9867aed diff --git a/go.sum b/go.sum index 7d61ac04e817..7164ca341a14 100644 --- a/go.sum +++ b/go.sum @@ -581,8 +581,8 @@ github.com/kubernetes-csi/external-snapshotter/client/v4 v4.2.0 h1:nHHjmvjitIiyP github.com/kubernetes-csi/external-snapshotter/client/v4 v4.2.0/go.mod h1:YBCo4DoEeDndqvAn6eeu0vWM7QdXmHEeI9cFWplmBys= github.com/kubevirt/monitoring/pkg/metrics/parser v0.0.0-20230627123556-81a891d4462a h1:cdX+oxWw1lJDS3EchP+7Oz1XbErk4r7ffVJu1b1MKgI= github.com/kubevirt/monitoring/pkg/metrics/parser v0.0.0-20230627123556-81a891d4462a/go.mod h1:qGj2agzgwQ27nYhP3xhLs+IBzE5+ALNUg8bDfMcwPqo= -github.com/machadovilaca/operator-observability v0.0.14 h1:tS/GKvQRKvpD7pRauS1ulw0AN2V0j2mobg+mFWBt5LE= -github.com/machadovilaca/operator-observability v0.0.14/go.mod h1:e4Z3VhOXb9InkmSh00JjqBBijE+iD+YMzynBpKB3+gE= +github.com/machadovilaca/operator-observability v0.0.17 h1:XHL+YHkCv5WN5MKMR+/ld5pOnRnLVzxVX2b5OvSfhzs= +github.com/machadovilaca/operator-observability v0.0.17/go.mod h1:e4Z3VhOXb9InkmSh00JjqBBijE+iD+YMzynBpKB3+gE= github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/mailru/easyjson v0.0.0-20180823135443-60711f1a8329/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= diff --git 
a/pkg/monitoring/rules/recordingrules/BUILD.bazel b/pkg/monitoring/rules/recordingrules/BUILD.bazel index c043ba966ebe..67aaad1cecbb 100644 --- a/pkg/monitoring/rules/recordingrules/BUILD.bazel +++ b/pkg/monitoring/rules/recordingrules/BUILD.bazel @@ -5,6 +5,7 @@ go_library( srcs = [ "api.go", "nodes.go", + "operator.go", "recordingrules.go", "virt.go", "vm.go", diff --git a/pkg/monitoring/rules/recordingrules/operator.go b/pkg/monitoring/rules/recordingrules/operator.go new file mode 100644 index 000000000000..9bec294103ac --- /dev/null +++ b/pkg/monitoring/rules/recordingrules/operator.go @@ -0,0 +1,60 @@ +/* +Copyright 2024 The KubeVirt Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package recordingrules
+
+import (
+	"github.com/machadovilaca/operator-observability/pkg/operatormetrics"
+	"github.com/machadovilaca/operator-observability/pkg/operatorrules"
+	"k8s.io/apimachinery/pkg/util/intstr"
+)
+
+const (
+	abnormalLabelKey = "reason" // label key attached, via ConstLabels, to the two over-subscription rules below
+)
+
+var operatorRecordingRules = []operatorrules.RecordingRule{ // recording rules that compare memory usage of containers named virt-* against their memory requests
+	{ // working-set variant
+		MetricsOpts: operatormetrics.MetricOpts{
+			Name: "kubevirt_over_subscribed_container_working_set_memory_bytes",
+			Help: "The pod with the highest exceeded memory for each container based on the working set.",
+			ConstLabels: map[string]string{
+				abnormalLabelKey: "WorkingSetMemoryExceeded",
+			},
+		},
+		MetricType: operatormetrics.GaugeType,
+		Expr:       intstr.FromString("max by(container, namespace)(container_memory_working_set_bytes{container=~\"virt-.*\"} - on(node, namespace, pod, container) (kube_pod_container_resource_requests{resource=\"memory\"}))"), // per container and namespace, the largest difference between working-set bytes and the memory request; operands are paired on node, namespace, pod and container
+	}, // NOTE(review): the difference is not filtered, so it can be negative when usage is below the request, and the pod label is aggregated away although Help mentions the pod - confirm both are intended
+	{ // RSS variant; same shape as the rule above with container_memory_rss as the usage series
+		MetricsOpts: operatormetrics.MetricOpts{
+			Name: "kubevirt_over_subscribed_container_rss_memory_bytes",
+			Help: "The pod with the highest exceeded memory for each container based on the rss.",
+			ConstLabels: map[string]string{
+				abnormalLabelKey: "RSSMemoryExceeded",
+			},
+		},
+		MetricType: operatormetrics.GaugeType,
+		Expr:       intstr.FromString("max by(container, namespace)(container_memory_rss{container=~\"virt-.*\"} - on(node, namespace, pod, container) (kube_pod_container_resource_requests{resource=\"memory\"}))"), // per container and namespace, the largest difference between RSS bytes and the memory request
+	}, // NOTE(review): same unfiltered-difference and dropped-pod-label caveats as the working-set rule - confirm
+	{ // aggregate metric; declares no const labels of its own
+		MetricsOpts: operatormetrics.MetricOpts{
+			Name: "kubevirt_abnormal_state",
+			Help: "Monitors resources for potential problems.",
+		},
+		MetricType: operatormetrics.GaugeType,
+		Expr:       intstr.FromString("kubevirt_over_subscribed_container_working_set_memory_bytes or kubevirt_over_subscribed_container_rss_memory_bytes"), // union of the two recorded metrics above; NOTE(review): presumably each operand keeps its own reason label so neither side shadows the other - verify
+	},
+}
diff --git a/pkg/monitoring/rules/recordingrules/recordingrules.go b/pkg/monitoring/rules/recordingrules/recordingrules.go
index c3f65e2be1e2..a1b8f4ff3001 100644
--- a/pkg/monitoring/rules/recordingrules/recordingrules.go
+++ 
b/pkg/monitoring/rules/recordingrules/recordingrules.go @@ -6,6 +6,7 @@ func Register(namespace string) error { return operatorrules.RegisterRecordingRules( apiRecordingRules, nodesRecordingRules, + operatorRecordingRules, virtRecordingRules(namespace), vmRecordingRules, vmiRecordingRules, diff --git a/vendor/github.com/machadovilaca/operator-observability/pkg/operatorrules/prometheusrules.go b/vendor/github.com/machadovilaca/operator-observability/pkg/operatorrules/prometheusrules.go index 97b55d07793f..9f54c7b96b58 100644 --- a/vendor/github.com/machadovilaca/operator-observability/pkg/operatorrules/prometheusrules.go +++ b/vendor/github.com/machadovilaca/operator-observability/pkg/operatorrules/prometheusrules.go @@ -62,6 +62,7 @@ func buildRecordingRulesRules() []promv1.Rule { rules = append(rules, promv1.Rule{ Record: recordingRule.MetricsOpts.Name, Expr: recordingRule.Expr, + Labels: recordingRule.MetricsOpts.ConstLabels, }) } diff --git a/vendor/modules.txt b/vendor/modules.txt index 8fe830361e97..9d72839db94e 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -241,7 +241,7 @@ github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1 # github.com/kubevirt/monitoring/pkg/metrics/parser v0.0.0-20230627123556-81a891d4462a ## explicit; go 1.20 github.com/kubevirt/monitoring/pkg/metrics/parser -# github.com/machadovilaca/operator-observability v0.0.14 +# github.com/machadovilaca/operator-observability v0.0.17 ## explicit; go 1.21 github.com/machadovilaca/operator-observability/pkg/operatormetrics github.com/machadovilaca/operator-observability/pkg/operatorrules