Merge pull request #11445 from avlitman/r1.1-deprecate_alerts
[release-1.1] Deprecate cpu and memory exceeds alerts
kubevirt-bot committed Apr 11, 2024
2 parents a95d878 + 5b07175 commit 6b6d0a1

Showing 3 changed files with 0 additions and 112 deletions.
50 changes: 0 additions & 50 deletions hack/prom-rule-ci/prom-rules-tests.yaml
@@ -6,56 +6,6 @@ group_eval_order:
- kubevirt.rules
#information about this format can be found in: https://prometheus.io/docs/prometheus/latest/configuration/unit_testing_rules/
tests:
# Pod is using more CPU than expected
- interval: 1m
input_series:
- series: 'container_cpu_usage_seconds_total{namespace="ci",pod="virt-controller-8546c99968-x9jgg",node="node1"}'
values: '1+1x6'
- series: 'kube_pod_container_resource_requests{namespace="ci",container="virt-controller",resource="cpu",pod="virt-controller-8546c99968-x9jgg",node="node1"}'
values: '0+0x6'

alert_rule_test:
- eval_time: 6m
alertname: KubeVirtComponentExceedsRequestedCPU
exp_alerts:
- exp_annotations:
description: "Pod virt-controller-8546c99968-x9jgg cpu usage exceeds the CPU requested"
summary: "The containers in the pod are using more CPU than what is defined in the containers resource requests"
runbook_url: "https://kubevirt.io/monitoring/runbooks/KubeVirtComponentExceedsRequestedCPU"
exp_labels:
severity: "warning"
operator_health_impact: "none"
kubernetes_operator_part_of: "kubevirt"
kubernetes_operator_component: "kubevirt"
pod: "virt-controller-8546c99968-x9jgg"

# Pod is using more memory than expected
- interval: 1m
input_series:
- series: 'container_memory_working_set_bytes{namespace="ci",container="",pod="virt-controller-8546c99968-x9jgg",node="node1"}'
values: "157286400+0x5"
- series: 'kube_pod_container_resource_requests{namespace="ci",container="virt-controller",resource="memory",pod="virt-controller-8546c99968-x9jgg",node="node1"}'
values: "118325248+0x5"

alert_rule_test:
- eval_time: 5m
alertname: KubeVirtComponentExceedsRequestedMemory
exp_alerts:
- exp_annotations:
description: "Container virt-controller in pod virt-controller-8546c99968-x9jgg memory usage exceeds the memory requested"
summary: "The container is using more memory than what is defined in the containers resource requests"
runbook_url: "https://kubevirt.io/monitoring/runbooks/KubeVirtComponentExceedsRequestedMemory"
exp_labels:
severity: "warning"
operator_health_impact: "none"
kubernetes_operator_part_of: "kubevirt"
kubernetes_operator_component: "kubevirt"
namespace: ci
node: "node1"
pod: "virt-controller-8546c99968-x9jgg"
resource: "memory"
container: virt-controller

# Alerts to test whether our operators are up or not
- interval: 1m
input_series:
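
For context, the deleted unit tests above drive promtool with Prometheus's expanding series notation: '1+1x6' means start at 1 and add 1 per 1m interval for 6 further steps (seven samples, matching the 6m eval_time), while '157286400+0x5' is six flat samples for the 5m memory test. The Go snippet below is only a toy illustration of that notation; the expand helper is invented for this note and is not promtool's parser.

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// expand turns the "a+bxN" notation used by the removed input_series values
// into its N+1 samples: a, a+b, ..., a+N*b. Decrement ("a-bxN") and the other
// promtool forms are intentionally not handled in this sketch.
func expand(notation string) ([]float64, error) {
	base, rest, ok := strings.Cut(notation, "+")
	if !ok {
		return nil, fmt.Errorf("unsupported notation %q", notation)
	}
	stepStr, countStr, ok := strings.Cut(rest, "x")
	if !ok {
		return nil, fmt.Errorf("unsupported notation %q", notation)
	}
	start, err := strconv.ParseFloat(base, 64)
	if err != nil {
		return nil, err
	}
	step, err := strconv.ParseFloat(stepStr, 64)
	if err != nil {
		return nil, err
	}
	n, err := strconv.Atoi(countStr)
	if err != nil {
		return nil, err
	}
	samples := make([]float64, 0, n+1)
	for i := 0; i <= n; i++ {
		samples = append(samples, start+float64(i)*step)
	}
	return samples, nil
}

func main() {
	cpu, _ := expand("1+1x6")         // counter series from the removed CPU test: 1 2 3 4 5 6 7
	mem, _ := expand("157286400+0x5") // flat working-set series from the removed memory test
	fmt.Println(cpu, mem)
}
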
33 changes: 0 additions & 33 deletions pkg/virt-operator/resource/generate/components/prometheus.go
@@ -422,39 +422,6 @@ func NewPrometheusRuleSpec(ns string) *v1.PrometheusRuleSpec {
operatorHealthImpactLabelKey: "none",
},
},
{
Alert: "KubeVirtComponentExceedsRequestedMemory",
Expr: intstr.FromString(
// In 'container_memory_working_set_bytes', 'container=""' filters the accumulated metric for the pod slice to measure total Memory usage for all containers within the pod
fmt.Sprintf(`((kube_pod_container_resource_requests{namespace="%s",container=~"virt-controller|virt-api|virt-handler|virt-operator",resource="memory"}) - on(pod) group_left(node) container_memory_working_set_bytes{container="",namespace="%s"}) < 0`, ns, ns)),
For: "5m",
Annotations: map[string]string{
"description": "Container {{ $labels.container }} in pod {{ $labels.pod }} memory usage exceeds the memory requested",
"summary": "The container is using more memory than what is defined in the containers resource requests",
"runbook_url": fmt.Sprintf(runbookURLTemplate, "KubeVirtComponentExceedsRequestedMemory"),
},
Labels: map[string]string{
severityAlertLabelKey: "warning",
operatorHealthImpactLabelKey: "none",
},
},
{
Alert: "KubeVirtComponentExceedsRequestedCPU",
Expr: intstr.FromString(
// In 'container_cpu_usage_seconds_total', 'container=""' filters the accumulated metric for the pod slice to measure total CPU usage for all containers within the pod
fmt.Sprintf(`((kube_pod_container_resource_requests{namespace="%s",container=~"virt-controller|virt-api|virt-handler|virt-operator",resource="cpu"}) - on(pod) sum(rate(container_cpu_usage_seconds_total{container="",namespace="%s"}[5m])) by (pod)) < 0`, ns, ns),
),
For: "5m",
Annotations: map[string]string{
"description": "Pod {{ $labels.pod }} cpu usage exceeds the CPU requested",
"summary": "The containers in the pod are using more CPU than what is defined in the containers resource requests",
"runbook_url": fmt.Sprintf(runbookURLTemplate, "KubeVirtComponentExceedsRequestedCPU"),
},
Labels: map[string]string{
severityAlertLabelKey: "warning",
operatorHealthImpactLabelKey: "none",
},
},
{
Alert: "KubeVirtVMIExcessiveMigrations",
Expr: intstr.FromString("sum by (vmi) (max_over_time(kubevirt_vmi_migration_succeeded[1d])) >= 12"),
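
Both rules removed above built their PromQL with fmt.Sprintf, substituting the install namespace twice. The minimal sketch below simply re-renders the deleted KubeVirtComponentExceedsRequestedCPU expression for a placeholder namespace; "kubevirt" is only an example value here, as NewPrometheusRuleSpec received the real namespace from its caller.

package main

import "fmt"

func main() {
	// Placeholder namespace purely for illustration; in the removed code the
	// value came from NewPrometheusRuleSpec's ns argument.
	ns := "kubevirt"

	// Format string copied from the removed rule: the requested CPU minus the
	// pod-level usage rate goes negative whenever usage exceeds the request.
	expr := fmt.Sprintf(`((kube_pod_container_resource_requests{namespace="%s",container=~"virt-controller|virt-api|virt-handler|virt-operator",resource="cpu"}) - on(pod) sum(rate(container_cpu_usage_seconds_total{container="",namespace="%s"}[5m])) by (pod)) < 0`, ns, ns)
	fmt.Println(expr)
}
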
29 changes: 0 additions & 29 deletions tests/monitoring/component_monitoring.go
@@ -249,35 +249,6 @@ var _ = Describe("[Serial][sig-monitoring]Component Monitoring", Serial, decorat
}, 5*time.Minute, 500*time.Millisecond).Should(Succeed())
})
})

Context("Resource metrics", func() {
var resourceAlerts = []string{
"KubeVirtComponentExceedsRequestedCPU",
"KubeVirtComponentExceedsRequestedMemory",
}

BeforeEach(func() {
virtClient = kubevirt.Client()
scales = NewScaling(virtClient, []string{virtOperator.deploymentName})
scales.UpdateScale(virtOperator.deploymentName, int32(0))
reduceAlertPendingTime(virtClient)
})

AfterEach(func() {
scales.RestoreAllScales()
time.Sleep(10 * time.Second)
waitUntilAlertDoesNotExist(virtClient, resourceAlerts...)
})

It("KubeVirtComponentExceedsRequestedCPU should be triggered when virt-api exceeds requested CPU", func() {
By("updating virt-api deployment CPU and Memory requests")
updateDeploymentResourcesRequest(virtClient, virtApi.deploymentName, resource.MustParse("0m"), resource.MustParse("0Mi"))

By("waiting for KubeVirtComponentExceedsRequestedCPU and KubeVirtComponentExceedsRequestedMemory alerts")
verifyAlertExist(virtClient, "KubeVirtComponentExceedsRequestedCPU")
verifyAlertExist(virtClient, "KubeVirtComponentExceedsRequestedMemory")
})
})
})

func updateDeploymentResourcesRequest(virtClient kubecli.KubevirtClient, deploymentName string, cpu, memory resource.Quantity) {
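
The deleted e2e test triggered both alerts by zeroing virt-api's CPU and memory requests via updateDeploymentResourcesRequest (its signature is visible in the remaining context above). The sketch below only shows what such a zeroed requests block looks like with the Kubernetes API types; the actual Deployment patching stays in the suite's helper and is not reproduced here.

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Illustration only: resource requests equivalent to what the removed test
	// passed ("0m" CPU, "0Mi" memory), so any real usage exceeds the request
	// and the now-deprecated alerts would fire.
	requests := corev1.ResourceRequirements{
		Requests: corev1.ResourceList{
			corev1.ResourceCPU:    resource.MustParse("0m"),
			corev1.ResourceMemory: resource.MustParse("0Mi"),
		},
	}
	fmt.Printf("requested cpu=%s, memory=%s\n",
		requests.Requests.Cpu().String(),
		requests.Requests.Memory().String())
}
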
