Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor metrics domainstats #11601

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/virt-handler/BUILD.bazel
Expand Up @@ -11,8 +11,8 @@ go_library(
"//pkg/controller:go_default_library",
"//pkg/healthz:go_default_library",
"//pkg/monitoring/domainstats/downwardmetrics:go_default_library",
"//pkg/monitoring/domainstats/prometheus:go_default_library",
"//pkg/monitoring/metrics/virt-handler:go_default_library",
"//pkg/monitoring/metrics/virt-handler/handler:go_default_library",
"//pkg/monitoring/profiler:go_default_library",
"//pkg/safepath:go_default_library",
"//pkg/service:go_default_library",
Expand Down
13 changes: 6 additions & 7 deletions cmd/virt-handler/virt-handler.go
Expand Up @@ -71,8 +71,8 @@ import (
"kubevirt.io/kubevirt/pkg/certificates/bootstrap"
containerdisk "kubevirt.io/kubevirt/pkg/container-disk"
"kubevirt.io/kubevirt/pkg/controller"
promdomain "kubevirt.io/kubevirt/pkg/monitoring/domainstats/prometheus" // import for prometheus metrics
metrics "kubevirt.io/kubevirt/pkg/monitoring/metrics/virt-handler"
metricshandler "kubevirt.io/kubevirt/pkg/monitoring/metrics/virt-handler/handler"
"kubevirt.io/kubevirt/pkg/monitoring/profiler"
"kubevirt.io/kubevirt/pkg/service"
"kubevirt.io/kubevirt/pkg/util"
Expand Down Expand Up @@ -201,10 +201,6 @@ func (app *virtHandlerApp) Run() {
logger.V(1).Infof("hostname %s", app.HostOverride)
var err error

if err := metrics.SetupMetrics(); err != nil {
panic(fmt.Errorf("failed to set up metrics: %v", err))
}

// Copy container-disk binary
targetFile := filepath.Join(app.VirtLibDir, "/init/usr/bin/container-disk")
err = os.MkdirAll(filepath.Dir(targetFile), os.ModePerm)
Expand Down Expand Up @@ -375,7 +371,10 @@ func (app *virtHandlerApp) Run() {
app.VirtShareDir,
)

promdomain.SetupDomainStatsCollector(app.virtCli, app.VirtShareDir, app.HostOverride, app.MaxRequestsInFlight, vmiSourceInformer)
if err := metrics.SetupMetrics(app.VirtShareDir, app.HostOverride, app.MaxRequestsInFlight, vmiSourceInformer); err != nil {
panic(err)
}

if err := downwardmetrics.RunDownwardMetricsCollector(context.Background(), app.HostOverride, vmiSourceInformer, podIsolationDetector); err != nil {
panic(fmt.Errorf("failed to set up the downwardMetrics collector: %v", err))
}
Expand Down Expand Up @@ -543,7 +542,7 @@ func (app *virtHandlerApp) runPrometheusServer(errCh chan error) {

mux.Add(webService)
log.Log.V(1).Infof("metrics: max concurrent requests=%d", app.MaxRequestsInFlight)
mux.Handle("/metrics", promdomain.Handler(app.MaxRequestsInFlight))
mux.Handle("/metrics", metricshandler.Handler(app.MaxRequestsInFlight))
server := http.Server{
Addr: app.ServiceListen.Address(),
Handler: mux,
Expand Down
24 changes: 8 additions & 16 deletions docs/metrics.md
@@ -1,16 +1,4 @@
<!--
This is an auto-generated file.
PLEASE DO NOT EDIT THIS FILE.
See "Developing new metrics" below how to generate this file
-->

# KubeVirt metrics
This document aims to help users that are not familiar with all metrics exposed by different KubeVirt components.
All metrics documented here are auto-generated by the utility tool `tools/doc-generator` and reflect exactly what is being exposed.

## KubeVirt Metrics List
### kubevirt_info
Version information.

### kubevirt_allocatable_nodes
The number of allocatable nodes in the cluster. Type: Gauge.
Expand All @@ -24,6 +12,9 @@ Indicates whether the Software Emulation is enabled in the configuration. Type:
### kubevirt_console_active_connections
Amount of active Console connections, broken down by namespace and vmi name. Type: Gauge.

### kubevirt_info
Version information. Type: Gauge.

### kubevirt_memory_delta_from_requested_bytes
The delta between the pod with highest memory working set or rss and its requested memory for each container, virt-controller, virt-handler, virt-api and virt-operator. Type: Gauge.

Expand Down Expand Up @@ -154,7 +145,7 @@ The total amount of memory written out to swap space of the guest in bytes. Type
The amount of memory left completely unused by the system. Memory that is available but used for reclaimable caches should NOT be reported as free. Type: Gauge.

### kubevirt_vmi_memory_usable_bytes
The amount of memory which can be reclaimed by balloon without pushing the guest system to swap, corresponds to 'Available' in /proc/meminfo Type: Gauge.
The amount of memory which can be reclaimed by balloon without pushing the guest system to swap, corresponds to 'Available' in /proc/meminfo. Type: Gauge.

### kubevirt_vmi_memory_used_bytes
Amount of `used` memory as seen by the domain. Type: Gauge.
Expand Down Expand Up @@ -202,7 +193,7 @@ The total number of rx packets dropped on vNIC interfaces. Type: Counter.
Total network traffic received packets. Type: Counter.

### kubevirt_vmi_network_traffic_bytes_total
Deprecated. Type: Counter.
[Deprecated] Total number of bytes sent and received. Type: Counter.

### kubevirt_vmi_network_transmit_bytes_total
Total network traffic transmitted in bytes. Type: Counter.
Expand Down Expand Up @@ -283,6 +274,7 @@ Returns the labels of the persistent volume claims that are used for restoring v
Amount of active VNC connections, broken down by namespace and vmi name. Type: Gauge.

## Developing new metrics
After developing new metrics or changing old ones, please run `make generate` to regenerate this document.

If you feel that the new metric doesn't follow these rules, please adapt `doc-generator` to your needs.
All metrics documented here are auto-generated and reflect exactly what is being
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@machadovilaca Why not keep the note about make generate ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Personal opinion, I like this wording better, and tbh if a developer didn't regenerate this document, he also did not read this section at the end of the file. We also already have the CI validating the update

exposed. After developing new metrics or changing old ones, please regenerate
this document.
3 changes: 1 addition & 2 deletions hack/generate.sh
Expand Up @@ -139,8 +139,7 @@ ${KUBEVIRT_DIR}/tools/openapispec/openapispec --dump-api-spec-path ${KUBEVIRT_DI
(cd ${KUBEVIRT_DIR}/tools/doc-generator/ && go_build)
(
cd ${KUBEVIRT_DIR}/docs
${KUBEVIRT_DIR}/tools/doc-generator/doc-generator
mv newmetrics.md metrics.md
${KUBEVIRT_DIR}/tools/doc-generator/doc-generator >metrics.md
)

rm -f ${KUBEVIRT_DIR}/manifests/generated/*
Expand Down
11 changes: 0 additions & 11 deletions pkg/monitoring/domainstats/collector_suite_test.go

This file was deleted.

2 changes: 1 addition & 1 deletion pkg/monitoring/domainstats/downwardmetrics/BUILD.bazel
Expand Up @@ -13,7 +13,7 @@ go_library(
"//pkg/downwardmetrics/vhostmd:go_default_library",
"//pkg/downwardmetrics/vhostmd/api:go_default_library",
"//pkg/downwardmetrics/vhostmd/metrics:go_default_library",
"//pkg/monitoring/domainstats:go_default_library",
"//pkg/monitoring/metrics/virt-handler/domainstats/collector:go_default_library",
"//pkg/virt-handler/cmd-client:go_default_library",
"//pkg/virt-handler/isolation:go_default_library",
"//pkg/virt-launcher/virtwrap/stats:go_default_library",
Expand Down
12 changes: 7 additions & 5 deletions pkg/monitoring/domainstats/downwardmetrics/scraper.go
Expand Up @@ -14,14 +14,14 @@ import (
"kubevirt.io/kubevirt/pkg/downwardmetrics/vhostmd"
"kubevirt.io/kubevirt/pkg/downwardmetrics/vhostmd/api"
metricspkg "kubevirt.io/kubevirt/pkg/downwardmetrics/vhostmd/metrics"
vms "kubevirt.io/kubevirt/pkg/monitoring/domainstats"
"kubevirt.io/kubevirt/pkg/monitoring/metrics/virt-handler/domainstats/collector"
cmdclient "kubevirt.io/kubevirt/pkg/virt-handler/cmd-client"
"kubevirt.io/kubevirt/pkg/virt-handler/isolation"
"kubevirt.io/kubevirt/pkg/virt-launcher/virtwrap/stats"
)

const DownwardmetricsRefreshDuration = 5 * time.Second
const DownwardmetricsCollectionTimeout = vms.CollectionTimeout
const DownwardmetricsCollectionTimeout = collector.CollectionTimeout
const qemuVersionUnknown = "qemu-unknown"

type StaticHostMetrics struct {
Expand All @@ -35,6 +35,8 @@ type Scraper struct {
reporter *DownwardMetricsReporter
}

// Complete is a no-op for Scraper: the method body is intentionally empty.
// NOTE(review): presumably present so *Scraper satisfies the scraper interface
// expected by the concurrent collector — confirm against the collector package.
func (s *Scraper) Complete() {}

func (s *Scraper) Scrape(socketFile string, vmi *k6sv1.VirtualMachineInstance) {
if !vmi.IsRunning() || !downwardmetrics.HasDownwardMetricDisk(vmi) {
return
Expand Down Expand Up @@ -100,7 +102,7 @@ func (r *DownwardMetricsReporter) Report(socketFile string) (*api.Metrics, error
// In the best case the information is stale, in the worst case the information is stale *and*
// the reporting channel is already closed, leading to a possible panic - see below
elapsed := time.Now().Sub(ts)
if elapsed > vms.StatsMaxAge {
if elapsed > collector.StatsMaxAge {
log.Log.Infof("took too long (%v) to collect stats from %s: ignored", elapsed, socketFile)
return nil, fmt.Errorf("took too long (%v) to collect stats from %s: ignored", elapsed, socketFile)
}
Expand Down Expand Up @@ -142,7 +144,7 @@ func guestMemoryMetrics(vmStats *stats.DomainStats) []api.Metric {
}

type Collector struct {
concCollector *vms.ConcurrentCollector
concCollector *collector.ConcurrentCollector
}

func NewReporter(nodeName string) *DownwardMetricsReporter {
Expand All @@ -161,7 +163,7 @@ func RunDownwardMetricsCollector(context context.Context, nodeName string, vmiIn
isolation: isolation,
reporter: NewReporter(nodeName),
}
collector := vms.NewConcurrentCollector(1)
collector := collector.NewConcurrentCollector(1)

go func() {
ticker := time.NewTicker(DownwardmetricsRefreshDuration)
Expand Down
41 changes: 0 additions & 41 deletions pkg/monitoring/domainstats/prometheus/BUILD.bazel

This file was deleted.