From a836c1494dfdc41ee2dbc2b195e690462dde71ba Mon Sep 17 00:00:00 2001 From: Rafouf69 Date: Thu, 22 Feb 2024 15:12:56 +0100 Subject: [PATCH] Add a new metric named ntp_server_reachable Transform the high drift from a constant to a parameter Complete the README add metric Add metric add metric to the collector. Send it every time the server was reached successfully at least once correct metric name correct metric name make the highDrift constant a parameter add the possibility to configure the highDrift threshold in http mode Add doc delete log statement + correction after linting --- README.md | 12 +++++++++++- collector.go | 23 ++++++++++++++++++----- main.go | 12 +++++++++++- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 707e7ed..9da67cc 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ These are the metrics supported. - `ntp_precision_seconds` - `ntp_leap` - `ntp_scrape_duration_seconds` +- `ntp_server_reachable` As an alternative to [the node-exporter's `time` module](https://github.com/prometheus/node_exporter/blob/master/docs/TIME.md), this exporter does not require an NTP component on localhost that it can talk to. We only look at the system clock and talk to the configured NTP server(s). @@ -60,12 +61,20 @@ and connection options is defined by command-line options: ``` -ntp.measurement-duration duration Duration of measurements in case of high (>10ms) drift. (default 30s) +-ntp.high-drift duration + High drift threshold. (default 10ms) -ntp.protocol-version int NTP protocol version to use. (default 4) -ntp.server string NTP server to use (required). 
``` +Command-line usage example: + +```sh +ntp_exporter -ntp.server ntp.example.com -web.telemetry-path "/probe" -ntp.measurement-duration "5s" -ntp.high-drift "50ms" +``` + ### Mode 2: Variable NTP server When the option `-ntp.source http` is specified, the NTP server and connection @@ -75,11 +84,12 @@ request: - `target`: NTP server to use - `protocol`: NTP protocol version (2, 3 or 4) - `duration`: duration of measurements in case of high drift +- `high-drift`: High drift threshold to trigger multiple probing For example: ```sh -$ curl 'http://localhost:9559/metrics?target=ntp.example.com&protocol=4&duration=10s' +$ curl 'http://localhost:9559/metrics?target=ntp.example.com&protocol=4&duration=10s&high-drift=100ms' ``` ## Frequently asked questions (FAQ) diff --git a/collector.go b/collector.go index 0c1c7bc..0627a52 100644 --- a/collector.go +++ b/collector.go @@ -30,11 +30,12 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -func CollectorInitial(target string, protocol int, duration time.Duration) Collector { +func CollectorInitial(target string, protocol int, duration, highDrift time.Duration) Collector { return Collector{ NtpServer: target, NtpProtocolVersion: protocol, NtpMeasurementDuration: duration, + NtpHighDrift: highDrift, buildInfo: prometheus.NewGaugeFunc(prometheus.GaugeOpts{ Namespace: "ntp", Name: "build_info", @@ -91,6 +92,11 @@ func CollectorInitial(target string, protocol int, duration time.Duration) Colle Name: "scrape_duration_seconds", Help: "ntp_exporter: Duration of a scrape job.", }), + serverReachable: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "ntp", + Name: "server_reachable", + Help: "True if the NTP server is reachable by the NTP exporter.", + }, []string{"server"}), } } @@ -99,6 +105,7 @@ type Collector struct { NtpServer string NtpProtocolVersion int NtpMeasurementDuration time.Duration + NtpHighDrift time.Duration buildInfo prometheus.GaugeFunc stratum *prometheus.GaugeVec drift *prometheus.GaugeVec 
@@ -110,6 +117,7 @@ type Collector struct { precision *prometheus.GaugeVec leap *prometheus.GaugeVec scrapeDuration prometheus.Summary + serverReachable *prometheus.GaugeVec } // A single measurement returned by ntp server @@ -138,11 +146,15 @@ func (c Collector) Describe(ch chan<- *prometheus.Desc) { c.precision.Describe(ch) c.leap.Describe(ch) c.scrapeDuration.Describe(ch) + c.serverReachable.Describe(ch) } // Collect implements the prometheus.Collector interface. func (c Collector) Collect(ch chan<- prometheus.Metric) { err := c.measure() + + c.serverReachable.Collect(ch) + //only report data when measurement was successful if err == nil { c.buildInfo.Collect(ch) @@ -163,17 +175,16 @@ func (c Collector) Collect(ch chan<- prometheus.Metric) { } func (c Collector) measure() error { - const highDrift = 0.01 - begin := time.Now() measurement, err := c.getClockOffsetAndStratum() if err != nil { + c.serverReachable.WithLabelValues(c.NtpServer).Set(0) return fmt.Errorf("couldn't get NTP measurement: %w", err) } //if clock drift is unusually high (e.g. 
>10ms): repeat measurements for 30 seconds and submit median value - if measurement.clockOffset > highDrift { + if measurement.clockOffset > c.NtpHighDrift.Seconds() { //arrays of measurements used to calculate median var measurementsClockOffset []float64 var measurementsStratum []float64 @@ -185,10 +196,11 @@ func (c Collector) measure() error { var measurementsPrecision []float64 var measurementsLeap []float64 - log.Printf("WARN: clock drift is above %.2fs, taking multiple measurements for %.2f seconds", highDrift, c.NtpMeasurementDuration.Seconds()) + log.Printf("WARN: clock drift is above %.3fs, taking multiple measurements for %.2f seconds", c.NtpHighDrift.Seconds(), c.NtpMeasurementDuration.Seconds()) for time.Since(begin) < c.NtpMeasurementDuration { nextMeasurement, err := c.getClockOffsetAndStratum() if err != nil { + c.serverReachable.WithLabelValues(c.NtpServer).Set(1) return fmt.Errorf("couldn't get NTP measurement: %w", err) } @@ -223,6 +235,7 @@ func (c Collector) measure() error { c.rootDistance.WithLabelValues(c.NtpServer).Set(measurement.rootDistance) c.precision.WithLabelValues(c.NtpServer).Set(measurement.precision) c.leap.WithLabelValues(c.NtpServer).Set(measurement.leap) + c.serverReachable.WithLabelValues(c.NtpServer).Set(1) c.scrapeDuration.Observe(time.Since(begin).Seconds()) return nil diff --git a/main.go b/main.go index 9d41a05..d61c7d6 100644 --- a/main.go +++ b/main.go @@ -45,6 +45,7 @@ var ( ntpServer string ntpProtocolVersion int ntpMeasurementDuration time.Duration + ntpHighDrift time.Duration ntpSource string ) @@ -86,6 +87,7 @@ func init() { flag.StringVar(&ntpServer, "ntp.server", "", "NTP server to use (required).") flag.IntVar(&ntpProtocolVersion, "ntp.protocol-version", 4, "NTP protocol version to use.") flag.DurationVar(&ntpMeasurementDuration, "ntp.measurement-duration", 30*time.Second, "Duration of measurements in case of high (>10ms) drift.") + flag.DurationVar(&ntpHighDrift, "ntp.high-drift", 10*time.Millisecond, "High 
drift threshold.") flag.StringVar(&ntpSource, "ntp.source", "cli", "source of information about ntp server (cli / http).") flag.Parse() } @@ -96,6 +98,7 @@ func handlerMetrics(w http.ResponseWriter, r *http.Request) { s := ntpServer p := ntpProtocolVersion d := ntpMeasurementDuration + hd := ntpHighDrift if ntpSource == "http" { for _, i := range []string{"target", "protocol", "duration"} { @@ -123,10 +126,17 @@ func handlerMetrics(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), http.StatusBadRequest) return } + + if u, err := time.ParseDuration(r.URL.Query().Get("high-drift")); err == nil { + hd = u + } else { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } } registry := prometheus.NewRegistry() - registry.MustRegister(CollectorInitial(s, p, d)) + registry.MustRegister(CollectorInitial(s, p, d, hd)) h := promhttp.HandlerFor(registry, promhttp.HandlerOpts{ErrorLog: logger}) h.ServeHTTP(w, r) }