diff --git a/README.md b/README.md index 707e7ed..9da67cc 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ These are the metrics supported. - `ntp_precision_seconds` - `ntp_leap` - `ntp_scrape_duration_seconds` +- `ntp_server_reachable` As an alternative to [the node-exporter's `time` module](https://github.com/prometheus/node_exporter/blob/master/docs/TIME.md), this exporter does not require an NTP component on localhost that it can talk to. We only look at the system clock and talk to the configured NTP server(s). @@ -60,12 +61,20 @@ and connection options is defined by command-line options: ``` -ntp.measurement-duration duration Duration of measurements in case of high (>10ms) drift. (default 30s) +-ntp.high-drift duration + High drift threshold. (default 10ms) -ntp.protocol-version int NTP protocol version to use. (default 4) -ntp.server string NTP server to use (required). ``` +Command-line usage example: + +```sh +ntp_exporter -ntp.server ntp.example.com -web.telemetry-path "/probe" -ntp.measurement-duration "5s" -ntp.high-drift "50ms" +``` + ### Mode 2: Variable NTP server When the option `-ntp.source http` is specified, the NTP server and connection @@ -75,11 +84,12 @@ request: - `target`: NTP server to use - `protocol`: NTP protocol version (2, 3 or 4) - `duration`: duration of measurements in case of high drift +- `high-drift`: drift threshold above which multiple measurements are taken For example: ```sh -$ curl 'http://localhost:9559/metrics?target=ntp.example.com&protocol=4&duration=10s' +$ curl 'http://localhost:9559/metrics?target=ntp.example.com&protocol=4&duration=10s&high-drift=100ms' ``` ## Frequently asked questions (FAQ) diff --git a/collector.go b/collector.go index 31fcf31..80e7b25 100644 --- a/collector.go +++ b/collector.go @@ -30,11 +30,12 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -func CollectorInitial(target string, protocol int, duration time.Duration) Collector { +func CollectorInitial(target string, protocol int, 
duration, highDrift time.Duration) Collector { return Collector{ NtpServer: target, NtpProtocolVersion: protocol, NtpMeasurementDuration: duration, + NtpHighDrift: highDrift, buildInfo: prometheus.NewGaugeFunc(prometheus.GaugeOpts{ Namespace: "ntp", Name: "build_info", @@ -91,6 +92,11 @@ func CollectorInitial(target string, protocol int, duration time.Duration) Colle Name: "scrape_duration_seconds", Help: "ntp_exporter: Duration of a scrape job.", }), + serverReachable: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "ntp", + Name: "server_reachable", + Help: "True if the NTP server is reachable by the NTP exporter.", + }, []string{"server"}), } } @@ -99,6 +105,7 @@ type Collector struct { NtpServer string NtpProtocolVersion int NtpMeasurementDuration time.Duration + NtpHighDrift time.Duration buildInfo prometheus.GaugeFunc stratum *prometheus.GaugeVec drift *prometheus.GaugeVec @@ -110,6 +117,7 @@ type Collector struct { precision *prometheus.GaugeVec leap *prometheus.GaugeVec scrapeDuration prometheus.Summary + serverReachable *prometheus.GaugeVec } // A single measurement returned by ntp server @@ -138,11 +146,15 @@ func (c Collector) Describe(ch chan<- *prometheus.Desc) { c.precision.Describe(ch) c.leap.Describe(ch) c.scrapeDuration.Describe(ch) + c.serverReachable.Describe(ch) } // Collect implements the prometheus.Collector interface. func (c Collector) Collect(ch chan<- prometheus.Metric) { err := c.measure() + + c.serverReachable.Collect(ch) + // only report data when measurement was successful if err == nil { c.buildInfo.Collect(ch) @@ -163,17 +175,16 @@ func (c Collector) Collect(ch chan<- prometheus.Metric) { } func (c Collector) measure() error { - const highDrift = 0.01 - begin := time.Now() measurement, err := c.getClockOffsetAndStratum() if err != nil { + c.serverReachable.WithLabelValues(c.NtpServer).Set(0) return fmt.Errorf("couldn't get NTP measurement: %w", err) } // if clock drift is unusually high (e.g. 
>10ms): repeat measurements for 30 seconds and submit median value - if measurement.clockOffset > highDrift { + if measurement.clockOffset > c.NtpHighDrift.Seconds() { // arrays of measurements used to calculate median var measurementsClockOffset []float64 var measurementsStratum []float64 @@ -185,10 +196,11 @@ func (c Collector) measure() error { var measurementsPrecision []float64 var measurementsLeap []float64 - log.Printf("WARN: clock drift is above %.2fs, taking multiple measurements for %.2f seconds", highDrift, c.NtpMeasurementDuration.Seconds()) + log.Printf("WARN: clock drift is above %.3fs, taking multiple measurements for %.2f seconds", c.NtpHighDrift.Seconds(), c.NtpMeasurementDuration.Seconds()) for time.Since(begin) < c.NtpMeasurementDuration { nextMeasurement, err := c.getClockOffsetAndStratum() if err != nil { + c.serverReachable.WithLabelValues(c.NtpServer).Set(0) return fmt.Errorf("couldn't get NTP measurement: %w", err) } @@ -223,6 +235,7 @@ func (c Collector) measure() error { c.rootDistance.WithLabelValues(c.NtpServer).Set(measurement.rootDistance) c.precision.WithLabelValues(c.NtpServer).Set(measurement.precision) c.leap.WithLabelValues(c.NtpServer).Set(measurement.leap) + c.serverReachable.WithLabelValues(c.NtpServer).Set(1) c.scrapeDuration.Observe(time.Since(begin).Seconds()) return nil diff --git a/main.go b/main.go index 9d41a05..d61c7d6 100644 --- a/main.go +++ b/main.go @@ -45,6 +45,7 @@ var ( ntpServer string ntpProtocolVersion int ntpMeasurementDuration time.Duration + ntpHighDrift time.Duration ntpSource string ) @@ -86,6 +87,7 @@ func init() { flag.StringVar(&ntpServer, "ntp.server", "", "NTP server to use (required).") flag.IntVar(&ntpProtocolVersion, "ntp.protocol-version", 4, "NTP protocol version to use.") flag.DurationVar(&ntpMeasurementDuration, "ntp.measurement-duration", 30*time.Second, "Duration of measurements in case of high (>10ms) drift.") + flag.DurationVar(&ntpHighDrift, "ntp.high-drift", 10*time.Millisecond, "High 
drift threshold.") flag.StringVar(&ntpSource, "ntp.source", "cli", "source of information about ntp server (cli / http).") flag.Parse() } @@ -96,6 +98,7 @@ func handlerMetrics(w http.ResponseWriter, r *http.Request) { s := ntpServer p := ntpProtocolVersion d := ntpMeasurementDuration + hd := ntpHighDrift if ntpSource == "http" { for _, i := range []string{"target", "protocol", "duration"} { @@ -123,10 +126,17 @@ func handlerMetrics(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), http.StatusBadRequest) return } + + if u, err := time.ParseDuration(r.URL.Query().Get("high-drift")); err == nil { + hd = u + } else { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } } registry := prometheus.NewRegistry() - registry.MustRegister(CollectorInitial(s, p, d)) + registry.MustRegister(CollectorInitial(s, p, d, hd)) h := promhttp.HandlerFor(registry, promhttp.HandlerOpts{ErrorLog: logger}) h.ServeHTTP(w, r) }