Skip to content

Commit

Permalink
Add a new metric named ntp_server_reachable, transform the high drift…
Browse files Browse the repository at this point in the history
… from a constant to a parameter

Closes #85
  • Loading branch information
Rafouf69 authored and SuperSandro2000 committed Apr 10, 2024
1 parent d6b682f commit 5b177cd
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 7 deletions.
12 changes: 11 additions & 1 deletion README.md
Expand Up @@ -18,6 +18,7 @@ These are the metrics supported.
- `ntp_precision_seconds`
- `ntp_leap`
- `ntp_scrape_duration_seconds`
- `ntp_server_reachable`

As an alternative to [the node-exporter's `time` module](https://github.com/prometheus/node_exporter/blob/master/docs/TIME.md), this exporter does not require an NTP component on localhost that it can talk to. We only look at the system clock and talk to the configured NTP server(s).

Expand Down Expand Up @@ -60,12 +61,20 @@ and connection options is defined by command-line options:
```
-ntp.measurement-duration duration
Duration of measurements in case of high (>10ms) drift. (default 30s)
-ntp.high-drift duration
High drift threshold. (default 10ms)
-ntp.protocol-version int
NTP protocol version to use. (default 4)
-ntp.server string
NTP server to use (required).
```

Command-line usage example:

```sh
ntp_exporter -ntp.server ntp.example.com -web.telemetry-path "/probe" -ntp.measurement-duration "5s" -ntp.high-drift "50ms"
```

### Mode 2: Variable NTP server

When the option `-ntp.source http` is specified, the NTP server and connection
Expand All @@ -75,11 +84,12 @@ request:
- `target`: NTP server to use
- `protocol`: NTP protocol version (2, 3 or 4)
- `duration`: duration of measurements in case of high drift
- `high-drift`: drift threshold above which multiple measurements are taken, given as a duration (e.g. `100ms`)

For example:

```sh
$ curl 'http://localhost:9559/metrics?target=ntp.example.com&protocol=4&duration=10s'
$ curl 'http://localhost:9559/metrics?target=ntp.example.com&protocol=4&duration=10s&high-drift=100ms'
```

## Frequently asked questions (FAQ)
Expand Down
23 changes: 18 additions & 5 deletions collector.go
Expand Up @@ -30,11 +30,12 @@ import (
"github.com/prometheus/client_golang/prometheus"
)

func CollectorInitial(target string, protocol int, duration time.Duration) Collector {
func CollectorInitial(target string, protocol int, duration, highDrift time.Duration) Collector {
return Collector{
NtpServer: target,
NtpProtocolVersion: protocol,
NtpMeasurementDuration: duration,
NtpHighDrift: highDrift,
buildInfo: prometheus.NewGaugeFunc(prometheus.GaugeOpts{
Namespace: "ntp",
Name: "build_info",
Expand Down Expand Up @@ -91,6 +92,11 @@ func CollectorInitial(target string, protocol int, duration time.Duration) Colle
Name: "scrape_duration_seconds",
Help: "ntp_exporter: Duration of a scrape job.",
}),
serverReachable: prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "ntp",
Name: "server_reachable",
Help: "True if the NTP server is reachable by the NTP exporter.",
}, []string{"server"}),
}
}

Expand All @@ -99,6 +105,7 @@ type Collector struct {
NtpServer string
NtpProtocolVersion int
NtpMeasurementDuration time.Duration
NtpHighDrift time.Duration
buildInfo prometheus.GaugeFunc
stratum *prometheus.GaugeVec
drift *prometheus.GaugeVec
Expand All @@ -110,6 +117,7 @@ type Collector struct {
precision *prometheus.GaugeVec
leap *prometheus.GaugeVec
scrapeDuration prometheus.Summary
serverReachable *prometheus.GaugeVec
}

// A single measurement returned by ntp server
Expand Down Expand Up @@ -138,11 +146,15 @@ func (c Collector) Describe(ch chan<- *prometheus.Desc) {
c.precision.Describe(ch)
c.leap.Describe(ch)
c.scrapeDuration.Describe(ch)
c.serverReachable.Describe(ch)
}

// Collect implements the prometheus.Collector interface.
func (c Collector) Collect(ch chan<- prometheus.Metric) {
err := c.measure()

c.serverReachable.Collect(ch)

// only report data when measurement was successful
if err == nil {
c.buildInfo.Collect(ch)
Expand All @@ -163,17 +175,16 @@ func (c Collector) Collect(ch chan<- prometheus.Metric) {
}

func (c Collector) measure() error {
const highDrift = 0.01

begin := time.Now()
measurement, err := c.getClockOffsetAndStratum()

if err != nil {
c.serverReachable.WithLabelValues(c.NtpServer).Set(0)
return fmt.Errorf("couldn't get NTP measurement: %w", err)
}

// if clock drift exceeds the configured threshold (NtpHighDrift): repeat measurements for NtpMeasurementDuration and submit median value
if measurement.clockOffset > highDrift {
if measurement.clockOffset > c.NtpHighDrift.Seconds() {
// arrays of measurements used to calculate median
var measurementsClockOffset []float64
var measurementsStratum []float64
Expand All @@ -185,10 +196,11 @@ func (c Collector) measure() error {
var measurementsPrecision []float64
var measurementsLeap []float64

log.Printf("WARN: clock drift is above %.2fs, taking multiple measurements for %.2f seconds", highDrift, c.NtpMeasurementDuration.Seconds())
log.Printf("WARN: clock drift is above %.3fs, taking multiple measurements for %.2f seconds", c.NtpHighDrift.Seconds(), c.NtpMeasurementDuration.Seconds())
for time.Since(begin) < c.NtpMeasurementDuration {
nextMeasurement, err := c.getClockOffsetAndStratum()
if err != nil {
c.serverReachable.WithLabelValues(c.NtpServer).Set(0)
return fmt.Errorf("couldn't get NTP measurement: %w", err)
}

Expand Down Expand Up @@ -223,6 +235,7 @@ func (c Collector) measure() error {
c.rootDistance.WithLabelValues(c.NtpServer).Set(measurement.rootDistance)
c.precision.WithLabelValues(c.NtpServer).Set(measurement.precision)
c.leap.WithLabelValues(c.NtpServer).Set(measurement.leap)
c.serverReachable.WithLabelValues(c.NtpServer).Set(1)

c.scrapeDuration.Observe(time.Since(begin).Seconds())
return nil
Expand Down
12 changes: 11 additions & 1 deletion main.go
Expand Up @@ -45,6 +45,7 @@ var (
ntpServer string
ntpProtocolVersion int
ntpMeasurementDuration time.Duration
ntpHighDrift time.Duration
ntpSource string
)

Expand Down Expand Up @@ -86,6 +87,7 @@ func init() {
flag.StringVar(&ntpServer, "ntp.server", "", "NTP server to use (required).")
flag.IntVar(&ntpProtocolVersion, "ntp.protocol-version", 4, "NTP protocol version to use.")
flag.DurationVar(&ntpMeasurementDuration, "ntp.measurement-duration", 30*time.Second, "Duration of measurements in case of high (>10ms) drift.")
flag.DurationVar(&ntpHighDrift, "ntp.high-drift", 10*time.Millisecond, "High drift threshold.")
flag.StringVar(&ntpSource, "ntp.source", "cli", "source of information about ntp server (cli / http).")
flag.Parse()
}
Expand All @@ -96,6 +98,7 @@ func handlerMetrics(w http.ResponseWriter, r *http.Request) {
s := ntpServer
p := ntpProtocolVersion
d := ntpMeasurementDuration
hd := ntpHighDrift

if ntpSource == "http" {
for _, i := range []string{"target", "protocol", "duration"} {
Expand Down Expand Up @@ -123,10 +126,17 @@ func handlerMetrics(w http.ResponseWriter, r *http.Request) {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}

if u, err := time.ParseDuration(r.URL.Query().Get("high-drift")); err == nil {
hd = u
} else {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
}

registry := prometheus.NewRegistry()
registry.MustRegister(CollectorInitial(s, p, d))
registry.MustRegister(CollectorInitial(s, p, d, hd))
h := promhttp.HandlerFor(registry, promhttp.HandlerOpts{ErrorLog: logger})
h.ServeHTTP(w, r)
}
Expand Down

0 comments on commit 5b177cd

Please sign in to comment.