Skip to content

Commit

Permalink
Improved Latency graph (#15940)
Browse files Browse the repository at this point in the history
* Improved Latency graph
Store loss+jitter info in rrd instead of database
New graph icmp_perf (legacy ping_perf still valid referencing part of the newer data)
Delete device_perf table

* Change loss to an area so it is more visible

* Style fixes

* Cleanups from phpstan & tests

* exit_code fix

* Remove alert usage of device_perf

* Don't use magic __get

* Add test for bulkPing
Add host to previous tests

* style fixes

* Fix issue fping error responses
  • Loading branch information
murrant committed Apr 18, 2024
1 parent 4cce4f0 commit 49f8269
Show file tree
Hide file tree
Showing 38 changed files with 466 additions and 495 deletions.
18 changes: 11 additions & 7 deletions LibreNMS/Alert/RunAlerts.php
Expand Up @@ -31,13 +31,15 @@
namespace LibreNMS\Alert;

use App\Facades\DeviceCache;
use App\Facades\Rrd;
use App\Models\AlertTransport;
use App\Models\Eventlog;
use LibreNMS\Config;
use LibreNMS\Enum\AlertState;
use LibreNMS\Enum\Severity;
use LibreNMS\Exceptions\AlertTransportDeliveryException;
use LibreNMS\Polling\ConnectivityHelper;
use LibreNMS\Util\Number;
use LibreNMS\Util\Time;

class RunAlerts
Expand Down Expand Up @@ -116,13 +118,15 @@ public function describeAlert($alert)
$obj['status'] = $device->status;
$obj['status_reason'] = $device->status_reason;
if ((new ConnectivityHelper($device))->canPing()) {
$ping_stats = $device->perf()->latest('timestamp')->first();
$obj['ping_timestamp'] = $ping_stats->timestamp;
$obj['ping_loss'] = $ping_stats->loss;
$obj['ping_min'] = $ping_stats->min;
$obj['ping_max'] = $ping_stats->max;
$obj['ping_avg'] = $ping_stats->avg;
$obj['debug'] = $ping_stats->debug;
$last_ping = Rrd::lastUpdate(Rrd::name($device->hostname, 'icmp-perf'));
if ($last_ping) {
$obj['ping_timestamp'] = $last_ping->timestamp;
$obj['ping_loss'] = Number::calculatePercent($last_ping->get('xmt') - $last_ping->get('rcv'), $last_ping->get('xmt'));
$obj['ping_min'] = $last_ping->get('min');
$obj['ping_max'] = $last_ping->get('max');
$obj['ping_avg'] = $last_ping->get('avg');
$obj['debug'] = 'unsupported';
}
}
$extra = $alert['details'];

Expand Down
89 changes: 71 additions & 18 deletions LibreNMS/Data/Source/Fping.php
Expand Up @@ -26,58 +26,111 @@
namespace LibreNMS\Data\Source;

use LibreNMS\Config;
use LibreNMS\Exceptions\FpingUnparsableLine;
use Log;
use Symfony\Component\Process\Process;

class Fping
{
private string $fping_bin;
private string|false $fping6_bin;
private int $count;
private int $timeout;
private int $interval;
private int $tos;
private int $retries;

/**
 * Read the fping binaries and ping options from Config once, so the ping
 * methods can reuse them without further lookups.
 *
 * Floors applied here: count is at least 1, interval is at least 20 and
 * timeout is never lower than the (already floored) interval.
 */
public function __construct()
{
    // binaries: a dedicated fping6 is only remembered when it is executable
    $this->fping_bin = Config::get('fping', 'fping');

    $ipv6_candidate = Config::get('fping6', 'fping6');
    if (is_executable($ipv6_candidate)) {
        $this->fping6_bin = $ipv6_candidate;
    } else {
        $this->fping6_bin = false;
    }

    // options with lower bounds
    $configured_count = Config::get('fping_options.count', 3);
    $this->count = max($configured_count, 1);

    $configured_interval = Config::get('fping_options.interval', 500);
    $this->interval = max($configured_interval, 20);

    // must be computed after interval, it uses the floored value
    $configured_timeout = Config::get('fping_options.timeout', 500);
    $this->timeout = max($configured_timeout, $this->interval);

    // options taken as configured
    $this->retries = Config::get('fping_options.retries', 2);
    $this->tos = Config::get('fping_options.tos', 0);
}

/**
* Run fping against a hostname/ip in count mode and collect stats.
*
* @param string $host
* @param int $count (min 1)
* @param int $interval (min 20)
* @param int $timeout (not less than $interval)
* @param string $host hostname or ip
* @param string $address_family ipv4 or ipv6
* @return \LibreNMS\Data\Source\FpingResponse
*/
public function ping($host, $count = 3, $interval = 1000, $timeout = 500, $address_family = 'ipv4'): FpingResponse
public function ping($host, $address_family = 'ipv4'): FpingResponse
{
$interval = max($interval, 20);

$fping = Config::get('fping');
$fping6 = Config::get('fping6');
$fping_tos = Config::get('fping_options.tos', 0);

if ($address_family == 'ipv6') {
$cmd = is_executable($fping6) ? [$fping6] : [$fping, '-6'];
$cmd = $this->fping6_bin === false ? [$this->fping_bin, '-6'] : [$this->fping6_bin];
} else {
$cmd = is_executable($fping6) ? [$fping] : [$fping, '-4'];
$cmd = $this->fping6_bin === false ? [$this->fping_bin, '-4'] : [$this->fping_bin];
}

// build the command
$cmd = array_merge($cmd, [
'-e',
'-q',
'-c',
max($count, 1),
$this->count,
'-p',
$interval,
$this->interval,
'-t',
max($timeout, $interval),
$this->timeout,
'-O',
$fping_tos,
$this->tos,
$host,
]);

$process = app()->make(Process::class, ['command' => $cmd]);
Log::debug('[FPING] ' . $process->getCommandLine() . PHP_EOL);
$process->run();

$response = FpingResponse::parseOutput($process->getErrorOutput(), $process->getExitCode());
$response = FpingResponse::parseLine($process->getErrorOutput(), $process->getExitCode());

Log::debug("response: $response");

return $response;
}

/**
 * Ping many hosts with a single fping process and report each result as it is parsed.
 *
 * Host names are written to fping's stdin ("-f -"). Every summary line read from
 * stderr is parsed with FpingResponse::parseLine() and handed to $callback.
 *
 * Output arrives in arbitrary chunks, so a chunk may end in the middle of a line.
 * The unterminated tail of each chunk is held back until its line ending arrives
 * (or the process ends); parsing it early could match a truncated latency value
 * and leave the remaining characters to be glued onto the next line.
 *
 * @param  string[]  $hosts  hostnames or ips to ping
 * @param  callable  $callback  called with one FpingResponse per parsed line
 */
public function bulkPing(array $hosts, callable $callback): void
{
    $process = app()->make(Process::class, ['command' => [
        $this->fping_bin,
        '-f', '-',
        '-e',
        '-t', $this->timeout,
        '-r', $this->retries,
        '-O', $this->tos,
        '-c', $this->count,
    ]]);

    // twice polling interval
    $process->setTimeout(Config::get('rrd.step', 300) * 2);
    // send hostnames to stdin to avoid overflowing cli length limits
    $process->setInput(implode(PHP_EOL, $hosts) . PHP_EOL);

    Log::debug('[FPING] ' . $process->getCommandLine() . PHP_EOL);

    // text of previous line(s) that could not be parsed, prepended to the next line
    $partial = '';
    $handleLine = function (string $line) use ($callback, &$partial): void {
        if (! $line) {
            return;
        }

        Log::debug("Fping OUTPUT|$line PARTIAL|$partial");
        try {
            $response = FpingResponse::parseLine($partial . $line);
            call_user_func($callback, $response);
            $partial = '';
        } catch (FpingUnparsableLine $e) {
            // handle possible partial line
            $partial = $e->unparsedLine;
        }
    };

    // tail of the last chunk that has not been terminated by a line ending yet
    $pending = '';
    try {
        $process->run(function ($type, $output) use ($handleLine, &$pending) {
            // stdout contains individual ping responses, stderr contains summaries
            if ($type == Process::ERR) {
                $lines = explode(PHP_EOL, $pending . $output);
                // the last element is either '' (chunk ended on a line ending) or an incomplete line
                $pending = array_pop($lines);

                foreach ($lines as $line) {
                    $handleLine($line);
                }
            }
        });
    } finally {
        // no more output will arrive, whatever is left is the final line
        $handleLine($pending);
    }
}
}
148 changes: 77 additions & 71 deletions LibreNMS/Data/Source/FpingResponse.php
Expand Up @@ -25,46 +25,19 @@

namespace LibreNMS\Data\Source;

use App\Models\DevicePerf;
use App\Facades\Rrd;
use App\Models\Device;
use Carbon\Carbon;
use LibreNMS\Exceptions\FpingUnparsableLine;
use LibreNMS\RRD\RrdDefinition;

class FpingResponse
{
/**
* @var int
*/
public $transmitted;
/**
* @var int
*/
public $received;
/**
* @var int
*/
public $loss;
/**
* @var float
*/
public $min_latency;
/**
* @var float
*/
public $max_latency;
/**
* @var float
*/
public $avg_latency;
/**
* @var int
*/
public $duplicates;
/**
* @var int
*/
public $exit_code;
/**
* @var bool
*/
private $skipped;
const SUCESS = 0;
const UNREACHABLE = 1;
const INVALID_HOST = 2;
const INVALID_ARGS = 3;
const SYS_CALL_FAIL = 4;

/**
* @param int $transmitted ICMP packets transmitted
Expand All @@ -75,50 +48,72 @@ class FpingResponse
* @param float $avg_latency Average latency (ms)
* @param int $duplicates Number of duplicate responses (Indicates network issue)
* @param int $exit_code Return code from fping
* @param string|null $host Hostname/IP pinged
*/
public function __construct(int $transmitted, int $received, int $loss, float $min_latency, float $max_latency, float $avg_latency, int $duplicates, int $exit_code, bool $skipped = false)
private function __construct(
public readonly int $transmitted,
public readonly int $received,
public readonly int $loss,
public readonly float $min_latency,
public readonly float $max_latency,
public readonly float $avg_latency,
public readonly int $duplicates,
public int $exit_code,
public readonly ?string $host = null,
private bool $skipped = false)
{
}

/**
 * Build a synthetic successful response without running fping:
 * 1 packet transmitted, 1 received, 0% loss, zero latencies, no duplicates,
 * exit code 0, and the skipped flag set (see wasSkipped()).
 *
 * @param  string|null  $host  Hostname/IP the response is attributed to
 */
public static function artificialUp(?string $host = null): static
{
    // explicit ?string: implicitly nullable parameter types are deprecated as of PHP 8.4
    return new static(1, 1, 0, 0, 0, 0, 0, 0, $host, true);
}

public static function artificialDown(string $host = null): static
{
$this->transmitted = $transmitted;
$this->received = $received;
$this->loss = $loss;
$this->min_latency = $min_latency;
$this->max_latency = $max_latency;
$this->avg_latency = $avg_latency;
$this->duplicates = $duplicates;
$this->exit_code = $exit_code;
$this->skipped = $skipped;
return new static(1, 0, 100, 0, 0, 0, 0, 0, $host, false);
}

public static function artificialUp(): FpingResponse
/**
* Change the exit code to 0; this may be appropriate when a non-fatal error was encountered
*/
public function ignoreFailure(): void
{
return new FpingResponse(1, 1, 0, 0, 0, 0, 0, 0, true);
$this->exit_code = 0;
}

/**
 * Report the skipped flag this response was constructed with.
 * The flag defaults to false; artificialUp() creates responses with it set to true.
 */
public function wasSkipped(): bool
{
return $this->skipped;
}

public static function parseOutput(string $output, int $code): FpingResponse
public static function parseLine(string $output, int $code = null): FpingResponse
{
preg_match('#= (\d+)/(\d+)/(\d+)%(, min/avg/max = ([\d.]+)/([\d.]+)/([\d.]+))?$#', $output, $parsed);
[, $xmt, $rcv, $loss, , $min, $avg, $max] = array_pad($parsed, 8, 0);
$matched = preg_match('#(\S+)\s*: (xmt/rcv/%loss = (\d+)/(\d+)/(?:(100)%|(\d+)%, min/avg/max = ([\d.]+)/([\d.]+)/([\d.]+))|Name or service not known|Temporary failure in name resolution)$#', $output, $parsed);

if ($code == 0 && ! $matched) {
throw new FpingUnparsableLine($output);
}

[, $host, $error, $xmt, $rcv, $loss100, $loss, $min, $avg, $max] = array_pad($parsed, 10, 0);
$loss = $loss100 ?: $loss;

if ($loss < 0) {
$xmt = 1;
$rcv = 0;
$loss = 100;
if ($error == 'Name or service not known') {
return new FpingResponse(0, 0, 0, 0, 0, 0, 0, self::INVALID_HOST, $host);
} elseif ($error == 'Temporary failure in name resolution') {
return new FpingResponse(0, 0, 0, 0, 0, 0, 0, self::SYS_CALL_FAIL, $host);
}

return new FpingResponse(
return new static(
(int) $xmt,
(int) $rcv,
(int) $loss,
(float) $min,
(float) $max,
(float) $avg,
substr_count($output, 'duplicate'),
$code
$code ?? ($loss100 ? self::UNREACHABLE : self::SUCESS),
$host,
);
}

Expand All @@ -131,26 +126,37 @@ public function success(): bool
return $this->exit_code == 0 && $this->loss < 100;
}

public function toModel(): ?DevicePerf
{
return new DevicePerf([
'xmt' => $this->transmitted,
'rcv' => $this->received,
'loss' => $this->loss,
'min' => $this->min_latency,
'max' => $this->max_latency,
'avg' => $this->avg_latency,
]);
}

public function __toString()
{
$str = "xmt/rcv/%loss = $this->transmitted/$this->received/$this->loss%";
$str = "$this->host : xmt/rcv/%loss = $this->transmitted/$this->received/$this->loss%";

if ($this->max_latency) {
$str .= ", min/avg/max = $this->min_latency/$this->avg_latency/$this->max_latency";
}

return $str;
}

/**
 * Persist this ping result for a device.
 *
 * Updates last_ping / last_ping_timetaken on the device model and saves it,
 * then writes avg, xmt, rcv, min and max to the 'icmp-perf' measurement
 * through the Datastore.
 *
 * @param  Device  $device  device the response belongs to
 */
public function saveStats(Device $device): void
{
    $device->last_ping = Carbon::now();
    // a falsy average (e.g. 0) keeps the previously stored value
    $device->last_ping_timetaken = $this->avg_latency ?: $device->last_ping_timetaken;
    $device->save();

    $datastore = app('Datastore');
    $device_data = $device->toArray();

    // detailed multi-ping capable graph
    // NOTE(review): 'avg' names the 'ping' dataset of the ping-perf rrd as its source,
    // presumably to carry over existing data - confirm against RrdDefinition::addDataset
    $legacy_file = Rrd::name($device->hostname, 'ping-perf');
    $definition = RrdDefinition::make()
        ->addDataset('avg', 'GAUGE', 0, 65535, source_ds: 'ping', source_file: $legacy_file)
        ->addDataset('xmt', 'GAUGE', 0, 65535)
        ->addDataset('rcv', 'GAUGE', 0, 65535)
        ->addDataset('min', 'GAUGE', 0, 65535)
        ->addDataset('max', 'GAUGE', 0, 65535);

    $tags = ['rrd_def' => $definition];
    $fields = [
        'avg' => $this->avg_latency,
        'xmt' => $this->transmitted,
        'rcv' => $this->received,
        'min' => $this->min_latency,
        'max' => $this->max_latency,
    ];

    $datastore->put($device_data, 'icmp-perf', $tags, $fields);
}
}

2 comments on commit 49f8269

@librenms-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This commit has been mentioned on LibreNMS Community. There might be relevant details there:

https://community.librenms.org/t/latency-graphs-not-showing-for-new-ping-only-devices/24150/1

@LEV82
Copy link
Contributor

@LEV82 LEV82 commented on 49f8269 Apr 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, found bugs in this commit
#15986

Please sign in to comment.