diff options
Diffstat (limited to 'conf.d/health.d/net.conf')
-rw-r--r-- | conf.d/health.d/net.conf | 51 |
1 file changed, 36 insertions, 15 deletions
diff --git a/conf.d/health.d/net.conf b/conf.d/health.d/net.conf index f65bc4fc..7753aa18 100644 --- a/conf.d/health.d/net.conf +++ b/conf.d/health.d/net.conf @@ -1,27 +1,48 @@ +# ----------------------------------------------------------------------------- +# make sure we collect values for each interface + +template: interface_last_collected_secs + on: net.net + calc: $now - $last_collected_t + units: seconds ago + every: 10s + warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful data collection + to: sysadmin + + +# ----------------------------------------------------------------------------- # check if an interface is dropping packets -# the alarm is checked every 10 seconds -# and examines the last 30 minutes of data +# the alarm is checked every 1 minute +# and examines the last hour of data -template: 30min_packet_drops +template: 1hour_packet_drops on: net.drops - lookup: sum -30m unaligned absolute - every: 1m - crit: $this > 0 + lookup: sum -1h unaligned absolute units: packets - info: dropped packets in the last 30 minutes + every: 1m + warn: $this > 0 + delay: down 30m multiplier 1.5 max 1h + info: interface dropped packets in the last hour + to: sysadmin +# ----------------------------------------------------------------------------- + # check if an interface is having FIFO # buffer errors -# the alarm is checked every 10 seconds -# and examines the last 30 minutes of data +# the alarm is checked every 1 minute +# and examines the last hour of data -template: 30min_fifo_errors +template: 1hour_fifo_errors on: net.fifo - lookup: sum -30m unaligned absolute - every: 1m - crit: $this > 0 + lookup: sum -1h unaligned absolute units: errors - info: network interface fifo errors in the last 30 minutes - + every: 1m + warn: $this > 0 + delay: down 30m multiplier 1.5 max 1h + info: 
interface fifo errors in the last hour + to: sysadmin |