diff options
Diffstat (limited to 'conf.d/health.d/disks.conf')
-rw-r--r-- | conf.d/health.d/disks.conf | 167 |
1 files changed, 0 insertions, 167 deletions
diff --git a/conf.d/health.d/disks.conf b/conf.d/health.d/disks.conf deleted file mode 100644 index 26f85848..00000000 --- a/conf.d/health.d/disks.conf +++ /dev/null @@ -1,167 +0,0 @@ - -# you can disable an alarm notification by setting the 'to' line to: silent - - -# ----------------------------------------------------------------------------- -# low disk space - -# checking the latest collected values -# raise an alarm if the disk is low on -# available disk space - -template: disk_space_usage - on: disk.space - os: linux freebsd - hosts: * -families: * - calc: $used * 100 / ($avail + $used) - units: % - every: 1m - warn: $this > (($status >= $WARNING ) ? (80) : (90)) - crit: $this > (($status == $CRITICAL) ? (90) : (98)) - delay: up 1m down 15m multiplier 1.5 max 1h - info: current disk space usage - to: sysadmin - -template: disk_inode_usage - on: disk.inodes - os: linux freebsd - hosts: * -families: * - calc: $used * 100 / ($avail + $used) - units: % - every: 1m - warn: $this > (($status >= $WARNING) ? (80) : (90)) - crit: $this > (($status == $CRITICAL) ? (90) : (98)) - delay: up 1m down 15m multiplier 1.5 max 1h - info: current disk inode usage - to: sysadmin - - -# ----------------------------------------------------------------------------- -# disk fill rate - -# calculate the rate the disk fills -# use as base, the available space change -# during the last hour - -# this is just a calculation - it has no alarm -# we will use it in the next template to find -# the hours remaining - -template: disk_fill_rate - on: disk.space - os: linux freebsd - hosts: * -families: * - lookup: min -10m at -50m unaligned of avail - calc: ($this - $avail) / (($now - $after) / 3600) - every: 1m - units: GB/hour - info: average rate the disk fills up (positive), or frees up (negative) space, for the last hour - - -# calculate the hours remaining -# if the disk continues to fill -# in this rate - -template: out_of_disk_space_time - on: disk.space - os: linux freebsd - hosts: * -families: * - calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf) - units: hours - every: 10s - warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8)) - crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2)) - delay: down 15m multiplier 1.2 max 1h - info: estimated time the disk will run out of space, if the system continues to add data with the rate of the last hour - to: sysadmin - - -# ----------------------------------------------------------------------------- -# disk inode fill rate - -# calculate the rate the disk inodes are allocated -# use as base, the available inodes change -# during the last hour - -# this is just a calculation - it has no alarm -# we will use it in the next template to find -# the hours remaining - -template: disk_inode_rate - on: disk.inodes - os: linux freebsd - hosts: * -families: * - lookup: min -10m at -50m unaligned of avail - calc: ($this - $avail) / (($now - $after) / 3600) - every: 1m - units: inodes/hour - info: average rate at which disk inodes are allocated (positive), or freed (negative), for the last hour - -# calculate the hours remaining -# if the disk inodes are allocated -# in this rate - -template: out_of_disk_inodes_time - on: disk.inodes - os: linux freebsd - hosts: * -families: * - calc: ($disk_inode_rate > 0) ? ($avail / $disk_inode_rate) : (inf) - units: hours - every: 10s - warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8)) - crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2)) - delay: down 15m multiplier 1.2 max 1h - info: estimated time the disk will run out of inodes, if the system continues to allocate inodes with the rate of the last hour - to: sysadmin - - -# ----------------------------------------------------------------------------- -# disk congestion - -# raise an alarm if the disk is congested -# by calculating the average disk utilization -# for the last 10 minutes - -template: 10min_disk_utilization - on: disk.util - os: linux freebsd - hosts: * -families: * - lookup: average -10m unaligned - units: % - every: 1m - green: 90 - red: 98 - warn: $this > $green * (($status >= $WARNING) ? (0.7) : (1)) - crit: $this > $red * (($status == $CRITICAL) ? (0.7) : (1)) - delay: down 15m multiplier 1.2 max 1h - info: the percentage of time the disk was busy, during the last 10 minutes - to: sysadmin - - -# raise an alarm if the disk backlog -# is above 1000ms (1s) per second -# for 10 minutes -# (i.e. the disk cannot catch up) - -template: 10min_disk_backlog - on: disk.backlog - os: linux - hosts: * -families: * - lookup: average -10m unaligned - units: ms - every: 1m - green: 2000 - red: 5000 - warn: $this > $green * (($status >= $WARNING) ? (0.7) : (1)) - crit: $this > $red * (($status == $CRITICAL) ? (0.7) : (1)) - delay: down 15m multiplier 1.2 max 1h - info: average of the kernel estimated disk backlog, for the last 10 minutes - to: sysadmin |