diff options
Diffstat (limited to 'health/health.d/portcheck.conf')
-rw-r--r-- | health/health.d/portcheck.conf | 96 |
1 files changed, 54 insertions, 42 deletions
diff --git a/health/health.d/portcheck.conf b/health/health.d/portcheck.conf index 29dcebbc7..b977dbb31 100644 --- a/health/health.d/portcheck.conf +++ b/health/health.d/portcheck.conf @@ -1,46 +1,58 @@ -template: portcheck_last_collected_secs -families: * - on: portcheck.status - calc: $now - $last_collected_t - every: 10s - units: seconds ago - warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) - crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) - delay: down 5m multiplier 1.5 max 1h - info: number of seconds since the last successful data collection - to: sysadmin + template: portcheck_last_collected_secs + families: * + on: portcheck.status + class: Other +component: TCP endpoint + type: Latency + calc: $now - $last_collected_t + every: 10s + units: seconds ago + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful data collection + to: sysadmin # This is a fast-reacting no-notification alarm ideal for custom dashboards or badges -template: portcheck_service_reachable -families: * - on: portcheck.status - lookup: average -1m unaligned percentage of success - calc: ($this < 75) ? (0) : ($this) - every: 5s - units: up/down - info: average ratio of successful connections over the last minute (at least 75%) - to: silent + template: portcheck_service_reachable + families: * + on: portcheck.status + class: Other +component: TCP endpoint + type: Workload + lookup: average -1m unaligned percentage of success + calc: ($this < 75) ? (0) : ($this) + every: 5s + units: up/down + info: average ratio of successful connections over the last minute (at least 75%) + to: silent -template: portcheck_connection_timeouts -families: * - on: portcheck.status - lookup: average -5m unaligned percentage of timeout - every: 10s - units: % - warn: $this >= 10 AND $this < 40 - crit: $this >= 40 - delay: down 5m multiplier 1.5 max 1h - info: average ratio of timeouts over the last 5 minutes - to: sysadmin + template: portcheck_connection_timeouts + families: * + on: portcheck.status + class: Other +component: TCP endpoint + type: Errors + lookup: average -5m unaligned percentage of timeout + every: 10s + units: % + warn: $this >= 10 AND $this < 40 + crit: $this >= 40 + delay: down 5m multiplier 1.5 max 1h + info: average ratio of timeouts over the last 5 minutes + to: sysadmin -template: portcheck_connection_fails -families: * - on: portcheck.status - lookup: average -5m unaligned percentage of no_connection,failed - every: 10s - units: % - warn: $this >= 10 AND $this < 40 - crit: $this >= 40 - delay: down 5m multiplier 1.5 max 1h - info: average ratio of failed connections over the last 5 minutes - to: sysadmin + template: portcheck_connection_fails + families: * + on: portcheck.status + class: Other +component: TCP endpoint + type: Errors + lookup: average -5m unaligned percentage of no_connection,failed + every: 10s + units: % + warn: $this >= 10 AND $this < 40 + crit: $this >= 40 + delay: down 5m multiplier 1.5 max 1h + info: average ratio of failed connections over the last 5 minutes + to: sysadmin |