diff options
Diffstat (limited to 'conf.d/health.d/httpcheck.conf')
-rw-r--r-- | conf.d/health.d/httpcheck.conf | 99 |
1 files changed, 99 insertions, 0 deletions
diff --git a/conf.d/health.d/httpcheck.conf b/conf.d/health.d/httpcheck.conf new file mode 100644 index 000000000..0ddf35eab --- /dev/null +++ b/conf.d/health.d/httpcheck.conf @@ -0,0 +1,99 @@ +template: httpcheck_last_collected_secs +families: * + on: httpcheck.status + calc: $now - $last_collected_t + every: 10s + units: seconds ago + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful data collection + to: sysadmin + +# This is a fast-reacting no-notification alarm ideal for custom dashboards or badges +template: web_service_up +families: * + on: httpcheck.status + lookup: average -1m unaligned percentage of success + calc: ($this < 75) ? (0) : ($this) + every: 5s + units: up/down + info: at least 75% verified responses during last 60 seconds, ideal for badges + to: silent + +template: web_service_bad_content +families: * + on: httpcheck.status + lookup: average -5m unaligned percentage of bad_content + every: 10s + units: % + warn: $this >= 10 AND $this < 40 + crit: $this >= 40 + delay: down 5m multiplier 1.5 max 1h + info: average of unexpected http response content during the last 5 minutes + options: no-clear-notification + to: webmaster + +template: web_service_bad_status +families: * + on: httpcheck.status + lookup: average -5m unaligned percentage of bad_status + every: 10s + units: % + warn: $this >= 10 AND $this < 40 + crit: $this >= 40 + delay: down 5m multiplier 1.5 max 1h + info: average of unexpected http status during the last 5 minutes + options: no-clear-notification + to: webmaster + +template: web_service_timeouts +families: * + on: httpcheck.status + lookup: average -5m unaligned percentage of timeout + every: 10s + units: % + info: average of timeouts during the last 5 minutes + +template: no_web_service_connections +families: * + on: httpcheck.status + lookup: average -5m unaligned percentage of no_connection + every: 10s + units: % + info: average of failed requests during the last 5 minutes + +# combined timeout & no connection alarm +template: web_service_unreachable +families: * + on: httpcheck.status + calc: ($no_web_service_connections >= $web_service_timeouts) ? ($no_web_service_connections) : ($web_service_timeouts) + units: % + every: 10s + warn: ($no_web_service_connections >= 10 OR $web_service_timeouts >= 10) AND ($no_web_service_connections < 40 OR $web_service_timeouts < 40) + crit: $no_web_service_connections >= 40 OR $web_service_timeouts >= 40 + delay: down 5m multiplier 1.5 max 1h + info: average of failed requests either due to timeouts or no connection during the last 5 minutes + options: no-clear-notification + to: webmaster + +template: 1h_web_service_response_time +families: * + on: httpcheck.responsetime + lookup: average -1h unaligned of time + every: 30s + units: ms + info: average response time over the last hour + +template: web_service_slow +families: * + on: httpcheck.responsetime + lookup: average -3m unaligned of time + units: ms + every: 10s + warn: ($this > ($1h_web_service_response_time * 2) ) + crit: ($this > ($1h_web_service_response_time * 3) ) + info: average response time over the last 3 minutes, compared to the average over the last hour + delay: down 5m multiplier 1.5 max 1h + options: no-clear-notification + to: webmaster |