diff options
Diffstat (limited to '')
-rw-r--r-- | health/health.d/httpcheck.conf | 112 |
1 files changed, 112 insertions, 0 deletions
diff --git a/health/health.d/httpcheck.conf b/health/health.d/httpcheck.conf new file mode 100644 index 0000000..599c47a --- /dev/null +++ b/health/health.d/httpcheck.conf @@ -0,0 +1,112 @@ + +# This is a fast-reacting no-notification alarm ideal for custom dashboards or badges + template: httpcheck_web_service_up + families: * + on: httpcheck.status + class: Utilization + type: Web Server +component: HTTP endpoint + lookup: average -1m unaligned percentage of success + calc: ($this < 75) ? (0) : ($this) + every: 5s + units: up/down + info: average ratio of successful HTTP requests over the last minute (at least 75%) + to: silent + + template: httpcheck_web_service_bad_content + families: * + on: httpcheck.status + class: Workload + type: Web Server +component: HTTP endpoint + lookup: average -5m unaligned percentage of bad_content + every: 10s + units: % + warn: $this >= 10 AND $this < 40 + crit: $this >= 40 + delay: down 5m multiplier 1.5 max 1h + info: average ratio of HTTP responses with unexpected content over the last 5 minutes + options: no-clear-notification + to: webmaster + + template: httpcheck_web_service_bad_status + families: * + on: httpcheck.status + class: Workload + type: Web Server +component: HTTP endpoint + lookup: average -5m unaligned percentage of bad_status + every: 10s + units: % + warn: $this >= 10 AND $this < 40 + crit: $this >= 40 + delay: down 5m multiplier 1.5 max 1h + info: average ratio of HTTP responses with unexpected status over the last 5 minutes + options: no-clear-notification + to: webmaster + + template: httpcheck_web_service_timeouts + families: * + on: httpcheck.status + class: Latency + type: Web Server +component: HTTP endpoint + lookup: average -5m unaligned percentage of timeout + every: 10s + units: % + info: average ratio of HTTP request timeouts over the last 5 minutes + + template: httpcheck_no_web_service_connections + families: * + on: httpcheck.status + class: Errors + type: Other +component: HTTP endpoint + lookup: average -5m unaligned percentage of no_connection + every: 10s + units: % + info: average ratio of failed requests during the last 5 minutes + +# combined timeout & no connection alarm + template: httpcheck_web_service_unreachable + families: * + on: httpcheck.status + class: Errors + type: Web Server +component: HTTP endpoint + calc: ($httpcheck_no_web_service_connections >= $httpcheck_web_service_timeouts) ? ($httpcheck_no_web_service_connections) : ($httpcheck_web_service_timeouts) + units: % + every: 10s + warn: ($httpcheck_no_web_service_connections >= 10 OR $httpcheck_web_service_timeouts >= 10) AND ($httpcheck_no_web_service_connections < 40 OR $httpcheck_web_service_timeouts < 40) + crit: $httpcheck_no_web_service_connections >= 40 OR $httpcheck_web_service_timeouts >= 40 + delay: down 5m multiplier 1.5 max 1h + info: ratio of failed requests either due to timeouts or no connection over the last 5 minutes + options: no-clear-notification + to: webmaster + + template: httpcheck_1h_web_service_response_time + families: * + on: httpcheck.responsetime + class: Latency + type: Other +component: HTTP endpoint + lookup: average -1h unaligned of time + every: 30s + units: ms + info: average HTTP response time over the last hour + + template: httpcheck_web_service_slow + families: * + on: httpcheck.responsetime + class: Latency + type: Web Server +component: HTTP endpoint + lookup: average -3m unaligned of time + units: ms + every: 10s + warn: ($this > ($httpcheck_1h_web_service_response_time * 2) ) + crit: ($this > ($httpcheck_1h_web_service_response_time * 3) ) + delay: down 5m multiplier 1.5 max 1h + info: average HTTP response time over the last 3 minutes, compared to the average over the last hour + options: no-clear-notification + to: webmaster |