summary refs log tree commit diff stats
path: root/health/health.d/httpcheck.conf
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- health/health.d/httpcheck.conf 112
1 files changed, 112 insertions, 0 deletions
diff --git a/health/health.d/httpcheck.conf b/health/health.d/httpcheck.conf
new file mode 100644
index 0000000..599c47a
--- /dev/null
+++ b/health/health.d/httpcheck.conf
@@ -0,0 +1,112 @@
+
+# This is a fast-reacting, no-notification alarm, ideal for custom dashboards or badges.
+ template: httpcheck_web_service_up
+ families: *
+ on: httpcheck.status
+ class: Utilization
+ type: Web Server
+component: HTTP endpoint
+ lookup: average -1m unaligned percentage of success
+ calc: ($this < 75) ? (0) : ($this)
+ every: 5s
+ units: up/down
+ info: average ratio of successful HTTP requests over the last minute (reported as 0 when below 75%)
+ to: silent
+
+ template: httpcheck_web_service_bad_content
+ families: *
+ on: httpcheck.status
+ class: Workload
+ type: Web Server
+component: HTTP endpoint
+ lookup: average -5m unaligned percentage of bad_content
+ every: 10s
+ units: %
+ warn: $this >= 10 AND $this < 40
+ crit: $this >= 40
+ delay: down 5m multiplier 1.5 max 1h
+ info: average ratio of HTTP responses with unexpected content over the last 5 minutes
+ options: no-clear-notification
+ to: webmaster
+
+ template: httpcheck_web_service_bad_status
+ families: *
+ on: httpcheck.status
+ class: Workload
+ type: Web Server
+component: HTTP endpoint
+ lookup: average -5m unaligned percentage of bad_status
+ every: 10s
+ units: %
+ warn: $this >= 10 AND $this < 40
+ crit: $this >= 40
+ delay: down 5m multiplier 1.5 max 1h
+ info: average ratio of HTTP responses with unexpected status over the last 5 minutes
+ options: no-clear-notification
+ to: webmaster
+
+ template: httpcheck_web_service_timeouts
+ families: *
+ on: httpcheck.status
+ class: Latency
+ type: Web Server
+component: HTTP endpoint
+ lookup: average -5m unaligned percentage of timeout
+ every: 10s
+ units: %
+ info: average ratio of HTTP request timeouts over the last 5 minutes
+
+ template: httpcheck_no_web_service_connections
+ families: *
+ on: httpcheck.status
+ class: Errors
+ type: Other
+component: HTTP endpoint
+ lookup: average -5m unaligned percentage of no_connection
+ every: 10s
+ units: %
+ info: average ratio of HTTP requests that failed with no connection over the last 5 minutes
+
+# Combined timeout and no-connection alarm: reports the larger of the two ratios.
+ template: httpcheck_web_service_unreachable
+ families: *
+ on: httpcheck.status
+ class: Errors
+ type: Web Server
+component: HTTP endpoint
+ calc: ($httpcheck_no_web_service_connections >= $httpcheck_web_service_timeouts) ? ($httpcheck_no_web_service_connections) : ($httpcheck_web_service_timeouts)
+ units: %
+ every: 10s
+ warn: ($httpcheck_no_web_service_connections >= 10 OR $httpcheck_web_service_timeouts >= 10) AND ($httpcheck_no_web_service_connections < 40 OR $httpcheck_web_service_timeouts < 40)
+ crit: $httpcheck_no_web_service_connections >= 40 OR $httpcheck_web_service_timeouts >= 40
+ delay: down 5m multiplier 1.5 max 1h
+ info: ratio of failed requests either due to timeouts or no connection over the last 5 minutes
+ options: no-clear-notification
+ to: webmaster
+
+ template: httpcheck_1h_web_service_response_time
+ families: *
+ on: httpcheck.responsetime
+ class: Latency
+ type: Other
+component: HTTP endpoint
+ lookup: average -1h unaligned of time
+ every: 30s
+ units: ms
+ info: average HTTP response time over the last hour
+
+ template: httpcheck_web_service_slow
+ families: *
+ on: httpcheck.responsetime
+ class: Latency
+ type: Web Server
+component: HTTP endpoint
+ lookup: average -3m unaligned of time
+ units: ms
+ every: 10s
+ warn: ($this > ($httpcheck_1h_web_service_response_time * 2) )
+ crit: ($this > ($httpcheck_1h_web_service_response_time * 3) )
+ delay: down 5m multiplier 1.5 max 1h
+ info: average HTTP response time over the last 3 minutes, compared to the average over the last hour
+ options: no-clear-notification
+ to: webmaster