summaryrefslogtreecommitdiffstats
path: root/conf.d/health.d/httpcheck.conf
diff options
context:
space:
mode:
Diffstat (limited to 'conf.d/health.d/httpcheck.conf')
-rw-r--r--conf.d/health.d/httpcheck.conf99
1 files changed, 99 insertions, 0 deletions
diff --git a/conf.d/health.d/httpcheck.conf b/conf.d/health.d/httpcheck.conf
new file mode 100644
index 000000000..0ddf35eab
--- /dev/null
+++ b/conf.d/health.d/httpcheck.conf
@@ -0,0 +1,99 @@
+template: httpcheck_last_collected_secs
+families: *
+ on: httpcheck.status
+ calc: $now - $last_collected_t
+ every: 10s
+ units: seconds ago
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful data collection
+ to: sysadmin
+
+# This is a fast-reacting no-notification alarm ideal for custom dashboards or badges
+template: web_service_up
+families: *
+ on: httpcheck.status
+ lookup: average -1m unaligned percentage of success
+ calc: ($this < 75) ? (0) : ($this)
+ every: 5s
+ units: up/down
+ info: at least 75% verified responses during last 60 seconds, ideal for badges
+ to: silent
+
+template: web_service_bad_content
+families: *
+ on: httpcheck.status
+ lookup: average -5m unaligned percentage of bad_content
+ every: 10s
+ units: %
+ warn: $this >= 10 AND $this < 40
+ crit: $this >= 40
+ delay: down 5m multiplier 1.5 max 1h
+ info: average of unexpected http response content during the last 5 minutes
+ options: no-clear-notification
+ to: webmaster
+
+template: web_service_bad_status
+families: *
+ on: httpcheck.status
+ lookup: average -5m unaligned percentage of bad_status
+ every: 10s
+ units: %
+ warn: $this >= 10 AND $this < 40
+ crit: $this >= 40
+ delay: down 5m multiplier 1.5 max 1h
+ info: average of unexpected http status during the last 5 minutes
+ options: no-clear-notification
+ to: webmaster
+
+template: web_service_timeouts
+families: *
+ on: httpcheck.status
+ lookup: average -5m unaligned percentage of timeout
+ every: 10s
+ units: %
+ info: average of timeouts during the last 5 minutes
+
+template: no_web_service_connections
+families: *
+ on: httpcheck.status
+ lookup: average -5m unaligned percentage of no_connection
+ every: 10s
+ units: %
+ info: average of failed requests during the last 5 minutes
+
+# combined timeout & no connection alarm
+template: web_service_unreachable
+families: *
+ on: httpcheck.status
+ calc: ($no_web_service_connections >= $web_service_timeouts) ? ($no_web_service_connections) : ($web_service_timeouts)
+ units: %
+ every: 10s
+ warn: ($no_web_service_connections >= 10 OR $web_service_timeouts >= 10) AND ($no_web_service_connections < 40 OR $web_service_timeouts < 40)
+ crit: $no_web_service_connections >= 40 OR $web_service_timeouts >= 40
+ delay: down 5m multiplier 1.5 max 1h
+ info: average of failed requests either due to timeouts or no connection during the last 5 minutes
+ options: no-clear-notification
+ to: webmaster
+
+template: 1h_web_service_response_time
+families: *
+ on: httpcheck.responsetime
+ lookup: average -1h unaligned of time
+ every: 30s
+ units: ms
+ info: average response time over the last hour
+
+template: web_service_slow
+families: *
+ on: httpcheck.responsetime
+ lookup: average -3m unaligned of time
+ units: ms
+ every: 10s
+ warn: ($this > ($1h_web_service_response_time * 2) )
+ crit: ($this > ($1h_web_service_response_time * 3) )
+ info: average response time over the last 3 minutes, compared to the average over the last hour
+ delay: down 5m multiplier 1.5 max 1h
+ options: no-clear-notification
+ to: webmaster