summaryrefslogtreecommitdiffstats
path: root/conf.d/health.d/httpcheck.conf
diff options
context:
space:
mode:
authorFederico Ceratto <federico.ceratto@gmail.com>2018-03-27 21:28:21 +0000
committerFederico Ceratto <federico.ceratto@gmail.com>2018-03-27 21:28:21 +0000
commitd4dd00f58a502c9ca4b63e36ce6bc7a9945dc63c (patch)
treefaac99f51f182bb8c0a03e95e393d421ac9ddf42 /conf.d/health.d/httpcheck.conf
parentNew upstream version 1.9.0+dfsg (diff)
downloadnetdata-d4dd00f58a502c9ca4b63e36ce6bc7a9945dc63c.tar.xz
netdata-d4dd00f58a502c9ca4b63e36ce6bc7a9945dc63c.zip
New upstream version 1.10.0+dfsgupstream/1.10.0+dfsg
Diffstat (limited to 'conf.d/health.d/httpcheck.conf')
-rw-r--r--conf.d/health.d/httpcheck.conf99
1 files changed, 99 insertions, 0 deletions
diff --git a/conf.d/health.d/httpcheck.conf b/conf.d/health.d/httpcheck.conf
new file mode 100644
index 00000000..0ddf35ea
--- /dev/null
+++ b/conf.d/health.d/httpcheck.conf
@@ -0,0 +1,99 @@
+template: httpcheck_last_collected_secs
+families: *
+ on: httpcheck.status
+ calc: $now - $last_collected_t
+ every: 10s
+ units: seconds ago
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful data collection
+ to: sysadmin
+
+# This is a fast-reacting no-notification alarm ideal for custom dashboards or badges
+template: web_service_up
+families: *
+ on: httpcheck.status
+ lookup: average -1m unaligned percentage of success
+ calc: ($this < 75) ? (0) : ($this)
+ every: 5s
+ units: up/down
+ info: at least 75% verified responses during last 60 seconds, ideal for badges
+ to: silent
+
+template: web_service_bad_content
+families: *
+ on: httpcheck.status
+ lookup: average -5m unaligned percentage of bad_content
+ every: 10s
+ units: %
+ warn: $this >= 10 AND $this < 40
+ crit: $this >= 40
+ delay: down 5m multiplier 1.5 max 1h
+ info: average of unexpected http response content during the last 5 minutes
+ options: no-clear-notification
+ to: webmaster
+
+template: web_service_bad_status
+families: *
+ on: httpcheck.status
+ lookup: average -5m unaligned percentage of bad_status
+ every: 10s
+ units: %
+ warn: $this >= 10 AND $this < 40
+ crit: $this >= 40
+ delay: down 5m multiplier 1.5 max 1h
+ info: average of unexpected http status during the last 5 minutes
+ options: no-clear-notification
+ to: webmaster
+
+template: web_service_timeouts
+families: *
+ on: httpcheck.status
+ lookup: average -5m unaligned percentage of timeout
+ every: 10s
+ units: %
+ info: average of timeouts during the last 5 minutes
+
+template: no_web_service_connections
+families: *
+ on: httpcheck.status
+ lookup: average -5m unaligned percentage of no_connection
+ every: 10s
+ units: %
+ info: average of failed requests during the last 5 minutes
+
+# combined timeout & no connection alarm
+template: web_service_unreachable
+families: *
+ on: httpcheck.status
+ calc: ($no_web_service_connections >= $web_service_timeouts) ? ($no_web_service_connections) : ($web_service_timeouts)
+ units: %
+ every: 10s
+ warn: ($no_web_service_connections >= 10 OR $web_service_timeouts >= 10) AND ($no_web_service_connections < 40 OR $web_service_timeouts < 40)
+ crit: $no_web_service_connections >= 40 OR $web_service_timeouts >= 40
+ delay: down 5m multiplier 1.5 max 1h
+ info: average of failed requests either due to timeouts or no connection during the last 5 minutes
+ options: no-clear-notification
+ to: webmaster
+
+template: 1h_web_service_response_time
+families: *
+ on: httpcheck.responsetime
+ lookup: average -1h unaligned of time
+ every: 30s
+ units: ms
+ info: average response time over the last hour
+
+template: web_service_slow
+families: *
+ on: httpcheck.responsetime
+ lookup: average -3m unaligned of time
+ units: ms
+ every: 10s
+ warn: ($this > ($1h_web_service_response_time * 2) )
+ crit: ($this > ($1h_web_service_response_time * 3) )
+ info: average response time over the last 3 minutes, compared to the average over the last hour
+ delay: down 5m multiplier 1.5 max 1h
+ options: no-clear-notification
+ to: webmaster