summaryrefslogtreecommitdiffstats
path: root/health/health.d/httpcheck.conf
diff options
context:
space:
mode:
Diffstat (limited to 'health/health.d/httpcheck.conf')
-rw-r--r--health/health.d/httpcheck.conf205
1 files changed, 116 insertions, 89 deletions
diff --git a/health/health.d/httpcheck.conf b/health/health.d/httpcheck.conf
index 0158f63eb..d4d6376a3 100644
--- a/health/health.d/httpcheck.conf
+++ b/health/health.d/httpcheck.conf
@@ -1,99 +1,126 @@
-template: httpcheck_last_collected_secs
-families: *
- on: httpcheck.status
- calc: $now - $last_collected_t
- every: 10s
- units: seconds ago
- warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
- delay: down 5m multiplier 1.5 max 1h
- info: number of seconds since the last successful data collection
- to: sysadmin
+ template: httpcheck_last_collected_secs
+ families: *
+ on: httpcheck.status
+ class: Other
+component: HTTP endpoint
+ type: Latency
+ calc: $now - $last_collected_t
+ every: 10s
+ units: seconds ago
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful data collection
+ to: sysadmin
# This is a fast-reacting no-notification alarm ideal for custom dashboards or badges
-template: httpcheck_web_service_up
-families: *
- on: httpcheck.status
- lookup: average -1m unaligned percentage of success
- calc: ($this < 75) ? (0) : ($this)
- every: 5s
- units: up/down
- info: average ratio of successful HTTP requests over the last minute (at least 75%)
- to: silent
+ template: httpcheck_web_service_up
+ families: *
+ on: httpcheck.status
+ class: Web Server
+component: HTTP endpoint
+ type: Utilization
+ lookup: average -1m unaligned percentage of success
+ calc: ($this < 75) ? (0) : ($this)
+ every: 5s
+ units: up/down
+ info: average ratio of successful HTTP requests over the last minute (at least 75%)
+ to: silent
-template: httpcheck_web_service_bad_content
-families: *
- on: httpcheck.status
- lookup: average -5m unaligned percentage of bad_content
- every: 10s
- units: %
- warn: $this >= 10 AND $this < 40
- crit: $this >= 40
- delay: down 5m multiplier 1.5 max 1h
- info: average ratio of HTTP responses with unexpected content over the last 5 minutes
- options: no-clear-notification
- to: webmaster
+ template: httpcheck_web_service_bad_content
+ families: *
+ on: httpcheck.status
+ class: Web Server
+component: HTTP endpoint
+ type: Workload
+ lookup: average -5m unaligned percentage of bad_content
+ every: 10s
+ units: %
+ warn: $this >= 10 AND $this < 40
+ crit: $this >= 40
+ delay: down 5m multiplier 1.5 max 1h
+ info: average ratio of HTTP responses with unexpected content over the last 5 minutes
+ options: no-clear-notification
+ to: webmaster
-template: httpcheck_web_service_bad_status
-families: *
- on: httpcheck.status
- lookup: average -5m unaligned percentage of bad_status
- every: 10s
- units: %
- warn: $this >= 10 AND $this < 40
- crit: $this >= 40
- delay: down 5m multiplier 1.5 max 1h
- info: average ratio of HTTP responses with unexpected status over the last 5 minutes
- options: no-clear-notification
- to: webmaster
+ template: httpcheck_web_service_bad_status
+ families: *
+ on: httpcheck.status
+ class: Web Server
+component: HTTP endpoint
+ type: Workload
+ lookup: average -5m unaligned percentage of bad_status
+ every: 10s
+ units: %
+ warn: $this >= 10 AND $this < 40
+ crit: $this >= 40
+ delay: down 5m multiplier 1.5 max 1h
+ info: average ratio of HTTP responses with unexpected status over the last 5 minutes
+ options: no-clear-notification
+ to: webmaster
-template: httpcheck_web_service_timeouts
-families: *
- on: httpcheck.status
- lookup: average -5m unaligned percentage of timeout
- every: 10s
- units: %
- info: average ratio of HTTP request timeouts over the last 5 minutes
+ template: httpcheck_web_service_timeouts
+ families: *
+ on: httpcheck.status
+ class: Web Server
+component: HTTP endpoint
+ type: Latency
+ lookup: average -5m unaligned percentage of timeout
+ every: 10s
+ units: %
+ info: average ratio of HTTP request timeouts over the last 5 minutes
-template: httpcheck_no_web_service_connections
-families: *
- on: httpcheck.status
- lookup: average -5m unaligned percentage of no_connection
- every: 10s
- units: %
- info: average ratio of failed requests during the last 5 minutes
+ template: httpcheck_no_web_service_connections
+ families: *
+ on: httpcheck.status
+ class: Other
+component: HTTP endpoint
+ type: Errors
+ lookup: average -5m unaligned percentage of no_connection
+ every: 10s
+ units: %
+ info: average ratio of failed requests during the last 5 minutes
# combined timeout & no connection alarm
-template: httpcheck_web_service_unreachable
-families: *
- on: httpcheck.status
- calc: ($httpcheck_no_web_service_connections >= $httpcheck_web_service_timeouts) ? ($httpcheck_no_web_service_connections) : ($httpcheck_web_service_timeouts)
- units: %
- every: 10s
- warn: ($httpcheck_no_web_service_connections >= 10 OR $httpcheck_web_service_timeouts >= 10) AND ($httpcheck_no_web_service_connections < 40 OR $httpcheck_web_service_timeouts < 40)
- crit: $httpcheck_no_web_service_connections >= 40 OR $httpcheck_web_service_timeouts >= 40
- delay: down 5m multiplier 1.5 max 1h
- info: ratio of failed requests either due to timeouts or no connection over the last 5 minutes
- options: no-clear-notification
- to: webmaster
+ template: httpcheck_web_service_unreachable
+ families: *
+ on: httpcheck.status
+ class: Web Server
+component: HTTP endpoint
+ type: Errors
+ calc: ($httpcheck_no_web_service_connections >= $httpcheck_web_service_timeouts) ? ($httpcheck_no_web_service_connections) : ($httpcheck_web_service_timeouts)
+ units: %
+ every: 10s
+ warn: ($httpcheck_no_web_service_connections >= 10 OR $httpcheck_web_service_timeouts >= 10) AND ($httpcheck_no_web_service_connections < 40 OR $httpcheck_web_service_timeouts < 40)
+ crit: $httpcheck_no_web_service_connections >= 40 OR $httpcheck_web_service_timeouts >= 40
+ delay: down 5m multiplier 1.5 max 1h
+ info: ratio of failed requests either due to timeouts or no connection over the last 5 minutes
+ options: no-clear-notification
+ to: webmaster
-template: httpcheck_1h_web_service_response_time
-families: *
- on: httpcheck.responsetime
- lookup: average -1h unaligned of time
- every: 30s
- units: ms
- info: average HTTP response time over the last hour
+ template: httpcheck_1h_web_service_response_time
+ families: *
+ on: httpcheck.responsetime
+ class: Other
+component: HTTP endpoint
+ type: Latency
+ lookup: average -1h unaligned of time
+ every: 30s
+ units: ms
+ info: average HTTP response time over the last hour
-template: httpcheck_web_service_slow
-families: *
- on: httpcheck.responsetime
- lookup: average -3m unaligned of time
- units: ms
- every: 10s
- warn: ($this > ($httpcheck_1h_web_service_response_time * 2) )
- crit: ($this > ($httpcheck_1h_web_service_response_time * 3) )
- delay: down 5m multiplier 1.5 max 1h
- info: average HTTP response time over the last 3 minutes, compared to the average over the last hour
- options: no-clear-notification
- to: webmaster
+ template: httpcheck_web_service_slow
+ families: *
+ on: httpcheck.responsetime
+ class: Web Server
+component: HTTP endpoint
+ type: Latency
+ lookup: average -3m unaligned of time
+ units: ms
+ every: 10s
+ warn: ($this > ($httpcheck_1h_web_service_response_time * 2) )
+ crit: ($this > ($httpcheck_1h_web_service_response_time * 3) )
+ delay: down 5m multiplier 1.5 max 1h
+ info: average HTTP response time over the last 3 minutes, compared to the average over the last hour
+ options: no-clear-notification
+ to: webmaster