summaryrefslogtreecommitdiffstats
path: root/health/health.d/web_log.conf
diff options
context:
space:
mode:
Diffstat (limited to 'health/health.d/web_log.conf')
-rw-r--r--health/health.d/web_log.conf135
1 files changed, 48 insertions, 87 deletions
diff --git a/health/health.d/web_log.conf b/health/health.d/web_log.conf
index 127c9a9c6..454e0abef 100644
--- a/health/health.d/web_log.conf
+++ b/health/health.d/web_log.conf
@@ -1,22 +1,4 @@
-# make sure we can collect web log data
-
- template: last_collected_secs
- on: web_log.response_codes
- class: Web Server
-component: Web log
- type: Latency
- families: *
- calc: $now - $last_collected_t
- units: seconds ago
- every: 10s
- warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
- delay: down 5m multiplier 1.5 max 1h
- info: number of seconds since the last successful data collection
- to: webmaster
-
-
# -----------------------------------------------------------------------------
# high level response code alarms
@@ -29,9 +11,9 @@ component: Web log
template: 1m_requests
on: web_log.response_statuses
- class: Web Server
+ class: Workload
+ type: Web Server
component: Web log
- type: Workload
families: *
lookup: sum -1m unaligned
calc: ($this == 0)?(1):($this)
@@ -41,9 +23,9 @@ component: Web log
template: 1m_successful
on: web_log.response_statuses
- class: Web Server
+ class: Workload
+ type: Web Server
component: Web log
- type: Workload
families: *
lookup: sum -1m unaligned of successful_requests
calc: $this * 100 / $1m_requests
@@ -57,41 +39,39 @@ component: Web log
template: 1m_redirects
on: web_log.response_statuses
- class: Web Server
+ class: Workload
+ type: Web Server
component: Web log
- type: Workload
families: *
lookup: sum -1m unaligned of redirects
calc: $this * 100 / $1m_requests
units: %
every: 10s
warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING ) ? ( 1 ) : ( 20 )) ) : ( 0 )
- crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 20 ) : ( 30 )) ) : ( 0 )
delay: up 2m down 15m multiplier 1.5 max 1h
info: ratio of redirection HTTP requests over the last minute (3xx except 304)
to: webmaster
template: 1m_bad_requests
on: web_log.response_statuses
- class: Web Server
+ class: Errors
+ type: Web Server
component: Web log
- type: Errors
families: *
lookup: sum -1m unaligned of bad_requests
calc: $this * 100 / $1m_requests
units: %
every: 10s
warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING) ? ( 10 ) : ( 30 )) ) : ( 0 )
- crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 30 ) : ( 50 )) ) : ( 0 )
delay: up 2m down 15m multiplier 1.5 max 1h
info: ratio of client error HTTP requests over the last minute (4xx except 401)
to: webmaster
template: 1m_internal_errors
on: web_log.response_statuses
- class: Web Server
+ class: Errors
+ type: Web Server
component: Web log
- type: Errors
families: *
lookup: sum -1m unaligned of server_errors
calc: $this * 100 / $1m_requests
@@ -114,9 +94,9 @@ component: Web log
template: 1m_total_requests
on: web_log.response_codes
- class: Web Server
+ class: Workload
+ type: Web Server
component: Web log
- type: Workload
families: *
lookup: sum -1m unaligned
calc: ($this == 0)?(1):($this)
@@ -126,9 +106,9 @@ component: Web log
template: 1m_unmatched
on: web_log.response_codes
- class: Web Server
+ class: Errors
+ type: Web Server
component: Web log
- type: Errors
families: *
lookup: sum -1m unaligned of unmatched
calc: $this * 100 / $1m_total_requests
@@ -151,9 +131,9 @@ component: Web log
template: 10m_response_time
on: web_log.response_time
- class: System
+ class: Latency
+ type: System
component: Web log
- type: Latency
families: *
lookup: average -10m unaligned of avg
units: ms
@@ -162,9 +142,9 @@ component: Web log
template: web_slow
on: web_log.response_time
- class: Web Server
+ class: Latency
+ type: Web Server
component: Web log
- type: Latency
families: *
lookup: average -1m unaligned of avg
units: ms
@@ -191,9 +171,9 @@ component: Web log
template: 5m_successful_old
on: web_log.response_statuses
- class: Web Server
+ class: Workload
+ type: Web Server
component: Web log
- type: Workload
families: *
lookup: average -5m at -5m unaligned of successful_requests
units: requests/s
@@ -202,9 +182,9 @@ component: Web log
template: 5m_successful
on: web_log.response_statuses
- class: Web Server
+ class: Workload
+ type: Web Server
component: Web log
- type: Workload
families: *
lookup: average -5m unaligned of successful_requests
units: requests/s
@@ -213,9 +193,9 @@ component: Web log
template: 5m_requests_ratio
on: web_log.response_codes
- class: Web Server
+ class: Workload
+ type: Web Server
component: Web log
- type: Workload
families: *
calc: ($5m_successful_old > 0)?($5m_successful * 100 / $5m_successful_old):(100)
units: %
@@ -233,23 +213,6 @@ component: Web log
# ---------------------------------------------------GO-VERSION---------------------------------------------------------
-# make sure we can collect web log data
-
- template: web_log_last_collected_secs
- on: web_log.requests
- class: Web Server
-component: Web log
- type: Latency
- families: *
- calc: $now - $last_collected_t
- units: seconds ago
- every: 10s
- warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
- delay: down 5m multiplier 1.5 max 1h
- info: number of seconds since the last successful data collection
- to: webmaster
-
# unmatched lines
# the following alarms trigger only when there are enough data.
@@ -261,9 +224,9 @@ component: Web log
template: web_log_1m_total_requests
on: web_log.requests
- class: Web Server
+ class: Workload
+ type: Web Server
component: Web log
- type: Workload
families: *
lookup: sum -1m unaligned
calc: ($this == 0)?(1):($this)
@@ -273,9 +236,9 @@ component: Web log
template: web_log_1m_unmatched
on: web_log.excluded_requests
- class: Web Server
+ class: Errors
+ type: Web Server
component: Web log
- type: Errors
families: *
lookup: sum -1m unaligned of unmatched
calc: $this * 100 / $web_log_1m_total_requests
@@ -298,9 +261,9 @@ component: Web log
template: web_log_1m_requests
on: web_log.type_requests
- class: Web Server
+ class: Workload
+ type: Web Server
component: Web log
- type: Workload
families: *
lookup: sum -1m unaligned
calc: ($this == 0)?(1):($this)
@@ -310,9 +273,9 @@ component: Web log
template: web_log_1m_successful
on: web_log.type_requests
- class: Web Server
+ class: Workload
+ type: Web Server
component: Web log
- type: Workload
families: *
lookup: sum -1m unaligned of success
calc: $this * 100 / $web_log_1m_requests
@@ -326,41 +289,39 @@ component: Web log
template: web_log_1m_redirects
on: web_log.type_requests
- class: Web Server
+ class: Workload
+ type: Web Server
component: Web log
- type: Workload
families: *
lookup: sum -1m unaligned of redirect
calc: $this * 100 / $web_log_1m_requests
units: %
every: 10s
warn: ($web_log_1m_requests > 120) ? ($this > (($status >= $WARNING ) ? ( 1 ) : ( 20 )) ) : ( 0 )
- crit: ($web_log_1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 20 ) : ( 30 )) ) : ( 0 )
delay: up 2m down 15m multiplier 1.5 max 1h
info: ratio of redirection HTTP requests over the last minute (3xx except 304)
to: webmaster
template: web_log_1m_bad_requests
on: web_log.type_requests
- class: Web Server
+ class: Errors
+ type: Web Server
component: Web log
- type: Errors
families: *
lookup: sum -1m unaligned of bad
calc: $this * 100 / $web_log_1m_requests
units: %
every: 10s
warn: ($web_log_1m_requests > 120) ? ($this > (($status >= $WARNING) ? ( 10 ) : ( 30 )) ) : ( 0 )
- crit: ($web_log_1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 30 ) : ( 50 )) ) : ( 0 )
delay: up 2m down 15m multiplier 1.5 max 1h
info: ratio of client error HTTP requests over the last minute (4xx except 401)
to: webmaster
template: web_log_1m_internal_errors
on: web_log.type_requests
- class: Web Server
+ class: Errors
+ type: Web Server
component: Web log
- type: Errors
families: *
lookup: sum -1m unaligned of error
calc: $this * 100 / $web_log_1m_requests
@@ -384,9 +345,9 @@ component: Web log
template: web_log_10m_response_time
on: web_log.request_processing_time
- class: System
+ class: Latency
+ type: System
component: Web log
- type: Latency
families: *
lookup: average -10m unaligned of avg
units: ms
@@ -395,9 +356,9 @@ component: Web log
template: web_log_web_slow
on: web_log.request_processing_time
- class: Web Server
+ class: Latency
+ type: Web Server
component: Web log
- type: Latency
families: *
lookup: average -1m unaligned of avg
units: ms
@@ -424,9 +385,9 @@ component: Web log
template: web_log_5m_successful_old
on: web_log.type_requests
- class: Web Server
+ class: Workload
+ type: Web Server
component: Web log
- type: Workload
families: *
lookup: average -5m at -5m unaligned of success
units: requests/s
@@ -435,9 +396,9 @@ component: Web log
template: web_log_5m_successful
on: web_log.type_requests
- class: Web Server
+ class: Workload
+ type: Web Server
component: Web log
- type: Workload
families: *
lookup: average -5m unaligned of success
units: requests/s
@@ -446,9 +407,9 @@ component: Web log
template: web_log_5m_requests_ratio
on: web_log.type_requests
- class: Web Server
+ class: Workload
+ type: Web Server
component: Web log
- type: Workload
families: *
calc: ($web_log_5m_successful_old > 0)?($web_log_5m_successful * 100 / $web_log_5m_successful_old):(100)
units: %