diff options
Diffstat (limited to '')
-rw-r--r-- | health/health.d/web_log.conf | 135 |
1 files changed, 48 insertions, 87 deletions
diff --git a/health/health.d/web_log.conf b/health/health.d/web_log.conf index 127c9a9c6..454e0abef 100644 --- a/health/health.d/web_log.conf +++ b/health/health.d/web_log.conf @@ -1,22 +1,4 @@ -# make sure we can collect web log data - - template: last_collected_secs - on: web_log.response_codes - class: Web Server -component: Web log - type: Latency - families: * - calc: $now - $last_collected_t - units: seconds ago - every: 10s - warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) - crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) - delay: down 5m multiplier 1.5 max 1h - info: number of seconds since the last successful data collection - to: webmaster - - # ----------------------------------------------------------------------------- # high level response code alarms @@ -29,9 +11,9 @@ component: Web log template: 1m_requests on: web_log.response_statuses - class: Web Server + class: Workload + type: Web Server component: Web log - type: Workload families: * lookup: sum -1m unaligned calc: ($this == 0)?(1):($this) @@ -41,9 +23,9 @@ component: Web log template: 1m_successful on: web_log.response_statuses - class: Web Server + class: Workload + type: Web Server component: Web log - type: Workload families: * lookup: sum -1m unaligned of successful_requests calc: $this * 100 / $1m_requests @@ -57,41 +39,39 @@ component: Web log template: 1m_redirects on: web_log.response_statuses - class: Web Server + class: Workload + type: Web Server component: Web log - type: Workload families: * lookup: sum -1m unaligned of redirects calc: $this * 100 / $1m_requests units: % every: 10s warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING ) ? ( 1 ) : ( 20 )) ) : ( 0 ) - crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 20 ) : ( 30 )) ) : ( 0 ) delay: up 2m down 15m multiplier 1.5 max 1h info: ratio of redirection HTTP requests over the last minute (3xx except 304) to: webmaster template: 1m_bad_requests on: web_log.response_statuses - class: Web Server + class: Errors + type: Web Server component: Web log - type: Errors families: * lookup: sum -1m unaligned of bad_requests calc: $this * 100 / $1m_requests units: % every: 10s warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING) ? ( 10 ) : ( 30 )) ) : ( 0 ) - crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 30 ) : ( 50 )) ) : ( 0 ) delay: up 2m down 15m multiplier 1.5 max 1h info: ratio of client error HTTP requests over the last minute (4xx except 401) to: webmaster template: 1m_internal_errors on: web_log.response_statuses - class: Web Server + class: Errors + type: Web Server component: Web log - type: Errors families: * lookup: sum -1m unaligned of server_errors calc: $this * 100 / $1m_requests @@ -114,9 +94,9 @@ component: Web log template: 1m_total_requests on: web_log.response_codes - class: Web Server + class: Workload + type: Web Server component: Web log - type: Workload families: * lookup: sum -1m unaligned calc: ($this == 0)?(1):($this) @@ -126,9 +106,9 @@ component: Web log template: 1m_unmatched on: web_log.response_codes - class: Web Server + class: Errors + type: Web Server component: Web log - type: Errors families: * lookup: sum -1m unaligned of unmatched calc: $this * 100 / $1m_total_requests @@ -151,9 +131,9 @@ component: Web log template: 10m_response_time on: web_log.response_time - class: System + class: Latency + type: System component: Web log - type: Latency families: * lookup: average -10m unaligned of avg units: ms @@ -162,9 +142,9 @@ component: Web log template: web_slow on: web_log.response_time - class: Web Server + class: Latency + type: Web Server component: Web log - type: Latency families: * lookup: average -1m unaligned of avg units: ms @@ -191,9 +171,9 @@ component: Web log template: 5m_successful_old on: web_log.response_statuses - class: Web Server + class: Workload + type: Web Server component: Web log - type: Workload families: * lookup: average -5m at -5m unaligned of successful_requests units: requests/s @@ -202,9 +182,9 @@ component: Web log template: 5m_successful on: web_log.response_statuses - class: Web Server + class: Workload + type: Web Server component: Web log - type: Workload families: * lookup: average -5m unaligned of successful_requests units: requests/s @@ -213,9 +193,9 @@ component: Web log template: 5m_requests_ratio on: web_log.response_codes - class: Web Server + class: Workload + type: Web Server component: Web log - type: Workload families: * calc: ($5m_successful_old > 0)?($5m_successful * 100 / $5m_successful_old):(100) units: % @@ -233,23 +213,6 @@ component: Web log # ---------------------------------------------------GO-VERSION--------------------------------------------------------- -# make sure we can collect web log data - - template: web_log_last_collected_secs - on: web_log.requests - class: Web Server -component: Web log - type: Latency - families: * - calc: $now - $last_collected_t - units: seconds ago - every: 10s - warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) - crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) - delay: down 5m multiplier 1.5 max 1h - info: number of seconds since the last successful data collection - to: webmaster - # unmatched lines # the following alarms trigger only when there are enough data. @@ -261,9 +224,9 @@ component: Web log template: web_log_1m_total_requests on: web_log.requests - class: Web Server + class: Workload + type: Web Server component: Web log - type: Workload families: * lookup: sum -1m unaligned calc: ($this == 0)?(1):($this) @@ -273,9 +236,9 @@ component: Web log template: web_log_1m_unmatched on: web_log.excluded_requests - class: Web Server + class: Errors + type: Web Server component: Web log - type: Errors families: * lookup: sum -1m unaligned of unmatched calc: $this * 100 / $web_log_1m_total_requests @@ -298,9 +261,9 @@ component: Web log template: web_log_1m_requests on: web_log.type_requests - class: Web Server + class: Workload + type: Web Server component: Web log - type: Workload families: * lookup: sum -1m unaligned calc: ($this == 0)?(1):($this) @@ -310,9 +273,9 @@ component: Web log template: web_log_1m_successful on: web_log.type_requests - class: Web Server + class: Workload + type: Web Server component: Web log - type: Workload families: * lookup: sum -1m unaligned of success calc: $this * 100 / $web_log_1m_requests @@ -326,41 +289,39 @@ component: Web log template: web_log_1m_redirects on: web_log.type_requests - class: Web Server + class: Workload + type: Web Server component: Web log - type: Workload families: * lookup: sum -1m unaligned of redirect calc: $this * 100 / $web_log_1m_requests units: % every: 10s warn: ($web_log_1m_requests > 120) ? ($this > (($status >= $WARNING ) ? ( 1 ) : ( 20 )) ) : ( 0 ) - crit: ($web_log_1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 20 ) : ( 30 )) ) : ( 0 ) delay: up 2m down 15m multiplier 1.5 max 1h info: ratio of redirection HTTP requests over the last minute (3xx except 304) to: webmaster template: web_log_1m_bad_requests on: web_log.type_requests - class: Web Server + class: Errors + type: Web Server component: Web log - type: Errors families: * lookup: sum -1m unaligned of bad calc: $this * 100 / $web_log_1m_requests units: % every: 10s warn: ($web_log_1m_requests > 120) ? ($this > (($status >= $WARNING) ? ( 10 ) : ( 30 )) ) : ( 0 ) - crit: ($web_log_1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 30 ) : ( 50 )) ) : ( 0 ) delay: up 2m down 15m multiplier 1.5 max 1h info: ratio of client error HTTP requests over the last minute (4xx except 401) to: webmaster template: web_log_1m_internal_errors on: web_log.type_requests - class: Web Server + class: Errors + type: Web Server component: Web log - type: Errors families: * lookup: sum -1m unaligned of error calc: $this * 100 / $web_log_1m_requests @@ -384,9 +345,9 @@ component: Web log template: web_log_10m_response_time on: web_log.request_processing_time - class: System + class: Latency + type: System component: Web log - type: Latency families: * lookup: average -10m unaligned of avg units: ms @@ -395,9 +356,9 @@ component: Web log template: web_log_web_slow on: web_log.request_processing_time - class: Web Server + class: Latency + type: Web Server component: Web log - type: Latency families: * lookup: average -1m unaligned of avg units: ms @@ -424,9 +385,9 @@ component: Web log template: web_log_5m_successful_old on: web_log.type_requests - class: Web Server + class: Workload + type: Web Server component: Web log - type: Workload families: * lookup: average -5m at -5m unaligned of success units: requests/s @@ -435,9 +396,9 @@ component: Web log template: web_log_5m_successful on: web_log.type_requests - class: Web Server + class: Workload + type: Web Server component: Web log - type: Workload families: * lookup: average -5m unaligned of success units: requests/s @@ -446,9 +407,9 @@ component: Web log template: web_log_5m_requests_ratio on: web_log.type_requests - class: Web Server + class: Workload + type: Web Server component: Web log - type: Workload families: * calc: ($web_log_5m_successful_old > 0)?($web_log_5m_successful * 100 / $web_log_5m_successful_old):(100) units: % |