summary | refs | log | tree | commit | diff | stats
path: root/health/health.d/hdfs.conf
diff options
context:
space:
mode:
Diffstat (limited to 'health/health.d/hdfs.conf')
-rw-r--r-- health/health.d/hdfs.conf 37
1 files changed, 10 insertions, 27 deletions
diff --git a/health/health.d/hdfs.conf b/health/health.d/hdfs.conf
index bd8308bed..ca8df31b9 100644
--- a/health/health.d/hdfs.conf
+++ b/health/health.d/hdfs.conf
@@ -1,28 +1,11 @@
-# make sure hdfs is running
-
- template: hdfs_last_collected_secs
- on: hdfs.heap_memory
- class: Storage
-component: HDFS
- type: Latency
- calc: $now - $last_collected_t
- units: seconds ago
- every: 10s
- warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
- delay: down 5m multiplier 1.5 max 1h
- info: number of seconds since the last successful data collection
- to: webmaster
-
-
# Common
template: hdfs_capacity_usage
on: hdfs.capacity
- class: Storage
+ class: Utilization
+ type: Storage
component: HDFS
- type: Utilization
calc: ($used) * 100 / ($used + $remaining)
units: %
every: 10s
@@ -37,9 +20,9 @@ component: HDFS
template: hdfs_missing_blocks
on: hdfs.blocks
- class: Storage
+ class: Errors
+ type: Storage
component: HDFS
- type: Errors
calc: $missing
units: missing blocks
every: 10s
@@ -51,9 +34,9 @@ component: HDFS
template: hdfs_stale_nodes
on: hdfs.data_nodes
- class: Storage
+ class: Errors
+ type: Storage
component: HDFS
- type: Errors
calc: $stale
units: dead nodes
every: 10s
@@ -65,9 +48,9 @@ component: HDFS
template: hdfs_dead_nodes
on: hdfs.data_nodes
- class: Storage
+ class: Errors
+ type: Storage
component: HDFS
- type: Errors
calc: $dead
units: dead nodes
every: 10s
@@ -81,9 +64,9 @@ component: HDFS
template: hdfs_num_failed_volumes
on: hdfs.num_failed_volumes
- class: Storage
+ class: Errors
+ type: Storage
component: HDFS
- type: Errors
calc: $fsds_num_failed_volumes
units: failed volumes
every: 10s