summaryrefslogtreecommitdiffstats
path: root/health/health.d/hdfs.conf
diff options
context:
space:
mode:
Diffstat (limited to 'health/health.d/hdfs.conf')
-rw-r--r--health/health.d/hdfs.conf81
1 files changed, 0 insertions, 81 deletions
diff --git a/health/health.d/hdfs.conf b/health/health.d/hdfs.conf
deleted file mode 100644
index 566e815aa..000000000
--- a/health/health.d/hdfs.conf
+++ /dev/null
@@ -1,81 +0,0 @@
-
-# Common
-
- template: hdfs_capacity_usage
- on: hdfs.capacity
- class: Utilization
- type: Storage
-component: HDFS
- calc: ($used) * 100 / ($used + $remaining)
- units: %
- every: 10s
- warn: $this > (($status >= $WARNING) ? (70) : (80))
- crit: $this > (($status == $CRITICAL) ? (80) : (98))
- delay: down 15m multiplier 1.5 max 1h
- summary: HDFS datanodes space utilization
- info: summary datanodes space capacity utilization
- to: sysadmin
-
-
-# NameNode
-
- template: hdfs_missing_blocks
- on: hdfs.blocks
- class: Errors
- type: Storage
-component: HDFS
- calc: $missing
- units: missing blocks
- every: 10s
- warn: $this > 0
- delay: down 15m multiplier 1.5 max 1h
- summary: HDFS missing blocks
- info: number of missing blocks
- to: sysadmin
-
-
- template: hdfs_stale_nodes
- on: hdfs.data_nodes
- class: Errors
- type: Storage
-component: HDFS
- calc: $stale
- units: dead nodes
- every: 10s
- warn: $this > 0
- delay: down 15m multiplier 1.5 max 1h
- summary: HDFS stale datanodes
- info: number of datanodes marked stale due to delayed heartbeat
- to: sysadmin
-
-
- template: hdfs_dead_nodes
- on: hdfs.data_nodes
- class: Errors
- type: Storage
-component: HDFS
- calc: $dead
- units: dead nodes
- every: 10s
- crit: $this > 0
- delay: down 15m multiplier 1.5 max 1h
- summary: HDFS dead datanodes
- info: number of datanodes which are currently dead
- to: sysadmin
-
-
-# DataNode
-
- template: hdfs_num_failed_volumes
- on: hdfs.num_failed_volumes
- class: Errors
- type: Storage
-component: HDFS
- calc: $fsds_num_failed_volumes
- units: failed volumes
- every: 10s
- warn: $this > 0
- delay: down 15m multiplier 1.5 max 1h
- summary: HDFS failed volumes
- info: number of failed volumes
- to: sysadmin