---
plugin_name: go.d.plugin
modules:
- meta:
id: collector-go.d.plugin-hdfs
plugin_name: go.d.plugin
module_name: hdfs
monitored_instance:
name: Hadoop Distributed File System (HDFS)
link: https://hadoop.apache.org/docs/r1.2.1/hdfs_design.html
icon_filename: hadoop.svg
categories:
- data-collection.storage-mount-points-and-filesystems
keywords:
- hdfs
- hadoop
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
description: ""
most_popular: true
overview:
data_collection:
metrics_description: |
This collector monitors HDFS nodes.
Netdata accesses HDFS metrics over `Java Management Extensions` (JMX) through the web interface of an HDFS daemon.
method_description: ""
supported_platforms:
include: []
exclude: []
multi_instance: true
additional_permissions:
description: ""
default_behavior:
auto_detection:
description: ""
limits:
description: ""
performance_impact:
description: ""
setup:
prerequisites:
list: []
configuration:
file:
name: go.d/hdfs.conf
options:
description: |
The following options can be defined globally: update_every, autodetection_retry.
folding:
title: Config options
enabled: true
list:
- name: update_every
description: Data collection frequency.
default_value: 1
required: false
- name: autodetection_retry
description: Recheck interval in seconds. Zero means no recheck will be scheduled.
default_value: 0
required: false
- name: url
description: Server URL.
default_value: http://127.0.0.1:9870/jmx
required: true
- name: timeout
description: HTTP request timeout.
default_value: 1
required: false
- name: username
description: Username for basic HTTP authentication.
default_value: ""
required: false
- name: password
description: Password for basic HTTP authentication.
default_value: ""
required: false
- name: proxy_url
description: Proxy URL.
default_value: ""
required: false
- name: proxy_username
description: Username for proxy basic HTTP authentication.
default_value: ""
required: false
- name: proxy_password
description: Password for proxy basic HTTP authentication.
default_value: ""
required: false
- name: method
description: HTTP request method.
default_value: "GET"
required: false
- name: body
description: HTTP request body.
default_value: ""
required: false
- name: headers
description: HTTP request headers.
default_value: ""
required: false
- name: not_follow_redirects
description: Redirect handling policy. Controls whether the client follows redirects.
default_value: no
required: false
- name: tls_skip_verify
description: Server certificate chain and hostname validation policy. Controls whether the client performs this check.
default_value: no
required: false
- name: tls_ca
description: Certification authority that the client uses when verifying the server's certificates.
default_value: ""
required: false
- name: tls_cert
description: Client TLS certificate.
default_value: ""
required: false
- name: tls_key
description: Client TLS key.
default_value: ""
required: false
examples:
folding:
title: Config
enabled: true
list:
- name: Basic
folding:
enabled: false
description: A basic example configuration.
config: |
jobs:
- name: local
url: http://127.0.0.1:9870/jmx
- name: HTTP authentication
description: Basic HTTP authentication.
config: |
jobs:
- name: local
url: http://127.0.0.1:9870/jmx
username: username
password: password
- name: HTTPS with self-signed certificate
description: |
Do not validate server certificate chain and hostname.
config: |
jobs:
- name: local
url: https://127.0.0.1:9870/jmx
tls_skip_verify: yes
- name: Multi-instance
description: |
> **Note**: When you define multiple jobs, their names must be unique.
Collecting metrics from local and remote instances.
config: |
jobs:
- name: local
url: http://127.0.0.1:9870/jmx
- name: remote
url: http://192.0.2.1:9870/jmx
troubleshooting:
problems:
list: []
alerts:
- name: hdfs_capacity_usage
metric: hdfs.capacity
info: summary datanodes space capacity utilization
link: https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf
- name: hdfs_missing_blocks
metric: hdfs.blocks
info: number of missing blocks
link: https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf
- name: hdfs_stale_nodes
metric: hdfs.data_nodes
info: number of datanodes marked stale due to delayed heartbeat
link: https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf
- name: hdfs_dead_nodes
metric: hdfs.data_nodes
info: number of datanodes which are currently dead
link: https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf
- name: hdfs_num_failed_volumes
metric: hdfs.num_failed_volumes
info: number of failed volumes
link: https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf
metrics:
folding:
title: Metrics
enabled: false
description: ""
availability:
- DataNode
- NameNode
scopes:
- name: global
description: These metrics refer to the entire monitored application.
labels: []
metrics:
- name: hdfs.heap_memory
description: Heap Memory
unit: MiB
chart_type: area
dimensions:
- name: committed
- name: used
- name: hdfs.gc_count_total
description: GC Events
unit: events/s
chart_type: line
dimensions:
- name: gc
- name: hdfs.gc_time_total
description: GC Time
unit: ms
chart_type: line
dimensions:
- name: ms
- name: hdfs.gc_threshold
description: Number of Times That the GC Threshold is Exceeded
unit: events/s
chart_type: line
dimensions:
- name: info
- name: warn
- name: hdfs.threads
description: Number of Threads
unit: num
chart_type: stacked
dimensions:
- name: new
- name: runnable
- name: blocked
- name: waiting
- name: timed_waiting
- name: terminated
- name: hdfs.logs_total
description: Number of Logs
unit: logs/s
chart_type: stacked
dimensions:
- name: info
- name: error
- name: warn
- name: fatal
- name: hdfs.rpc_bandwidth
description: RPC Bandwidth
unit: kilobits/s
chart_type: area
dimensions:
- name: received
- name: sent
- name: hdfs.rpc_calls
description: RPC Calls
unit: calls/s
chart_type: line
dimensions:
- name: calls
- name: hdfs.open_connections
description: RPC Open Connections
unit: connections
chart_type: line
dimensions:
- name: open
- name: hdfs.call_queue_length
description: RPC Call Queue Length
unit: num
chart_type: line
dimensions:
- name: length
- name: hdfs.avg_queue_time
description: RPC Avg Queue Time
unit: ms
chart_type: line
dimensions:
- name: time
- name: hdfs.avg_processing_time
description: RPC Avg Processing Time
unit: ms
chart_type: line
dimensions:
- name: time
- name: hdfs.capacity
description: Capacity Across All Datanodes
unit: KiB
chart_type: stacked
availability:
- NameNode
dimensions:
- name: remaining
- name: used
- name: hdfs.used_capacity
description: Used Capacity Across All Datanodes
unit: KiB
chart_type: stacked
availability:
- NameNode
dimensions:
- name: dfs
- name: non_dfs
- name: hdfs.load
description: Number of Concurrent File Accesses (read/write) Across All DataNodes
unit: load
chart_type: line
availability:
- NameNode
dimensions:
- name: load
- name: hdfs.volume_failures_total
description: Number of Volume Failures Across All Datanodes
unit: events/s
chart_type: line
availability:
- NameNode
dimensions:
- name: failures
- name: hdfs.files_total
description: Number of Tracked Files
unit: num
chart_type: line
availability:
- NameNode
dimensions:
- name: files
- name: hdfs.blocks_total
description: Number of Allocated Blocks in the System
unit: num
chart_type: line
availability:
- NameNode
dimensions:
- name: blocks
- name: hdfs.blocks
description: Number of Problem Blocks (can point to an unhealthy cluster)
unit: num
chart_type: line
availability:
- NameNode
dimensions:
- name: corrupt
- name: missing
- name: under_replicated
- name: hdfs.data_nodes
description: Number of Data Nodes By Status
unit: num
chart_type: stacked
availability:
- NameNode
dimensions:
- name: live
- name: dead
- name: stale
- name: hdfs.datanode_capacity
description: Capacity
unit: KiB
chart_type: stacked
availability:
- DataNode
dimensions:
- name: remaining
- name: used
- name: hdfs.datanode_used_capacity
description: Used Capacity
unit: KiB
chart_type: stacked
availability:
- DataNode
dimensions:
- name: dfs
- name: non_dfs
- name: hdfs.datanode_failed_volumes
description: Number of Failed Volumes
unit: num
chart_type: line
availability:
- DataNode
dimensions:
- name: failed volumes
- name: hdfs.datanode_bandwidth
description: Bandwidth
unit: KiB/s
chart_type: area
availability:
- DataNode
dimensions:
- name: reads
- name: writes