Diffstat
l---------  src/go/collectors/go.d.plugin/modules/hdfs/README.md  1
-rw-r--r--  src/go/collectors/go.d.plugin/modules/hdfs/charts.go  328
-rw-r--r--  src/go/collectors/go.d.plugin/modules/hdfs/client.go  69
-rw-r--r--  src/go/collectors/go.d.plugin/modules/hdfs/collect.go  201
-rw-r--r--  src/go/collectors/go.d.plugin/modules/hdfs/config_schema.json  180
-rw-r--r--  src/go/collectors/go.d.plugin/modules/hdfs/hdfs.go  132
-rw-r--r--  src/go/collectors/go.d.plugin/modules/hdfs/hdfs_test.go  316
-rw-r--r--  src/go/collectors/go.d.plugin/modules/hdfs/init.go  25
-rw-r--r--  src/go/collectors/go.d.plugin/modules/hdfs/integrations/hadoop_distributed_file_system_hdfs.md  251
-rw-r--r--  src/go/collectors/go.d.plugin/modules/hdfs/metadata.yaml  388
-rw-r--r--  src/go/collectors/go.d.plugin/modules/hdfs/metrics.go  245
-rw-r--r--  src/go/collectors/go.d.plugin/modules/hdfs/raw_data.go  51
-rw-r--r--  src/go/collectors/go.d.plugin/modules/hdfs/testdata/config.json  20
-rw-r--r--  src/go/collectors/go.d.plugin/modules/hdfs/testdata/config.yaml  17
-rw-r--r--  src/go/collectors/go.d.plugin/modules/hdfs/testdata/datanode.json  165
-rw-r--r--  src/go/collectors/go.d.plugin/modules/hdfs/testdata/namenode.json  132
-rw-r--r--  src/go/collectors/go.d.plugin/modules/hdfs/testdata/unknownnode.json  34
17 files changed, 2555 insertions, 0 deletions
diff --git a/src/go/collectors/go.d.plugin/modules/hdfs/README.md b/src/go/collectors/go.d.plugin/modules/hdfs/README.md
new file mode 120000
index 000000000..38f428a06
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/hdfs/README.md
@@ -0,0 +1 @@
+integrations/hadoop_distributed_file_system_hdfs.md
\ No newline at end of file
diff --git a/src/go/collectors/go.d.plugin/modules/hdfs/charts.go b/src/go/collectors/go.d.plugin/modules/hdfs/charts.go
new file mode 100644
index 000000000..94af99d2f
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/hdfs/charts.go
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package hdfs
+
+import "github.com/netdata/netdata/go/go.d.plugin/agent/module"
+
+type (
+ Charts = module.Charts
+ Dims = module.Dims
+ Vars = module.Vars
+)
+
+var jvmCharts = Charts{
+ {
+ ID: "jvm_heap_memory",
+ Title: "Heap Memory",
+ Units: "MiB",
+ Fam: "jvm",
+ Ctx: "hdfs.heap_memory",
+ Type: module.Area,
+ Dims: Dims{
+ {ID: "jvm_mem_heap_committed", Name: "committed", Div: 1000},
+ {ID: "jvm_mem_heap_used", Name: "used", Div: 1000},
+ },
+ Vars: Vars{
+ {ID: "jvm_mem_heap_max"},
+ },
+ },
+ {
+ ID: "jvm_gc_count_total",
+ Title: "GC Events",
+ Units: "events/s",
+ Fam: "jvm",
+ Ctx: "hdfs.gc_count_total",
+ Dims: Dims{
+ {ID: "jvm_gc_count", Name: "gc", Algo: module.Incremental},
+ },
+ },
+ {
+ ID: "jvm_gc_time_total",
+ Title: "GC Time",
+ Units: "ms",
+ Fam: "jvm",
+ Ctx: "hdfs.gc_time_total",
+ Dims: Dims{
+ {ID: "jvm_gc_time_millis", Name: "time", Algo: module.Incremental},
+ },
+ },
+ {
+ ID: "jvm_gc_threshold",
+ Title: "Number of Times That the GC Threshold is Exceeded",
+ Units: "events/s",
+ Fam: "jvm",
+ Ctx: "hdfs.gc_threshold",
+ Dims: Dims{
+ {ID: "jvm_gc_num_info_threshold_exceeded", Name: "info", Algo: module.Incremental},
+ {ID: "jvm_gc_num_warn_threshold_exceeded", Name: "warn", Algo: module.Incremental},
+ },
+ },
+ {
+ ID: "jvm_threads",
+ Title: "Number of Threads",
+ Units: "num",
+ Fam: "jvm",
+ Ctx: "hdfs.threads",
+ Type: module.Stacked,
+ Dims: Dims{
+ {ID: "jvm_threads_new", Name: "new"},
+ {ID: "jvm_threads_runnable", Name: "runnable"},
+ {ID: "jvm_threads_blocked", Name: "blocked"},
+ {ID: "jvm_threads_waiting", Name: "waiting"},
+ {ID: "jvm_threads_timed_waiting", Name: "timed_waiting"},
+ {ID: "jvm_threads_terminated", Name: "terminated"},
+ },
+ },
+ {
+ ID: "jvm_logs_total",
+ Title: "Number of Logs",
+ Units: "logs/s",
+ Fam: "jvm",
+ Ctx: "hdfs.logs_total",
+ Type: module.Stacked,
+ Dims: Dims{
+ {ID: "jvm_log_info", Name: "info", Algo: module.Incremental},
+ {ID: "jvm_log_error", Name: "error", Algo: module.Incremental},
+ {ID: "jvm_log_warn", Name: "warn", Algo: module.Incremental},
+ {ID: "jvm_log_fatal", Name: "fatal", Algo: module.Incremental},
+ },
+ },
+}
+
+var rpcActivityCharts = Charts{
+ {
+ ID: "rpc_bandwidth",
+ Title: "RPC Bandwidth",
+ Units: "kilobits/s",
+ Fam: "rpc",
+ Ctx: "hdfs.rpc_bandwidth",
+ Type: module.Area,
+ Dims: Dims{
+ {ID: "rpc_received_bytes", Name: "received", Div: 1000, Algo: module.Incremental},
+ {ID: "rpc_sent_bytes", Name: "sent", Div: -1000, Algo: module.Incremental},
+ },
+ },
+ {
+ ID: "rpc_calls",
+ Title: "RPC Calls",
+ Units: "calls/s",
+ Fam: "rpc",
+ Ctx: "hdfs.rpc_calls",
+ Dims: Dims{
+ {ID: "rpc_queue_time_num_ops", Name: "calls", Algo: module.Incremental},
+ },
+ },
+ {
+ ID: "rpc_open_connections",
+ Title: "RPC Open Connections",
+ Units: "connections",
+ Fam: "rpc",
+ Ctx: "hdfs.open_connections",
+ Dims: Dims{
+ {ID: "rpc_num_open_connections", Name: "open"},
+ },
+ },
+ {
+ ID: "rpc_call_queue_length",
+ Title: "RPC Call Queue Length",
+ Units: "num",
+ Fam: "rpc",
+ Ctx: "hdfs.call_queue_length",
+ Dims: Dims{
+ {ID: "rpc_call_queue_length", Name: "length"},
+ },
+ },
+ {
+ ID: "rpc_avg_queue_time",
+ Title: "RPC Avg Queue Time",
+ Units: "ms",
+ Fam: "rpc",
+ Ctx: "hdfs.avg_queue_time",
+ Dims: Dims{
+ {ID: "rpc_queue_time_avg_time", Name: "time", Div: 1000},
+ },
+ },
+ {
+ ID: "rpc_avg_processing_time",
+ Title: "RPC Avg Processing Time",
+ Units: "ms",
+ Fam: "rpc",
+ Ctx: "hdfs.avg_processing_time",
+ Dims: Dims{
+ {ID: "rpc_processing_time_avg_time", Name: "time", Div: 1000},
+ },
+ },
+}
+
+var fsNameSystemCharts = Charts{
+ {
+ ID: "fs_name_system_capacity",
+ Title: "Capacity Across All Datanodes",
+ Units: "KiB",
+ Fam: "fs name system",
+ Ctx: "hdfs.capacity",
+ Type: module.Stacked,
+ Dims: Dims{
+ {ID: "fsns_capacity_remaining", Name: "remaining", Div: 1024},
+ {ID: "fsns_capacity_used", Name: "used", Div: 1024},
+ },
+ Vars: Vars{
+ {ID: "fsns_capacity_total"},
+ },
+ },
+ {
+ ID: "fs_name_system_used_capacity",
+ Title: "Used Capacity Across All Datanodes",
+ Units: "KiB",
+ Fam: "fs name system",
+ Ctx: "hdfs.used_capacity",
+ Type: module.Stacked,
+ Dims: Dims{
+ {ID: "fsns_capacity_used_dfs", Name: "dfs", Div: 1024},
+ {ID: "fsns_capacity_used_non_dfs", Name: "non_dfs", Div: 1024},
+ },
+ },
+ {
+ ID: "fs_name_system_load",
+ Title: "Number of Concurrent File Accesses (read/write) Across All DataNodes",
+ Units: "load",
+ Fam: "fs name system",
+ Ctx: "hdfs.load",
+ Dims: Dims{
+ {ID: "fsns_total_load", Name: "load"},
+ },
+ },
+ {
+ ID: "fs_name_system_volume_failures_total",
+ Title: "Number of Volume Failures Across All Datanodes",
+ Units: "events/s",
+ Fam: "fs name system",
+ Ctx: "hdfs.volume_failures_total",
+ Dims: Dims{
+ {ID: "fsns_volume_failures_total", Name: "failures", Algo: module.Incremental},
+ },
+ },
+ {
+ ID: "fs_files_total",
+ Title: "Number of Tracked Files",
+ Units: "num",
+ Fam: "fs name system",
+ Ctx: "hdfs.files_total",
+ Dims: Dims{
+ {ID: "fsns_files_total", Name: "files"},
+ },
+ },
+ {
+ ID: "fs_blocks_total",
+ Title: "Number of Allocated Blocks in the System",
+ Units: "num",
+ Fam: "fs name system",
+ Ctx: "hdfs.blocks_total",
+ Dims: Dims{
+ {ID: "fsns_blocks_total", Name: "blocks"},
+ },
+ },
+ {
+ ID: "fs_problem_blocks",
+ Title: "Number of Problem Blocks (can point to an unhealthy cluster)",
+ Units: "num",
+ Fam: "fs name system",
+ Ctx: "hdfs.blocks",
+ Dims: Dims{
+ {ID: "fsns_corrupt_blocks", Name: "corrupt"},
+ {ID: "fsns_missing_blocks", Name: "missing"},
+ {ID: "fsns_under_replicated_blocks", Name: "under_replicated"},
+ },
+ },
+ {
+ ID: "fs_name_system_data_nodes",
+ Title: "Number of Data Nodes By Status",
+ Units: "num",
+ Fam: "fs name system",
+ Ctx: "hdfs.data_nodes",
+ Type: module.Stacked,
+ Dims: Dims{
+ {ID: "fsns_num_live_data_nodes", Name: "live"},
+ {ID: "fsns_num_dead_data_nodes", Name: "dead"},
+ {ID: "fsns_stale_data_nodes", Name: "stale"},
+ },
+ },
+}
+
+var fsDatasetStateCharts = Charts{
+ {
+ ID: "fs_dataset_state_capacity",
+ Title: "Capacity",
+ Units: "KiB",
+ Fam: "fs dataset",
+ Ctx: "hdfs.datanode_capacity",
+ Type: module.Stacked,
+ Dims: Dims{
+ {ID: "fsds_capacity_remaining", Name: "remaining", Div: 1024},
+ {ID: "fsds_capacity_used", Name: "used", Div: 1024},
+ },
+ Vars: Vars{
+ {ID: "fsds_capacity_total"},
+ },
+ },
+ {
+ ID: "fs_dataset_state_used_capacity",
+ Title: "Used Capacity",
+ Units: "KiB",
+ Fam: "fs dataset",
+ Ctx: "hdfs.datanode_used_capacity",
+ Type: module.Stacked,
+ Dims: Dims{
+ {ID: "fsds_capacity_used_dfs", Name: "dfs", Div: 1024},
+ {ID: "fsds_capacity_used_non_dfs", Name: "non_dfs", Div: 1024},
+ },
+ },
+ {
+ ID: "fs_dataset_state_num_failed_volumes",
+ Title: "Number of Failed Volumes",
+ Units: "num",
+ Fam: "fs dataset",
+ Ctx: "hdfs.datanode_failed_volumes",
+ Dims: Dims{
+ {ID: "fsds_num_failed_volumes", Name: "failed volumes"},
+ },
+ },
+}
+
+var fsDataNodeActivityCharts = Charts{
+ {
+ ID: "dna_bandwidth",
+ Title: "Bandwidth",
+ Units: "KiB/s",
+ Fam: "activity",
+ Ctx: "hdfs.datanode_bandwidth",
+ Type: module.Area,
+ Dims: Dims{
+ {ID: "dna_bytes_read", Name: "reads", Div: 1024, Algo: module.Incremental},
+ {ID: "dna_bytes_written", Name: "writes", Div: -1024, Algo: module.Incremental},
+ },
+ },
+}
+
+func dataNodeCharts() *Charts {
+ charts := Charts{}
+ panicIfError(charts.Add(*jvmCharts.Copy()...))
+ panicIfError(charts.Add(*rpcActivityCharts.Copy()...))
+ panicIfError(charts.Add(*fsDatasetStateCharts.Copy()...))
+ panicIfError(charts.Add(*fsDataNodeActivityCharts.Copy()...))
+ return &charts
+}
+
+func nameNodeCharts() *Charts {
+ charts := Charts{}
+ panicIfError(charts.Add(*jvmCharts.Copy()...))
+ panicIfError(charts.Add(*rpcActivityCharts.Copy()...))
+ panicIfError(charts.Add(*fsNameSystemCharts.Copy()...))
+ return &charts
+}
+
+func panicIfError(err error) {
+ if err != nil {
+ panic(err)
+ }
+}
diff --git a/src/go/collectors/go.d.plugin/modules/hdfs/client.go b/src/go/collectors/go.d.plugin/modules/hdfs/client.go
new file mode 100644
index 000000000..bdeced146
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/hdfs/client.go
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package hdfs
+
+import (
+ "encoding/json"
+ "fmt"
+ "io"
+ "net/http"
+
+ "github.com/netdata/netdata/go/go.d.plugin/pkg/web"
+)
+
+func newClient(httpClient *http.Client, request web.Request) *client {
+ return &client{
+ httpClient: httpClient,
+ request: request,
+ }
+}
+
+type client struct {
+ httpClient *http.Client
+ request web.Request
+}
+
+func (c *client) do() (*http.Response, error) {
+ req, err := web.NewHTTPRequest(c.request)
+ if err != nil {
+ return nil, fmt.Errorf("error on creating http request to %s : %v", c.request.URL, err)
+ }
+
+ // req.Header.Add("Accept-Encoding", "gzip")
+ // req.Header.Set("User-Agent", "netdata/go.d.plugin")
+
+ return c.httpClient.Do(req)
+}
+
+func (c *client) doOK() (*http.Response, error) {
+ resp, err := c.do()
+ if err != nil {
+ return nil, err
+ }
+
+ if resp.StatusCode != http.StatusOK {
+ return resp, fmt.Errorf("%s returned %d", c.request.URL, resp.StatusCode)
+ }
+ return resp, nil
+}
+
+func (c *client) doOKWithDecodeJSON(dst interface{}) error {
+ resp, err := c.doOK()
+ defer closeBody(resp)
+ if err != nil {
+ return err
+ }
+
+ err = json.NewDecoder(resp.Body).Decode(dst)
+ if err != nil {
+ return fmt.Errorf("error on decoding response from %s : %v", c.request.URL, err)
+ }
+ return nil
+}
+
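+// closeBody drains and closes the response body so the underlying
+// TCP connection can be reused by the HTTP client.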
+func closeBody(resp *http.Response) {
+ if resp != nil && resp.Body != nil {
+ _, _ = io.Copy(io.Discard, resp.Body)
+ _ = resp.Body.Close()
+ }
+}
diff --git a/src/go/collectors/go.d.plugin/modules/hdfs/collect.go b/src/go/collectors/go.d.plugin/modules/hdfs/collect.go
new file mode 100644
index 000000000..d7081d36a
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/hdfs/collect.go
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package hdfs
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "strings"
+
+ "github.com/netdata/netdata/go/go.d.plugin/pkg/stm"
+)
+
+func (h *HDFS) collect() (map[string]int64, error) {
+ var raw rawJMX
+ err := h.client.doOKWithDecodeJSON(&raw)
+ if err != nil {
+ return nil, err
+ }
+
+ if raw.isEmpty() {
+ return nil, errors.New("empty response")
+ }
+
+ mx := h.collectRawJMX(raw)
+
+ return stm.ToMap(mx), nil
+}
+
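+// determineNodeType queries the JMX endpoint and reads the "tag.ProcessName"
+// field of the JvmMetrics bean to decide whether the monitored daemon
+// is a NameNode or a DataNode.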
+func (h *HDFS) determineNodeType() (nodeType, error) {
+ var raw rawJMX
+ err := h.client.doOKWithDecodeJSON(&raw)
+ if err != nil {
+ return "", err
+ }
+
+ if raw.isEmpty() {
+ return "", errors.New("empty response")
+ }
+
+ jvm := raw.findJvm()
+ if jvm == nil {
+ return "", errors.New("couldn't find jvm in response")
+ }
+
+ v, ok := jvm["tag.ProcessName"]
+ if !ok {
+ return "", errors.New("couldn't find process name in JvmMetrics")
+ }
+
+ t := nodeType(strings.Trim(string(v), "\""))
+ if t == nameNodeType || t == dataNodeType {
+ return t, nil
+ }
+ return "", errors.New("unknown node type")
+}
+
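+// collectRawJMX dispatches on the node type detected during Check;
+// an unknown node type is a programming error and results in a panic.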
+func (h *HDFS) collectRawJMX(raw rawJMX) *metrics {
+ var mx metrics
+ switch h.nodeType {
+ default:
+ panic(fmt.Sprintf("unsupported node type : '%s'", h.nodeType))
+ case nameNodeType:
+ h.collectNameNode(&mx, raw)
+ case dataNodeType:
+ h.collectDataNode(&mx, raw)
+ }
+ return &mx
+}
+
+func (h *HDFS) collectNameNode(mx *metrics, raw rawJMX) {
+ err := h.collectJVM(mx, raw)
+ if err != nil {
+ h.Debugf("error on collecting jvm : %v", err)
+ }
+
+ err = h.collectRPCActivity(mx, raw)
+ if err != nil {
+ h.Debugf("error on collecting rpc activity : %v", err)
+ }
+
+ err = h.collectFSNameSystem(mx, raw)
+ if err != nil {
+ h.Debugf("error on collecting fs name system : %v", err)
+ }
+}
+
+func (h *HDFS) collectDataNode(mx *metrics, raw rawJMX) {
+ err := h.collectJVM(mx, raw)
+ if err != nil {
+ h.Debugf("error on collecting jvm : %v", err)
+ }
+
+ err = h.collectRPCActivity(mx, raw)
+ if err != nil {
+ h.Debugf("error on collecting rpc activity : %v", err)
+ }
+
+ err = h.collectFSDatasetState(mx, raw)
+ if err != nil {
+ h.Debugf("error on collecting fs dataset state : %v", err)
+ }
+
+ err = h.collectDataNodeActivity(mx, raw)
+ if err != nil {
+ h.Debugf("error on collecting datanode activity : %v", err)
+ }
+}
+
+func (h *HDFS) collectJVM(mx *metrics, raw rawJMX) error {
+ v := raw.findJvm()
+ if v == nil {
+ return nil
+ }
+
+ var jvm jvmMetrics
+ err := writeJSONTo(&jvm, v)
+ if err != nil {
+ return err
+ }
+
+ mx.Jvm = &jvm
+ return nil
+}
+
+func (h *HDFS) collectRPCActivity(mx *metrics, raw rawJMX) error {
+ v := raw.findRPCActivity()
+ if v == nil {
+ return nil
+ }
+
+ var rpc rpcActivityMetrics
+ err := writeJSONTo(&rpc, v)
+ if err != nil {
+ return err
+ }
+
+ mx.Rpc = &rpc
+ return nil
+}
+
+func (h *HDFS) collectFSNameSystem(mx *metrics, raw rawJMX) error {
+ v := raw.findFSNameSystem()
+ if v == nil {
+ return nil
+ }
+
+ var fs fsNameSystemMetrics
+ err := writeJSONTo(&fs, v)
+ if err != nil {
+ return err
+ }
+
+ fs.CapacityUsed = fs.CapacityDfsUsed + fs.CapacityUsedNonDFS
+
+ mx.FSNameSystem = &fs
+ return nil
+}
+
+func (h *HDFS) collectFSDatasetState(mx *metrics, raw rawJMX) error {
+ v := raw.findFSDatasetState()
+ if v == nil {
+ return nil
+ }
+
+ var fs fsDatasetStateMetrics
+ err := writeJSONTo(&fs, v)
+ if err != nil {
+ return err
+ }
+
+ fs.CapacityUsed = fs.Capacity - fs.Remaining
+ fs.CapacityUsedNonDFS = fs.CapacityUsed - fs.DfsUsed
+
+ mx.FSDatasetState = &fs
+ return nil
+}
+
+func (h *HDFS) collectDataNodeActivity(mx *metrics, raw rawJMX) error {
+ v := raw.findDataNodeActivity()
+ if v == nil {
+ return nil
+ }
+
+ var dna dataNodeActivityMetrics
+ err := writeJSONTo(&dna, v)
+ if err != nil {
+ return err
+ }
+
+ mx.DataNodeActivity = &dna
+ return nil
+}
+
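+// writeJSONTo re-marshals the generic bean value and unmarshals it into dst,
+// converting the untyped JSON map into a typed metrics struct.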
+func writeJSONTo(dst interface{}, src interface{}) error {
+ b, err := json.Marshal(src)
+ if err != nil {
+ return err
+ }
+ return json.Unmarshal(b, dst)
+}
diff --git a/src/go/collectors/go.d.plugin/modules/hdfs/config_schema.json b/src/go/collectors/go.d.plugin/modules/hdfs/config_schema.json
new file mode 100644
index 000000000..416b69418
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/hdfs/config_schema.json
@@ -0,0 +1,180 @@
+{
+ "jsonSchema": {
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "title": "HDFS collector configuration.",
+ "type": "object",
+ "properties": {
+ "update_every": {
+ "title": "Update every",
+ "description": "Data collection interval, measured in seconds.",
+ "type": "integer",
+ "minimum": 1,
+ "default": 1
+ },
+ "url": {
+ "title": "URL",
+ "description": "The URL of the HDFS DataNode or NameNode JMX endpoint.",
+ "type": "string",
+ "default": "http://127.0.0.1:9870/jmx",
+ "format": "uri"
+ },
+ "timeout": {
+ "title": "Timeout",
+ "description": "The timeout in seconds for the HTTP request.",
+ "type": "number",
+ "minimum": 0.5,
+ "default": 1
+ },
+ "not_follow_redirects": {
+ "title": "Not follow redirects",
+ "description": "If set, the client will not follow HTTP redirects automatically.",
+ "type": "boolean"
+ },
+ "username": {
+ "title": "Username",
+ "description": "The username for basic authentication.",
+ "type": "string",
+ "sensitive": true
+ },
+ "password": {
+ "title": "Password",
+ "description": "The password for basic authentication.",
+ "type": "string",
+ "sensitive": true
+ },
+ "proxy_url": {
+ "title": "Proxy URL",
+ "description": "The URL of the proxy server.",
+ "type": "string"
+ },
+ "proxy_username": {
+ "title": "Proxy username",
+ "description": "The username for proxy authentication.",
+ "type": "string",
+ "sensitive": true
+ },
+ "proxy_password": {
+ "title": "Proxy password",
+ "description": "The password for proxy authentication.",
+ "type": "string",
+ "sensitive": true
+ },
+ "headers": {
+ "title": "Headers",
+ "description": "Additional HTTP headers to include in the request.",
+ "type": [
+ "object",
+ "null"
+ ],
+ "additionalProperties": {
+ "type": "string"
+ }
+ },
+ "tls_skip_verify": {
+ "title": "Skip TLS verification",
+ "description": "If set, TLS certificate verification will be skipped.",
+ "type": "boolean"
+ },
+ "tls_ca": {
+ "title": "TLS CA",
+ "description": "The path to the CA certificate file for TLS verification.",
+ "type": "string",
+ "pattern": "^$|^/"
+ },
+ "tls_cert": {
+ "title": "TLS certificate",
+ "description": "The path to the client certificate file for TLS authentication.",
+ "type": "string",
+ "pattern": "^$|^/"
+ },
+ "tls_key": {
+ "title": "TLS key",
+ "description": "The path to the client key file for TLS authentication.",
+ "type": "string",
+ "pattern": "^$|^/"
+ },
+ "body": {
+ "title": "Body",
+ "type": "string"
+ },
+ "method": {
+ "title": "Method",
+ "type": "string"
+ }
+ },
+ "required": [
+ "url"
+ ],
+ "additionalProperties": false,
+ "patternProperties": {
+ "^name$": {}
+ }
+ },
+ "uiSchema": {
+ "ui:flavour": "tabs",
+ "ui:options": {
+ "tabs": [
+ {
+ "title": "Base",
+ "fields": [
+ "update_every",
+ "url",
+ "timeout",
+ "not_follow_redirects"
+ ]
+ },
+ {
+ "title": "Auth",
+ "fields": [
+ "username",
+ "password"
+ ]
+ },
+ {
+ "title": "TLS",
+ "fields": [
+ "tls_skip_verify",
+ "tls_ca",
+ "tls_cert",
+ "tls_key"
+ ]
+ },
+ {
+ "title": "Proxy",
+ "fields": [
+ "proxy_url",
+ "proxy_username",
+ "proxy_password"
+ ]
+ },
+ {
+ "title": "Headers",
+ "fields": [
+ "headers"
+ ]
+ }
+ ]
+ },
+ "uiOptions": {
+ "fullPage": true
+ },
+ "body": {
+ "ui:widget": "hidden"
+ },
+ "method": {
+ "ui:widget": "hidden"
+ },
+ "url": {
+ "ui:help": "By default, the DataNode's port is 9864, and the NameNode's port is 9870, as specified in the [HDFS configuration](https://hadoop.apache.org/docs/r3.1.3/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml)."
+ },
+ "timeout": {
+ "ui:help": "Accepts decimals for precise control (e.g., type 1.5 for 1.5 seconds)."
+ },
+ "password": {
+ "ui:widget": "password"
+ },
+ "proxy_password": {
+ "ui:widget": "password"
+ }
+ }
+}
diff --git a/src/go/collectors/go.d.plugin/modules/hdfs/hdfs.go b/src/go/collectors/go.d.plugin/modules/hdfs/hdfs.go
new file mode 100644
index 000000000..1b0f849a6
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/hdfs/hdfs.go
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package hdfs
+
+import (
+ _ "embed"
+ "errors"
+ "time"
+
+ "github.com/netdata/netdata/go/go.d.plugin/agent/module"
+ "github.com/netdata/netdata/go/go.d.plugin/pkg/web"
+)
+
+//go:embed "config_schema.json"
+var configSchema string
+
+func init() {
+ module.Register("hdfs", module.Creator{
+ JobConfigSchema: configSchema,
+ Create: func() module.Module { return New() },
+ Config: func() any { return &Config{} },
+ })
+}
+
+func New() *HDFS {
+ config := Config{
+ HTTP: web.HTTP{
+ Request: web.Request{
+ URL: "http://127.0.0.1:9870/jmx",
+ },
+ Client: web.Client{
+ Timeout: web.Duration(time.Second),
+ },
+ },
+ }
+
+ return &HDFS{
+ Config: config,
+ }
+}
+
+type Config struct {
+ web.HTTP `yaml:",inline" json:""`
+ UpdateEvery int `yaml:"update_every" json:"update_every"`
+}
+
+type (
+ HDFS struct {
+ module.Base
+ Config `yaml:",inline" json:""`
+
+ client *client
+
+ nodeType
+ }
+ nodeType string
+)
+
+const (
+ dataNodeType nodeType = "DataNode"
+ nameNodeType nodeType = "NameNode"
+)
+
+func (h *HDFS) Configuration() any {
+ return h.Config
+}
+
+func (h *HDFS) Init() error {
+ if err := h.validateConfig(); err != nil {
+ h.Errorf("config validation: %v", err)
+ return err
+ }
+
+ cl, err := h.createClient()
+ if err != nil {
+ h.Errorf("error on creating client : %v", err)
+ return err
+ }
+ h.client = cl
+
+ return nil
+}
+
+func (h *HDFS) Check() error {
+ typ, err := h.determineNodeType()
+ if err != nil {
+ h.Errorf("error on node type determination : %v", err)
+ return err
+ }
+ h.nodeType = typ
+
+ mx, err := h.collect()
+ if err != nil {
+ h.Error(err)
+ return err
+ }
+ if len(mx) == 0 {
+ return errors.New("no metrics collected")
+ }
+ return nil
+}
+
+func (h *HDFS) Charts() *Charts {
+ switch h.nodeType {
+ default:
+ return nil
+ case nameNodeType:
+ return nameNodeCharts()
+ case dataNodeType:
+ return dataNodeCharts()
+ }
+}
+
+func (h *HDFS) Collect() map[string]int64 {
+ mx, err := h.collect()
+
+ if err != nil {
+ h.Error(err)
+ }
+
+ if len(mx) == 0 {
+ return nil
+ }
+
+ return mx
+}
+
+func (h *HDFS) Cleanup() {
+ if h.client != nil && h.client.httpClient != nil {
+ h.client.httpClient.CloseIdleConnections()
+ }
+}
diff --git a/src/go/collectors/go.d.plugin/modules/hdfs/hdfs_test.go b/src/go/collectors/go.d.plugin/modules/hdfs/hdfs_test.go
new file mode 100644
index 000000000..f9cbdc1bb
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/hdfs/hdfs_test.go
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package hdfs
+
+import (
+ "net/http"
+ "net/http/httptest"
+ "os"
+ "testing"
+
+ "github.com/netdata/netdata/go/go.d.plugin/agent/module"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+var (
+ dataConfigJSON, _ = os.ReadFile("testdata/config.json")
+ dataConfigYAML, _ = os.ReadFile("testdata/config.yaml")
+
+ dataUnknownNodeMetrics, _ = os.ReadFile("testdata/unknownnode.json")
+ dataDataNodeMetrics, _ = os.ReadFile("testdata/datanode.json")
+ dataNameNodeMetrics, _ = os.ReadFile("testdata/namenode.json")
+)
+
+func Test_testDataIsValid(t *testing.T) {
+ for name, data := range map[string][]byte{
+ "dataConfigJSON": dataConfigJSON,
+ "dataConfigYAML": dataConfigYAML,
+ "dataUnknownNodeMetrics": dataUnknownNodeMetrics,
+ "dataDataNodeMetrics": dataDataNodeMetrics,
+ "dataNameNodeMetrics": dataNameNodeMetrics,
+ } {
+ require.NotNil(t, data, name)
+ }
+}
+
+func TestHDFS_ConfigurationSerialize(t *testing.T) {
+ module.TestConfigurationSerialize(t, &HDFS{}, dataConfigJSON, dataConfigYAML)
+}
+
+func TestHDFS_Init(t *testing.T) {
+ job := New()
+
+ assert.NoError(t, job.Init())
+}
+
+func TestHDFS_InitErrorOnCreatingClientWrongTLSCA(t *testing.T) {
+ job := New()
+ job.Client.TLSConfig.TLSCA = "testdata/tls"
+
+ assert.Error(t, job.Init())
+}
+
+func TestHDFS_Check(t *testing.T) {
+ ts := httptest.NewServer(
+ http.HandlerFunc(
+ func(w http.ResponseWriter, r *http.Request) {
+ _, _ = w.Write(dataNameNodeMetrics)
+ }))
+ defer ts.Close()
+
+ job := New()
+ job.URL = ts.URL
+ require.NoError(t, job.Init())
+
+ assert.NoError(t, job.Check())
+ assert.NotZero(t, job.nodeType)
+}
+
+func TestHDFS_CheckDataNode(t *testing.T) {
+ ts := httptest.NewServer(
+ http.HandlerFunc(
+ func(w http.ResponseWriter, r *http.Request) {
+ _, _ = w.Write(dataDataNodeMetrics)
+ }))
+ defer ts.Close()
+
+ job := New()
+ job.URL = ts.URL
+ require.NoError(t, job.Init())
+
+ assert.NoError(t, job.Check())
+ assert.Equal(t, dataNodeType, job.nodeType)
+}
+
+func TestHDFS_CheckNameNode(t *testing.T) {
+ ts := httptest.NewServer(
+ http.HandlerFunc(
+ func(w http.ResponseWriter, r *http.Request) {
+ _, _ = w.Write(dataNameNodeMetrics)
+ }))
+ defer ts.Close()
+
+ job := New()
+ job.URL = ts.URL
+ require.NoError(t, job.Init())
+
+ assert.NoError(t, job.Check())
+ assert.Equal(t, nameNodeType, job.nodeType)
+}
+
+func TestHDFS_CheckErrorOnNodeTypeDetermination(t *testing.T) {
+ ts := httptest.NewServer(
+ http.HandlerFunc(
+ func(w http.ResponseWriter, r *http.Request) {
+ _, _ = w.Write(dataUnknownNodeMetrics)
+ }))
+ defer ts.Close()
+
+ job := New()
+ job.URL = ts.URL
+ require.NoError(t, job.Init())
+
+ assert.Error(t, job.Check())
+}
+
+func TestHDFS_CheckNoResponse(t *testing.T) {
+ job := New()
+ job.URL = "http://127.0.0.1:38001/jmx"
+ require.NoError(t, job.Init())
+
+ assert.Error(t, job.Check())
+}
+
+func TestHDFS_Charts(t *testing.T) {
+ assert.Nil(t, New().Charts())
+}
+
+func TestHDFS_ChartsUnknownNode(t *testing.T) {
+ job := New()
+
+ assert.Nil(t, job.Charts())
+}
+
+func TestHDFS_ChartsDataNode(t *testing.T) {
+ job := New()
+ job.nodeType = dataNodeType
+
+ assert.Equal(t, dataNodeCharts(), job.Charts())
+}
+
+func TestHDFS_ChartsNameNode(t *testing.T) {
+ job := New()
+ job.nodeType = nameNodeType
+
+ assert.Equal(t, nameNodeCharts(), job.Charts())
+}
+
+func TestHDFS_Cleanup(t *testing.T) {
+ New().Cleanup()
+}
+
+func TestHDFS_CollectDataNode(t *testing.T) {
+ ts := httptest.NewServer(
+ http.HandlerFunc(
+ func(w http.ResponseWriter, r *http.Request) {
+ _, _ = w.Write(dataDataNodeMetrics)
+ }))
+ defer ts.Close()
+
+ job := New()
+ job.URL = ts.URL
+ require.NoError(t, job.Init())
+ require.NoError(t, job.Check())
+
+ expected := map[string]int64{
+ "dna_bytes_read": 80689178,
+ "dna_bytes_written": 500960407,
+ "fsds_capacity_remaining": 32920760320,
+ "fsds_capacity_total": 53675536384,
+ "fsds_capacity_used": 20754776064,
+ "fsds_capacity_used_dfs": 1186058240,
+ "fsds_capacity_used_non_dfs": 19568717824,
+ "fsds_num_failed_volumes": 0,
+ "jvm_gc_count": 155,
+ "jvm_gc_num_info_threshold_exceeded": 0,
+ "jvm_gc_num_warn_threshold_exceeded": 0,
+ "jvm_gc_time_millis": 672,
+ "jvm_gc_total_extra_sleep_time": 8783,
+ "jvm_log_error": 1,
+ "jvm_log_fatal": 0,
+ "jvm_log_info": 257,
+ "jvm_log_warn": 2,
+ "jvm_mem_heap_committed": 60500,
+ "jvm_mem_heap_max": 843,
+ "jvm_mem_heap_used": 18885,
+ "jvm_threads_blocked": 0,
+ "jvm_threads_new": 0,
+ "jvm_threads_runnable": 11,
+ "jvm_threads_terminated": 0,
+ "jvm_threads_timed_waiting": 25,
+ "jvm_threads_waiting": 11,
+ "rpc_call_queue_length": 0,
+ "rpc_num_open_connections": 0,
+ "rpc_processing_time_avg_time": 0,
+ "rpc_queue_time_avg_time": 0,
+ "rpc_queue_time_num_ops": 0,
+ "rpc_received_bytes": 7,
+ "rpc_sent_bytes": 187,
+ }
+
+ assert.Equal(t, expected, job.Collect())
+}
+
+func TestHDFS_CollectNameNode(t *testing.T) {
+ ts := httptest.NewServer(
+ http.HandlerFunc(
+ func(w http.ResponseWriter, r *http.Request) {
+ _, _ = w.Write(dataNameNodeMetrics)
+ }))
+ defer ts.Close()
+
+ job := New()
+ job.URL = ts.URL
+ require.NoError(t, job.Init())
+ require.NoError(t, job.Check())
+
+ expected := map[string]int64{
+ "fsns_blocks_total": 15,
+ "fsns_capacity_remaining": 65861697536,
+ "fsns_capacity_total": 107351072768,
+ "fsns_capacity_used": 41489375232,
+ "fsns_capacity_used_dfs": 2372116480,
+ "fsns_capacity_used_non_dfs": 39117258752,
+ "fsns_corrupt_blocks": 0,
+ "fsns_files_total": 12,
+ "fsns_missing_blocks": 0,
+ "fsns_num_dead_data_nodes": 0,
+ "fsns_num_live_data_nodes": 2,
+ "fsns_stale_data_nodes": 0,
+ "fsns_total_load": 2,
+ "fsns_under_replicated_blocks": 0,
+ "fsns_volume_failures_total": 0,
+ "jvm_gc_count": 1699,
+ "jvm_gc_num_info_threshold_exceeded": 0,
+ "jvm_gc_num_warn_threshold_exceeded": 0,
+ "jvm_gc_time_millis": 3483,
+ "jvm_gc_total_extra_sleep_time": 1944,
+ "jvm_log_error": 0,
+ "jvm_log_fatal": 0,
+ "jvm_log_info": 3382077,
+ "jvm_log_warn": 3378983,
+ "jvm_mem_heap_committed": 67000,
+ "jvm_mem_heap_max": 843,
+ "jvm_mem_heap_used": 26603,
+ "jvm_threads_blocked": 0,
+ "jvm_threads_new": 0,
+ "jvm_threads_runnable": 7,
+ "jvm_threads_terminated": 0,
+ "jvm_threads_timed_waiting": 34,
+ "jvm_threads_waiting": 6,
+ "rpc_call_queue_length": 0,
+ "rpc_num_open_connections": 2,
+ "rpc_processing_time_avg_time": 0,
+ "rpc_queue_time_avg_time": 58,
+ "rpc_queue_time_num_ops": 585402,
+ "rpc_received_bytes": 240431351,
+ "rpc_sent_bytes": 25067414,
+ }
+
+ assert.Equal(t, expected, job.Collect())
+}
+
+func TestHDFS_CollectUnknownNode(t *testing.T) {
+ ts := httptest.NewServer(
+ http.HandlerFunc(
+ func(w http.ResponseWriter, r *http.Request) {
+ _, _ = w.Write(dataUnknownNodeMetrics)
+ }))
+ defer ts.Close()
+
+ job := New()
+ job.URL = ts.URL
+ require.NoError(t, job.Init())
+
+ assert.Panics(t, func() { _ = job.Collect() })
+}
+
+func TestHDFS_CollectNoResponse(t *testing.T) {
+ job := New()
+ job.URL = "http://127.0.0.1:38001/jmx"
+ require.NoError(t, job.Init())
+
+ assert.Nil(t, job.Collect())
+}
+
+func TestHDFS_CollectReceiveInvalidResponse(t *testing.T) {
+ ts := httptest.NewServer(
+ http.HandlerFunc(
+ func(w http.ResponseWriter, r *http.Request) {
+ _, _ = w.Write([]byte("hello and\ngoodbye!\n"))
+ }))
+ defer ts.Close()
+
+ job := New()
+ job.URL = ts.URL
+ require.NoError(t, job.Init())
+
+ assert.Nil(t, job.Collect())
+}
+
+func TestHDFS_CollectReceive404(t *testing.T) {
+ ts := httptest.NewServer(
+ http.HandlerFunc(
+ func(w http.ResponseWriter, r *http.Request) {
+ w.WriteHeader(http.StatusNotFound)
+ }))
+ defer ts.Close()
+
+ job := New()
+ job.URL = ts.URL
+ require.NoError(t, job.Init())
+
+ assert.Nil(t, job.Collect())
+}
diff --git a/src/go/collectors/go.d.plugin/modules/hdfs/init.go b/src/go/collectors/go.d.plugin/modules/hdfs/init.go
new file mode 100644
index 000000000..79cd2e6bf
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/hdfs/init.go
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package hdfs
+
+import (
+ "errors"
+
+ "github.com/netdata/netdata/go/go.d.plugin/pkg/web"
+)
+
+func (h *HDFS) validateConfig() error {
+ if h.URL == "" {
+ return errors.New("url not set")
+ }
+ return nil
+}
+
+func (h *HDFS) createClient() (*client, error) {
+ httpClient, err := web.NewHTTPClient(h.Client)
+ if err != nil {
+ return nil, err
+ }
+
+ return newClient(httpClient, h.Request), nil
+}
diff --git a/src/go/collectors/go.d.plugin/modules/hdfs/integrations/hadoop_distributed_file_system_hdfs.md b/src/go/collectors/go.d.plugin/modules/hdfs/integrations/hadoop_distributed_file_system_hdfs.md
new file mode 100644
index 000000000..a3b39b183
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/hdfs/integrations/hadoop_distributed_file_system_hdfs.md
@@ -0,0 +1,251 @@
+<!--startmeta
+custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/go/collectors/go.d.plugin/modules/hdfs/README.md"
+meta_yaml: "https://github.com/netdata/netdata/edit/master/src/go/collectors/go.d.plugin/modules/hdfs/metadata.yaml"
+sidebar_label: "Hadoop Distributed File System (HDFS)"
+learn_status: "Published"
+learn_rel_path: "Collecting Metrics/Storage, Mount Points and Filesystems"
+most_popular: True
+message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE"
+endmeta-->
+
+# Hadoop Distributed File System (HDFS)
+
+
+<img src="https://netdata.cloud/img/hadoop.svg" width="150"/>
+
+
+Plugin: go.d.plugin
+Module: hdfs
+
+<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" />
+
+## Overview
+
+This collector monitors HDFS nodes.
+
+Netdata accesses HDFS metrics over `Java Management Extensions` (JMX) through the web interface of an HDFS daemon.
+
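+The snippet below is an illustrative sketch of that request (it is not part of the collector). It assumes a NameNode serving the default JMX endpoint at `http://127.0.0.1:9870/jmx` and simply lists the names of the returned beans.
+
+```go
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+)
+
+func main() {
+	// NameNode web UI; DataNodes listen on 9864 by default.
+	resp, err := http.Get("http://127.0.0.1:9870/jmx")
+	if err != nil {
+		panic(err)
+	}
+	defer resp.Body.Close()
+
+	// The JMX servlet returns {"beans": [...]}; each bean has a "name" field
+	// such as "Hadoop:service=NameNode,name=JvmMetrics".
+	var payload struct {
+		Beans []map[string]any `json:"beans"`
+	}
+	if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
+		panic(err)
+	}
+	for _, bean := range payload.Beans {
+		fmt.Println(bean["name"])
+	}
+}
+```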
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Hadoop Distributed File System (HDFS) instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit | DataNode | NameNode |
+|:------|:----------|:----|:---:|:---:|
+| hdfs.heap_memory | committed, used | MiB | • | • |
+| hdfs.gc_count_total | gc | events/s | • | • |
+| hdfs.gc_time_total | time | ms | • | • |
+| hdfs.gc_threshold | info, warn | events/s | • | • |
+| hdfs.threads | new, runnable, blocked, waiting, timed_waiting, terminated | num | • | • |
+| hdfs.logs_total | info, error, warn, fatal | logs/s | • | • |
+| hdfs.rpc_bandwidth | received, sent | kilobits/s | • | • |
+| hdfs.rpc_calls | calls | calls/s | • | • |
+| hdfs.open_connections | open | connections | • | • |
+| hdfs.call_queue_length | length | num | • | • |
+| hdfs.avg_queue_time | time | ms | • | • |
+| hdfs.avg_processing_time | time | ms | • | • |
+| hdfs.capacity | remaining, used | KiB | | • |
+| hdfs.used_capacity | dfs, non_dfs | KiB | | • |
+| hdfs.load | load | load | | • |
+| hdfs.volume_failures_total | failures | events/s | | • |
+| hdfs.files_total | files | num | | • |
+| hdfs.blocks_total | blocks | num | | • |
+| hdfs.blocks | corrupt, missing, under_replicated | num | | • |
+| hdfs.data_nodes | live, dead, stale | num | | • |
+| hdfs.datanode_capacity | remaining, used | KiB | • | |
+| hdfs.datanode_used_capacity | dfs, non_dfs | KiB | • | |
+| hdfs.datanode_failed_volumes | failed volumes | num | • | |
+| hdfs.datanode_bandwidth | reads, writes | KiB/s | • | |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ hdfs_capacity_usage ](https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf) | hdfs.capacity | summary datanodes space capacity utilization |
+| [ hdfs_missing_blocks ](https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf) | hdfs.blocks | number of missing blocks |
+| [ hdfs_stale_nodes ](https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf) | hdfs.data_nodes | number of datanodes marked stale due to delayed heartbeat |
+| [ hdfs_dead_nodes ](https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf) | hdfs.data_nodes | number of datanodes which are currently dead |
+| [ hdfs_num_failed_volumes ](https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf) | hdfs.num_failed_volumes | number of failed volumes |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `go.d/hdfs.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config go.d/hdfs.conf
+```
+#### Options
+
+The following options can be defined globally: update_every, autodetection_retry.
+
+
+<details open><summary>Config options</summary>
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Data collection frequency. | 1 | no |
+| autodetection_retry | Recheck interval in seconds. Zero means no recheck will be scheduled. | 0 | no |
+| url | Server URL. | http://127.0.0.1:9870/jmx | yes |
+| timeout | HTTP request timeout. | 1 | no |
+| username | Username for basic HTTP authentication. | | no |
+| password | Password for basic HTTP authentication. | | no |
+| proxy_url | Proxy URL. | | no |
+| proxy_username | Username for proxy basic HTTP authentication. | | no |
+| proxy_password | Password for proxy basic HTTP authentication. | | no |
+| method | HTTP request method. | GET | no |
+| body | HTTP request body. | | no |
+| headers | HTTP request headers. | | no |
+| not_follow_redirects | Redirect handling policy. Controls whether the client follows redirects. | no | no |
+| tls_skip_verify | Server certificate chain and hostname validation policy. Controls whether the client performs this check. | no | no |
+| tls_ca | Certification authority that the client uses when verifying the server's certificates. | | no |
+| tls_cert | Client TLS certificate. | | no |
+| tls_key | Client TLS key. | | no |
+
+</details>
+
+#### Examples
+
+##### Basic
+
+A basic example configuration.
+
+```yaml
+jobs:
+ - name: local
+ url: http://127.0.0.1:9870/jmx
+
+```
+##### HTTP authentication
+
+Basic HTTP authentication.
+
+<details open><summary>Config</summary>
+
+```yaml
+jobs:
+ - name: local
+ url: http://127.0.0.1:9870/jmx
+ username: username
+ password: password
+
+```
+</details>
+
+##### HTTPS with self-signed certificate
+
+Do not validate server certificate chain and hostname.
+
+
+<details open><summary>Config</summary>
+
+```yaml
+jobs:
+ - name: local
+ url: https://127.0.0.1:9870/jmx
+ tls_skip_verify: yes
+
+```
+</details>
+
+##### Multi-instance
+
+> **Note**: When you define multiple jobs, their names must be unique.
+
+Collecting metrics from local and remote instances.
+
+
+<details open><summary>Config</summary>
+
+```yaml
+jobs:
+ - name: local
+ url: http://127.0.0.1:9870/jmx
+
+ - name: remote
+ url: http://192.0.2.1:9870/jmx
+
+```
+</details>
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `hdfs` collector, run the `go.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `go.d.plugin` to debug the collector:
+
+ ```bash
+ ./go.d.plugin -d -m hdfs
+ ```
+
+
diff --git a/src/go/collectors/go.d.plugin/modules/hdfs/metadata.yaml b/src/go/collectors/go.d.plugin/modules/hdfs/metadata.yaml
new file mode 100644
index 000000000..694868e01
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/hdfs/metadata.yaml
@@ -0,0 +1,388 @@
+plugin_name: go.d.plugin
+modules:
+ - meta:
+ id: collector-go.d.plugin-hdfs
+ plugin_name: go.d.plugin
+ module_name: hdfs
+ monitored_instance:
+ name: Hadoop Distributed File System (HDFS)
+ link: https://hadoop.apache.org/docs/r1.2.1/hdfs_design.html
+ icon_filename: hadoop.svg
+ categories:
+ - data-collection.storage-mount-points-and-filesystems
+ keywords:
+ - hdfs
+ - hadoop
+ related_resources:
+ integrations:
+ list: []
+ info_provided_to_referring_integrations:
+ description: ""
+ most_popular: true
+ overview:
+ data_collection:
+ metrics_description: |
+ This collector monitors HDFS nodes.
+
+ Netdata accesses HDFS metrics over `Java Management Extensions` (JMX) through the web interface of an HDFS daemon.
+ method_description: ""
+ supported_platforms:
+ include: []
+ exclude: []
+ multi_instance: true
+ additional_permissions:
+ description: ""
+ default_behavior:
+ auto_detection:
+ description: ""
+ limits:
+ description: ""
+ performance_impact:
+ description: ""
+ setup:
+ prerequisites:
+ list: []
+ configuration:
+ file:
+ name: go.d/hdfs.conf
+ options:
+ description: |
+ The following options can be defined globally: update_every, autodetection_retry.
+ folding:
+ title: Config options
+ enabled: true
+ list:
+ - name: update_every
+ description: Data collection frequency.
+ default_value: 1
+ required: false
+ - name: autodetection_retry
+ description: Recheck interval in seconds. Zero means no recheck will be scheduled.
+ default_value: 0
+ required: false
+ - name: url
+ description: Server URL.
+ default_value: http://127.0.0.1:9870/jmx
+ required: true
+ - name: timeout
+ description: HTTP request timeout.
+ default_value: 1
+ required: false
+ - name: username
+ description: Username for basic HTTP authentication.
+ default_value: ""
+ required: false
+ - name: password
+ description: Password for basic HTTP authentication.
+ default_value: ""
+ required: false
+ - name: proxy_url
+ description: Proxy URL.
+ default_value: ""
+ required: false
+ - name: proxy_username
+ description: Username for proxy basic HTTP authentication.
+ default_value: ""
+ required: false
+ - name: proxy_password
+ description: Password for proxy basic HTTP authentication.
+ default_value: ""
+ required: false
+ - name: method
+ description: HTTP request method.
+ default_value: "GET"
+ required: false
+ - name: body
+ description: HTTP request body.
+ default_value: ""
+ required: false
+ - name: headers
+ description: HTTP request headers.
+ default_value: ""
+ required: false
+ - name: not_follow_redirects
+ description: Redirect handling policy. Controls whether the client follows redirects.
+ default_value: no
+ required: false
+ - name: tls_skip_verify
+ description: Server certificate chain and hostname validation policy. Controls whether the client performs this check.
+ default_value: no
+ required: false
+ - name: tls_ca
+ description: Certification authority that the client uses when verifying the server's certificates.
+ default_value: ""
+ required: false
+ - name: tls_cert
+ description: Client TLS certificate.
+ default_value: ""
+ required: false
+ - name: tls_key
+ description: Client TLS key.
+ default_value: ""
+ required: false
+ examples:
+ folding:
+ title: Config
+ enabled: true
+ list:
+ - name: Basic
+ folding:
+ enabled: false
+ description: A basic example configuration.
+ config: |
+ jobs:
+ - name: local
+ url: http://127.0.0.1:9870/jmx
+ - name: HTTP authentication
+ description: Basic HTTP authentication.
+ config: |
+ jobs:
+ - name: local
+ url: http://127.0.0.1:9870/jmx
+ username: username
+ password: password
+ - name: HTTPS with self-signed certificate
+ description: |
+ Do not validate server certificate chain and hostname.
+ config: |
+ jobs:
+ - name: local
+ url: https://127.0.0.1:9870/jmx
+ tls_skip_verify: yes
+ - name: Multi-instance
+ description: |
+ > **Note**: When you define multiple jobs, their names must be unique.
+
+ Collecting metrics from local and remote instances.
+ config: |
+ jobs:
+ - name: local
+ url: http://127.0.0.1:9870/jmx
+
+ - name: remote
+ url: http://192.0.2.1:9870/jmx
+ troubleshooting:
+ problems:
+ list: []
+ alerts:
+ - name: hdfs_capacity_usage
+ metric: hdfs.capacity
+ info: summary datanodes space capacity utilization
+ link: https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf
+ - name: hdfs_missing_blocks
+ metric: hdfs.blocks
+ info: number of missing blocks
+ link: https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf
+ - name: hdfs_stale_nodes
+ metric: hdfs.data_nodes
+ info: number of datanodes marked stale due to delayed heartbeat
+ link: https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf
+ - name: hdfs_dead_nodes
+ metric: hdfs.data_nodes
+ info: number of datanodes which are currently dead
+ link: https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf
+ - name: hdfs_num_failed_volumes
+ metric: hdfs.num_failed_volumes
+ info: number of failed volumes
+ link: https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf
+ metrics:
+ folding:
+ title: Metrics
+ enabled: false
+ description: ""
+ availability:
+ - DataNode
+ - NameNode
+ scopes:
+ - name: global
+ description: These metrics refer to the entire monitored application.
+ labels: []
+ metrics:
+ - name: hdfs.heap_memory
+ description: Heap Memory
+ unit: MiB
+ chart_type: area
+ dimensions:
+ - name: committed
+ - name: used
+ - name: hdfs.gc_count_total
+ description: GC Events
+ unit: events/s
+ chart_type: line
+ dimensions:
+ - name: gc
+ - name: hdfs.gc_time_total
+ description: GC Time
+ unit: ms
+ chart_type: line
+ dimensions:
+ - name: time
+ - name: hdfs.gc_threshold
+ description: Number of Times That the GC Threshold is Exceeded
+ unit: events/s
+ chart_type: line
+ dimensions:
+ - name: info
+ - name: warn
+ - name: hdfs.threads
+ description: Number of Threads
+ unit: num
+ chart_type: stacked
+ dimensions:
+ - name: new
+ - name: runnable
+ - name: blocked
+ - name: waiting
+ - name: timed_waiting
+ - name: terminated
+ - name: hdfs.logs_total
+ description: Number of Logs
+ unit: logs/s
+ chart_type: stacked
+ dimensions:
+ - name: info
+ - name: error
+ - name: warn
+ - name: fatal
+ - name: hdfs.rpc_bandwidth
+ description: RPC Bandwidth
+ unit: kilobits/s
+ chart_type: area
+ dimensions:
+ - name: received
+ - name: sent
+ - name: hdfs.rpc_calls
+ description: RPC Calls
+ unit: calls/s
+ chart_type: line
+ dimensions:
+ - name: calls
+ - name: hdfs.open_connections
+ description: RPC Open Connections
+ unit: connections
+ chart_type: line
+ dimensions:
+ - name: open
+ - name: hdfs.call_queue_length
+ description: RPC Call Queue Length
+ unit: num
+ chart_type: line
+ dimensions:
+ - name: length
+ - name: hdfs.avg_queue_time
+ description: RPC Avg Queue Time
+ unit: ms
+ chart_type: line
+ dimensions:
+ - name: time
+ - name: hdfs.avg_processing_time
+ description: RPC Avg Processing Time
+ unit: ms
+ chart_type: line
+ dimensions:
+ - name: time
+ - name: hdfs.capacity
+ description: Capacity Across All Datanodes
+ unit: KiB
+ chart_type: stacked
+ availability:
+ - NameNode
+ dimensions:
+ - name: remaining
+ - name: used
+ - name: hdfs.used_capacity
+ description: Used Capacity Across All Datanodes
+ unit: KiB
+ chart_type: stacked
+ availability:
+ - NameNode
+ dimensions:
+ - name: dfs
+ - name: non_dfs
+ - name: hdfs.load
+ description: Number of Concurrent File Accesses (read/write) Across All DataNodes
+ unit: load
+ chart_type: line
+ availability:
+ - NameNode
+ dimensions:
+ - name: load
+ - name: hdfs.volume_failures_total
+ description: Number of Volume Failures Across All Datanodes
+ unit: events/s
+ chart_type: line
+ availability:
+ - NameNode
+ dimensions:
+ - name: failures
+ - name: hdfs.files_total
+ description: Number of Tracked Files
+ unit: num
+ chart_type: line
+ availability:
+ - NameNode
+ dimensions:
+ - name: files
+ - name: hdfs.blocks_total
+ description: Number of Allocated Blocks in the System
+ unit: num
+ chart_type: line
+ availability:
+ - NameNode
+ dimensions:
+ - name: blocks
+ - name: hdfs.blocks
+ description: Number of Problem Blocks (can point to an unhealthy cluster)
+ unit: num
+ chart_type: line
+ availability:
+ - NameNode
+ dimensions:
+ - name: corrupt
+ - name: missing
+ - name: under_replicated
+ - name: hdfs.data_nodes
+ description: Number of Data Nodes By Status
+ unit: num
+ chart_type: stacked
+ availability:
+ - NameNode
+ dimensions:
+ - name: live
+ - name: dead
+ - name: stale
+ - name: hdfs.datanode_capacity
+ description: Capacity
+ unit: KiB
+ chart_type: stacked
+ availability:
+ - DataNode
+ dimensions:
+ - name: remaining
+ - name: used
+ - name: hdfs.datanode_used_capacity
+ description: Used Capacity
+ unit: KiB
+ chart_type: stacked
+ availability:
+ - DataNode
+ dimensions:
+ - name: dfs
+ - name: non_dfs
+ - name: hdfs.datanode_failed_volumes
+ description: Number of Failed Volumes
+ unit: num
+ chart_type: line
+ availability:
+ - DataNode
+ dimensions:
+ - name: failed volumes
+ - name: hdfs.datanode_bandwidth
+ description: Bandwidth
+ unit: KiB/s
+ chart_type: area
+ availability:
+ - DataNode
+ dimensions:
+ - name: reads
+ - name: writes
diff --git a/src/go/collectors/go.d.plugin/modules/hdfs/metrics.go b/src/go/collectors/go.d.plugin/modules/hdfs/metrics.go
new file mode 100644
index 000000000..972436a5d
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/hdfs/metrics.go
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package hdfs
+
+// HDFS Architecture
+// https://hadoop.apache.org/docs/r1.2.1/hdfs_design.html#NameNode+and+DataNodes
+
+// Metrics description
+// https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Metrics.html
+
+// Good article
+// https://www.datadoghq.com/blog/monitor-hadoop-metrics/#hdfs-metrics
+
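+// Values are flattened into the collected metric map via their `stm` struct tags
+// (pkg/stm); a tag suffix such as ",1000,1" scales the float value before it is
+// converted to an integer.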
+type metrics struct {
+ Jvm *jvmMetrics `stm:"jvm"` // both
+ Rpc *rpcActivityMetrics `stm:"rpc"` // both
+ FSNameSystem *fsNameSystemMetrics `stm:"fsns"` // namenode
+ FSDatasetState *fsDatasetStateMetrics `stm:"fsds"` // datanode
+ DataNodeActivity *dataNodeActivityMetrics `stm:"dna"` // datanode
+}
+
+type jvmMetrics struct {
+ ProcessName string `json:"tag.ProcessName"`
+ HostName string `json:"tag.Hostname"`
+ //MemNonHeapUsedM float64 `stm:"mem_non_heap_used,1000,1"`
+ //MemNonHeapCommittedM float64 `stm:"mem_non_heap_committed,1000,1"`
+ //MemNonHeapMaxM float64 `stm:"mem_non_heap_max"`
+ MemHeapUsedM float64 `stm:"mem_heap_used,1000,1"`
+ MemHeapCommittedM float64 `stm:"mem_heap_committed,1000,1"`
+ MemHeapMaxM float64 `stm:"mem_heap_max"`
+ //MemMaxM float64 `stm:"mem_max"`
+ GcCount float64 `stm:"gc_count"`
+ GcTimeMillis float64 `stm:"gc_time_millis"`
+ GcNumWarnThresholdExceeded float64 `stm:"gc_num_warn_threshold_exceeded"`
+ GcNumInfoThresholdExceeded float64 `stm:"gc_num_info_threshold_exceeded"`
+ GcTotalExtraSleepTime float64 `stm:"gc_total_extra_sleep_time"`
+ ThreadsNew float64 `stm:"threads_new"`
+ ThreadsRunnable float64 `stm:"threads_runnable"`
+ ThreadsBlocked float64 `stm:"threads_blocked"`
+ ThreadsWaiting float64 `stm:"threads_waiting"`
+ ThreadsTimedWaiting float64 `stm:"threads_timed_waiting"`
+ ThreadsTerminated float64 `stm:"threads_terminated"`
+ LogFatal float64 `stm:"log_fatal"`
+ LogError float64 `stm:"log_error"`
+ LogWarn float64 `stm:"log_warn"`
+ LogInfo float64 `stm:"log_info"`
+}
+
+type rpcActivityMetrics struct {
+ ReceivedBytes float64 `stm:"received_bytes"`
+ SentBytes float64 `stm:"sent_bytes"`
+ RpcQueueTimeNumOps float64 `stm:"queue_time_num_ops"`
+ RpcQueueTimeAvgTime float64 `stm:"queue_time_avg_time,1000,1"`
+ //RpcProcessingTimeNumOps float64
+ RpcProcessingTimeAvgTime float64 `stm:"processing_time_avg_time,1000,1"`
+ //DeferredRpcProcessingTimeNumOps float64
+ //DeferredRpcProcessingTimeAvgTime float64
+ //RpcAuthenticationFailures float64
+ //RpcAuthenticationSuccesses float64
+ //RpcAuthorizationFailures float64
+ //RpcAuthorizationSuccesses float64
+ //RpcClientBackoff float64
+ //RpcSlowCalls float64
+ NumOpenConnections float64 `stm:"num_open_connections"`
+ CallQueueLength float64 `stm:"call_queue_length"`
+ //NumDroppedConnections float64
+}
+
+type fsNameSystemMetrics struct {
+ HostName string `json:"tag.Hostname"`
+ HAState string `json:"tag.HAState"`
+ //TotalSyncTimes float64 `json:"tag.tag.TotalSyncTimes"`
+ MissingBlocks float64 `stm:"missing_blocks"`
+ //MissingReplOneBlocks float64 `stm:"missing_repl_one_blocks"`
+ //ExpiredHeartbeats float64 `stm:"expired_heartbeats"`
+ //TransactionsSinceLastCheckpoint float64 `stm:"transactions_since_last_checkpoint"`
+ //TransactionsSinceLastLogRoll float64 `stm:"transactions_since_last_log_roll"`
+ //LastWrittenTransactionId float64 `stm:"last_written_transaction_id"`
+ //LastCheckpointTime float64 `stm:"last_checkpoint_time"`
+ CapacityTotal float64 `stm:"capacity_total"`
+ //CapacityTotalGB float64 `stm:"capacity_total_gb"`
+ CapacityDfsUsed float64 `json:"CapacityUsed" stm:"capacity_used_dfs"`
+ //CapacityUsedGB float64 `stm:"capacity_used_gb"`
+ CapacityRemaining float64 `stm:"capacity_remaining"`
+ //ProvidedCapacityTotal float64 `stm:"provided_capacity_total"`
+ //CapacityRemainingGB float64 `stm:"capacity_remaining_gb"`
+ CapacityUsedNonDFS float64 `stm:"capacity_used_non_dfs"`
+ TotalLoad float64 `stm:"total_load"`
+ //SnapshottableDirectories float64 `stm:"snapshottable_directories"`
+ //Snapshots float64 `stm:"snapshots"`
+ //NumEncryptionZones float64 `stm:"num_encryption_zones"`
+ //LockQueueLength float64 `stm:"lock_queue_length"`
+ BlocksTotal float64 `stm:"blocks_total"`
+ //NumFilesUnderConstruction float64 `stm:"num_files_under_construction"`
+ //NumActiveClients float64 `stm:"num_active_clients"`
+ FilesTotal float64 `stm:"files_total"`
+ //PendingReplicationBlocks float64 `stm:"pending_replication_blocks"`
+ //PendingReconstructionBlocks float64 `stm:"pending_reconstruction_blocks"`
+ UnderReplicatedBlocks float64 `stm:"under_replicated_blocks"`
+ //LowRedundancyBlocks float64 `stm:"low_redundancy_blocks"`
+ CorruptBlocks float64 `stm:"corrupt_blocks"`
+ //ScheduledReplicationBlocks float64 `stm:"scheduled_replication_blocks"`
+ //PendingDeletionBlocks float64 `stm:"pending_deletion_blocks"`
+ //LowRedundancyReplicatedBlocks float64 `stm:"low_redundancy_replicated_blocks"`
+ //CorruptReplicatedBlocks float64 `stm:"corrupt_replicated_blocks"`
+ //MissingReplicatedBlocks float64 `stm:"missing_replicated_blocks"`
+ //MissingReplicationOneBlocks float64 `stm:"missing_replication_one_blocks"`
+ //HighestPriorityLowRedundancyReplicatedBlocks float64 `stm:"highest_priority_low_redundancy_replicated_blocks"`
+ //HighestPriorityLowRedundancyECBlocks float64 `stm:"highest_priority_low_redundancy_ec_blocks"`
+ //BytesInFutureReplicatedBlocks float64 `stm:"bytes_in_future_replicated_blocks"`
+ //PendingDeletionReplicatedBlocks float64 `stm:"pending_deletion_replicated_blocks"`
+ //TotalReplicatedBlocks float64 `stm:"total_replicated_blocks"`
+ //LowRedundancyECBlockGroups float64 `stm:"low_redundancy_ec_block_groups"`
+ //CorruptECBlockGroups float64 `stm:"corrupt_ec_block_groups"`
+ //MissingECBlockGroups float64 `stm:"missing_ec_block_groups"`
+ //BytesInFutureECBlockGroups float64 `stm:"bytes_in_future_ec_block_groups"`
+ //PendingDeletionECBlocks float64 `stm:"pending_deletion_ec_blocks"`
+ //TotalECBlockGroups float64 `stm:"total_ec_block_groups"`
+ //ExcessBlocks float64 `stm:"excess_blocks"`
+ //NumTimedOutPendingReconstructions float64 `stm:"num_timed_out_pending_reconstructions"`
+ //PostponedMisreplicatedBlocks float64 `stm:"postponed_misreplicated_blocks"`
+ //PendingDataNodeMessageCount float64 `stm:"pending_data_node_message_count"`
+ //MillisSinceLastLoadedEdits float64 `stm:"millis_since_last_loaded_edits"`
+ //BlockCapacity float64 `stm:"block_capacity"`
+ NumLiveDataNodes float64 `stm:"num_live_data_nodes"`
+ NumDeadDataNodes float64 `stm:"num_dead_data_nodes"`
+ //NumDecomLiveDataNodes float64 `stm:"num_decom_live_data_nodes"`
+ //NumDecomDeadDataNodes float64 `stm:"num_decom_dead_data_nodes"`
+ VolumeFailuresTotal float64 `stm:"volume_failures_total"`
+ //EstimatedCapacityLostTotal float64 `stm:"estimated_capacity_lost_total"`
+ //NumDecommissioningDataNodes float64 `stm:"num_decommissioning_data_nodes"`
+ StaleDataNodes float64 `stm:"stale_data_nodes"`
+ //NumStaleStorages float64 `stm:"num_stale_storages"`
+ //TotalSyncCount float64 `stm:"total_sync_count"`
+ //NumInMaintenanceLiveDataNodes float64 `stm:"num_in_maintenance_live_data_nodes"`
+ //NumInMaintenanceDeadDataNodes float64 `stm:"num_in_maintenance_dead_data_nodes"`
+ //NumEnteringMaintenanceDataNodes float64 `stm:"num_entering_maintenance_data_nodes"`
+
+ // custom attributes
+ CapacityUsed float64 `json:"-" stm:"capacity_used"`
+}
+
+type fsDatasetStateMetrics struct {
+ HostName string `json:"tag.Hostname"`
+ Capacity float64 `stm:"capacity_total"`
+ DfsUsed float64 `stm:"capacity_used_dfs"`
+ Remaining float64 `stm:"capacity_remaining"`
+ NumFailedVolumes float64 `stm:"num_failed_volumes"`
+ //LastVolumeFailureDate float64 `stm:"LastVolumeFailureDate"`
+ //EstimatedCapacityLostTotal float64 `stm:"EstimatedCapacityLostTotal"`
+ //CacheUsed float64 `stm:"CacheUsed"`
+ //CacheCapacity float64 `stm:"CacheCapacity"`
+ //NumBlocksCached float64 `stm:"NumBlocksCached"`
+ //NumBlocksFailedToCache float64 `stm:"NumBlocksFailedToCache"`
+ //NumBlocksFailedToUnCache float64 `stm:"NumBlocksFailedToUnCache"`
+
+ // custom attributes, calculated during collection (not decoded from the JMX bean)
+ CapacityUsedNonDFS float64 `stm:"capacity_used_non_dfs"`
+ CapacityUsed float64 `stm:"capacity_used"`
+}
+
+type dataNodeActivityMetrics struct {
+ HostName string `json:"tag.Hostname"`
+ BytesWritten float64 `stm:"bytes_written"`
+ //TotalWriteTime float64
+ BytesRead float64 `stm:"bytes_read"`
+ //TotalReadTime float64
+ //BlocksWritten float64
+ //BlocksRead float64
+ //BlocksReplicated float64
+ //BlocksRemoved float64
+ //BlocksVerified float64
+ //BlockVerificationFailures float64
+ //BlocksCached float64
+ //BlocksUncached float64
+ //ReadsFromLocalClient float64
+ //ReadsFromRemoteClient float64
+ //WritesFromLocalClient float64
+ //WritesFromRemoteClient float64
+ //BlocksGetLocalPathInfo float64
+ //RemoteBytesRead float64
+ //RemoteBytesWritten float64
+ //RamDiskBlocksWrite float64
+ //RamDiskBlocksWriteFallback float64
+ //RamDiskBytesWrite float64
+ //RamDiskBlocksReadHits float64
+ //RamDiskBlocksEvicted float64
+ //RamDiskBlocksEvictedWithoutRead float64
+ //RamDiskBlocksEvictionWindowMsNumOps float64
+ //RamDiskBlocksEvictionWindowMsAvgTime float64
+ //RamDiskBlocksLazyPersisted float64
+ //RamDiskBlocksDeletedBeforeLazyPersisted float64
+ //RamDiskBytesLazyPersisted float64
+ //RamDiskBlocksLazyPersistWindowMsNumOps float64
+ //RamDiskBlocksLazyPersistWindowMsAvgTime float64
+ //FsyncCount float64
+ //VolumeFailures float64
+ //DatanodeNetworkErrors float64
+ //DataNodeActiveXceiversCount float64
+ //ReadBlockOpNumOps float64
+ //ReadBlockOpAvgTime float64
+ //WriteBlockOpNumOps float64
+ //WriteBlockOpAvgTime float64
+ //BlockChecksumOpNumOps float64
+ //BlockChecksumOpAvgTime float64
+ //CopyBlockOpNumOps float64
+ //CopyBlockOpAvgTime float64
+ //ReplaceBlockOpNumOps float64
+ //ReplaceBlockOpAvgTime float64
+ //HeartbeatsNumOps float64
+ //HeartbeatsAvgTime float64
+ //HeartbeatsTotalNumOps float64
+ //HeartbeatsTotalAvgTime float64
+ //LifelinesNumOps float64
+ //LifelinesAvgTime float64
+ //BlockReportsNumOps float64
+ //BlockReportsAvgTime float64
+ //IncrementalBlockReportsNumOps float64
+ //IncrementalBlockReportsAvgTime float64
+ //CacheReportsNumOps float64
+ //CacheReportsAvgTime float64
+ //PacketAckRoundTripTimeNanosNumOps float64
+ //PacketAckRoundTripTimeNanosAvgTime float64
+ //FlushNanosNumOps float64
+ //FlushNanosAvgTime float64
+ //FsyncNanosNumOps float64
+ //FsyncNanosAvgTime float64
+ //SendDataPacketBlockedOnNetworkNanosNumOps float64
+ //SendDataPacketBlockedOnNetworkNanosAvgTime float64
+ //SendDataPacketTransferNanosNumOps float64
+ //SendDataPacketTransferNanosAvgTime float64
+ //BlocksInPendingIBR float64
+ //BlocksReceivingInPendingIBR float64
+ //BlocksReceivedInPendingIBR float64
+ //BlocksDeletedInPendingIBR float64
+ //EcReconstructionTasks float64
+ //EcFailedReconstructionTasks float64
+ //EcDecodingTimeNanos float64
+ //EcReconstructionBytesRead float64
+ //EcReconstructionBytesWritten float64
+ //EcReconstructionRemoteBytesRead float64
+ //EcReconstructionReadTimeMillis float64
+ //EcReconstructionDecodingTimeMillis float64
+ //EcReconstructionWriteTimeMillis float64
+}
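The stm struct tags above are the glue between these metric structs and the flat key/value map that the module's charts consume: each tag names one key in that map. Below is a minimal, self-contained sketch of that flattening; it is a toy stand-in, not the real pkg/stm helper used by go.d.plugin (which is more capable), and the trimmed struct and values are taken from the fixtures only for illustration.

// stm_sketch.go (illustrative only)
package main

import (
	"fmt"
	"reflect"
)

// toMap is a toy stand-in for tag-driven flattening: every exported float64
// field with an stm tag becomes one key in the metrics map.
func toMap(v any) map[string]int64 {
	out := make(map[string]int64)
	rv := reflect.ValueOf(v)
	rt := rv.Type()
	for i := 0; i < rt.NumField(); i++ {
		f := rt.Field(i)
		tag := f.Tag.Get("stm")
		if tag == "" || f.Type.Kind() != reflect.Float64 {
			continue
		}
		out[tag] = int64(rv.Field(i).Float())
	}
	return out
}

// A trimmed copy of fsDatasetStateMetrics, just for the demo.
type fsDatasetState struct {
	Capacity  float64 `stm:"capacity_total"`
	DfsUsed   float64 `stm:"capacity_used_dfs"`
	Remaining float64 `stm:"capacity_remaining"`
}

func main() {
	s := fsDatasetState{Capacity: 53675536384, DfsUsed: 1186058240, Remaining: 32920760320}
	fmt.Println(toMap(s))
	// map[capacity_remaining:32920760320 capacity_total:53675536384 capacity_used_dfs:1186058240]
}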
diff --git a/src/go/collectors/go.d.plugin/modules/hdfs/raw_data.go b/src/go/collectors/go.d.plugin/modules/hdfs/raw_data.go
new file mode 100644
index 000000000..ab434ae17
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/hdfs/raw_data.go
@@ -0,0 +1,56 @@
+package hdfs
+
+import (
+ "encoding/json"
+ "strings"
+)
+
+type (
+ // rawData is a single JMX bean; attribute values stay raw JSON until they are decoded into a concrete type.
+ rawData map[string]json.RawMessage
+ // rawJMX mirrors the top-level shape of a Hadoop /jmx response.
+ rawJMX struct {
+ Beans []rawData
+ }
+)
+
+func (r rawJMX) isEmpty() bool {
+ return len(r.Beans) == 0
+}
+
+// find returns the first bean accepted by f, or nil if no bean matches.
+func (r rawJMX) find(f func(rawData) bool) rawData {
+ for _, v := range r.Beans {
+ if f(v) {
+ return v
+ }
+ }
+ return nil
+}
+
+// The "modelerType" values below are compared as raw JSON strings, so the
+// expected values include the surrounding double quotes.
+func (r rawJMX) findJvm() rawData {
+ f := func(data rawData) bool { return string(data["modelerType"]) == "\"JvmMetrics\"" }
+ return r.find(f)
+}
+
+func (r rawJMX) findRPCActivity() rawData {
+ f := func(data rawData) bool { return strings.HasPrefix(string(data["modelerType"]), "\"RpcActivityForPort") }
+ return r.find(f)
+}
+
+func (r rawJMX) findFSNameSystem() rawData {
+ f := func(data rawData) bool { return string(data["modelerType"]) == "\"FSNamesystem\"" }
+ return r.find(f)
+}
+
+func (r rawJMX) findFSDatasetState() rawData {
+ f := func(data rawData) bool { return string(data["modelerType"]) == "\"FSDatasetState\"" }
+ return r.find(f)
+}
+
+func (r rawJMX) findDataNodeActivity() rawData {
+ f := func(data rawData) bool { return strings.HasPrefix(string(data["modelerType"]), "\"DataNodeActivity") }
+ return r.find(f)
+}
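One detail worth spelling out about the find helpers above: json.RawMessage keeps attribute values exactly as they appear on the wire, so the modelerType comparisons include the surrounding double quotes. A small self-contained illustration, with the two raw types re-declared locally only for the example:

package main

import (
	"encoding/json"
	"fmt"
)

// Local mirrors of the module's raw types, re-declared for a standalone demo.
type rawData map[string]json.RawMessage

type rawJMX struct {
	Beans []rawData
}

const payload = `{"beans":[{"modelerType":"JvmMetrics","GcCount":155}]}`

func main() {
	var jmx rawJMX
	if err := json.Unmarshal([]byte(payload), &jmx); err != nil {
		panic(err)
	}
	mt := string(jmx.Beans[0]["modelerType"])
	fmt.Println(mt)                   // "JvmMetrics" (the quotes are part of the raw value)
	fmt.Println(mt == `"JvmMetrics"`) // true
}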
diff --git a/src/go/collectors/go.d.plugin/modules/hdfs/testdata/config.json b/src/go/collectors/go.d.plugin/modules/hdfs/testdata/config.json
new file mode 100644
index 000000000..984c3ed6e
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/hdfs/testdata/config.json
@@ -0,0 +1,20 @@
+{
+ "update_every": 123,
+ "url": "ok",
+ "body": "ok",
+ "method": "ok",
+ "headers": {
+ "ok": "ok"
+ },
+ "username": "ok",
+ "password": "ok",
+ "proxy_url": "ok",
+ "proxy_username": "ok",
+ "proxy_password": "ok",
+ "timeout": 123.123,
+ "not_follow_redirects": true,
+ "tls_ca": "ok",
+ "tls_cert": "ok",
+ "tls_key": "ok",
+ "tls_skip_verify": true
+}
diff --git a/src/go/collectors/go.d.plugin/modules/hdfs/testdata/config.yaml b/src/go/collectors/go.d.plugin/modules/hdfs/testdata/config.yaml
new file mode 100644
index 000000000..8558b61cc
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/hdfs/testdata/config.yaml
@@ -0,0 +1,17 @@
+update_every: 123
+url: "ok"
+body: "ok"
+method: "ok"
+headers:
+ ok: "ok"
+username: "ok"
+password: "ok"
+proxy_url: "ok"
+proxy_username: "ok"
+proxy_password: "ok"
+timeout: 123.123
+not_follow_redirects: yes
+tls_ca: "ok"
+tls_cert: "ok"
+tls_key: "ok"
+tls_skip_verify: yes
diff --git a/src/go/collectors/go.d.plugin/modules/hdfs/testdata/datanode.json b/src/go/collectors/go.d.plugin/modules/hdfs/testdata/datanode.json
new file mode 100644
index 000000000..0f657d560
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/hdfs/testdata/datanode.json
@@ -0,0 +1,165 @@
+{
+ "beans":[
+ {
+ "name":"Hadoop:service=DataNode,name=JvmMetrics",
+ "modelerType":"JvmMetrics",
+ "tag.Context":"jvm",
+ "tag.ProcessName":"DataNode",
+ "tag.SessionId":null,
+ "tag.Hostname":"dev-slave-01.dev.local",
+ "MemNonHeapUsedM":53.67546,
+ "MemNonHeapCommittedM":54.9375,
+ "MemNonHeapMaxM":-1.0,
+ "MemHeapUsedM":18.885735,
+ "MemHeapCommittedM":60.5,
+ "MemHeapMaxM":843.0,
+ "MemMaxM":843.0,
+ "GcCount":155,
+ "GcTimeMillis":672,
+ "GcNumWarnThresholdExceeded":0,
+ "GcNumInfoThresholdExceeded":0,
+ "GcTotalExtraSleepTime":8783,
+ "ThreadsNew":0,
+ "ThreadsRunnable":11,
+ "ThreadsBlocked":0,
+ "ThreadsWaiting":11,
+ "ThreadsTimedWaiting":25,
+ "ThreadsTerminated":0,
+ "LogFatal":0,
+ "LogError":1,
+ "LogWarn":2,
+ "LogInfo":257
+ },
+ {
+ "name":"Hadoop:service=DataNode,name=FSDatasetState",
+ "modelerType":"FSDatasetState",
+ "tag.Context":"FSDatasetState",
+ "tag.StorageInfo":"FSDataset{dirpath='[/data/hdfs/data]'}",
+ "tag.Hostname":"dev-slave-01.dev.local",
+ "Capacity":53675536384,
+ "DfsUsed":1186058240,
+ "Remaining":32920760320,
+ "NumFailedVolumes":0,
+ "LastVolumeFailureDate":0,
+ "EstimatedCapacityLostTotal":0,
+ "CacheUsed":0,
+ "CacheCapacity":0,
+ "NumBlocksCached":0,
+ "NumBlocksFailedToCache":0,
+ "NumBlocksFailedToUnCache":4
+ },
+ {
+ "name":"Hadoop:service=DataNode,name=DataNodeActivity-dev-slave-01.dev.local-9866",
+ "modelerType":"DataNodeActivity-dev-slave-01.dev.local-9866",
+ "tag.SessionId":null,
+ "tag.Context":"dfs",
+ "tag.Hostname":"dev-slave-01.dev.local",
+ "BytesWritten":500960407,
+ "TotalWriteTime":463,
+ "BytesRead":80689178,
+ "TotalReadTime":41203,
+ "BlocksWritten":16,
+ "BlocksRead":16,
+ "BlocksReplicated":4,
+ "BlocksRemoved":4,
+ "BlocksVerified":0,
+ "BlockVerificationFailures":0,
+ "BlocksCached":0,
+ "BlocksUncached":0,
+ "ReadsFromLocalClient":0,
+ "ReadsFromRemoteClient":16,
+ "WritesFromLocalClient":0,
+ "WritesFromRemoteClient":12,
+ "BlocksGetLocalPathInfo":0,
+ "RemoteBytesRead":80689178,
+ "RemoteBytesWritten":97283223,
+ "RamDiskBlocksWrite":0,
+ "RamDiskBlocksWriteFallback":0,
+ "RamDiskBytesWrite":0,
+ "RamDiskBlocksReadHits":0,
+ "RamDiskBlocksEvicted":0,
+ "RamDiskBlocksEvictedWithoutRead":0,
+ "RamDiskBlocksEvictionWindowMsNumOps":0,
+ "RamDiskBlocksEvictionWindowMsAvgTime":0.0,
+ "RamDiskBlocksLazyPersisted":0,
+ "RamDiskBlocksDeletedBeforeLazyPersisted":0,
+ "RamDiskBytesLazyPersisted":0,
+ "RamDiskBlocksLazyPersistWindowMsNumOps":0,
+ "RamDiskBlocksLazyPersistWindowMsAvgTime":0.0,
+ "FsyncCount":0,
+ "VolumeFailures":0,
+ "DatanodeNetworkErrors":7,
+ "DataNodeActiveXceiversCount":0,
+ "ReadBlockOpNumOps":16,
+ "ReadBlockOpAvgTime":2258.2,
+ "WriteBlockOpNumOps":12,
+ "WriteBlockOpAvgTime":12640.666666666666,
+ "BlockChecksumOpNumOps":0,
+ "BlockChecksumOpAvgTime":0.0,
+ "CopyBlockOpNumOps":0,
+ "CopyBlockOpAvgTime":0.0,
+ "ReplaceBlockOpNumOps":0,
+ "ReplaceBlockOpAvgTime":0.0,
+ "HeartbeatsNumOps":285073,
+ "HeartbeatsAvgTime":1.2035398230088497,
+ "HeartbeatsTotalNumOps":285073,
+ "HeartbeatsTotalAvgTime":1.2035398230088497,
+ "LifelinesNumOps":0,
+ "LifelinesAvgTime":0.0,
+ "BlockReportsNumOps":41,
+ "BlockReportsAvgTime":2.0,
+ "IncrementalBlockReportsNumOps":20,
+ "IncrementalBlockReportsAvgTime":1.2,
+ "CacheReportsNumOps":0,
+ "CacheReportsAvgTime":0.0,
+ "PacketAckRoundTripTimeNanosNumOps":603,
+ "PacketAckRoundTripTimeNanosAvgTime":1733672.0,
+ "FlushNanosNumOps":7660,
+ "FlushNanosAvgTime":3988.858108108108,
+ "FsyncNanosNumOps":0,
+ "FsyncNanosAvgTime":0.0,
+ "SendDataPacketBlockedOnNetworkNanosNumOps":7091,
+ "SendDataPacketBlockedOnNetworkNanosAvgTime":2.4469053762711864E7,
+ "SendDataPacketTransferNanosNumOps":7091,
+ "SendDataPacketTransferNanosAvgTime":37130.05084745763,
+ "BlocksInPendingIBR":0,
+ "BlocksReceivingInPendingIBR":0,
+ "BlocksReceivedInPendingIBR":0,
+ "BlocksDeletedInPendingIBR":0,
+ "EcReconstructionTasks":0,
+ "EcFailedReconstructionTasks":0,
+ "EcDecodingTimeNanos":0,
+ "EcReconstructionBytesRead":0,
+ "EcReconstructionBytesWritten":0,
+ "EcReconstructionRemoteBytesRead":0,
+ "EcReconstructionReadTimeMillis":0,
+ "EcReconstructionDecodingTimeMillis":0,
+ "EcReconstructionWriteTimeMillis":0
+ },
+ {
+ "name":"Hadoop:service=DataNode,name=RpcActivityForPort9867",
+ "modelerType":"RpcActivityForPort9867",
+ "tag.port":"9867",
+ "tag.Context":"rpc",
+ "tag.NumOpenConnectionsPerUser":"{}",
+ "tag.Hostname":"dev-slave-01.dev.local",
+ "ReceivedBytes":7,
+ "SentBytes":187,
+ "RpcQueueTimeNumOps":0,
+ "RpcQueueTimeAvgTime":0.0,
+ "RpcProcessingTimeNumOps":0,
+ "RpcProcessingTimeAvgTime":0.0,
+ "DeferredRpcProcessingTimeNumOps":0,
+ "DeferredRpcProcessingTimeAvgTime":0.0,
+ "RpcAuthenticationFailures":0,
+ "RpcAuthenticationSuccesses":0,
+ "RpcAuthorizationFailures":0,
+ "RpcAuthorizationSuccesses":0,
+ "RpcClientBackoff":0,
+ "RpcSlowCalls":0,
+ "NumOpenConnections":0,
+ "CallQueueLength":0,
+ "NumDroppedConnections":0
+ }
+ ]
+} \ No newline at end of file
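A fixture like the one above is a captured response from a Hadoop daemon's /jmx servlet. Assuming Hadoop 3.x default web ports (9870 for the NameNode UI, 9864 for the DataNode UI) and reusing the fixture hostname purely as a placeholder, a rough sketch of fetching such a payload looks like this:

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

func main() {
	// Assumed address: a DataNode web UI on the Hadoop 3.x default port.
	const url = "http://dev-slave-01.dev.local:9864/jmx"

	client := &http.Client{Timeout: 5 * time.Second}
	resp, err := client.Get(url)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Decode only the outer shape; each bean stays raw JSON, as in raw_data.go.
	var jmx struct {
		Beans []map[string]json.RawMessage `json:"beans"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&jmx); err != nil {
		panic(err)
	}
	fmt.Println("beans:", len(jmx.Beans))
}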
diff --git a/src/go/collectors/go.d.plugin/modules/hdfs/testdata/namenode.json b/src/go/collectors/go.d.plugin/modules/hdfs/testdata/namenode.json
new file mode 100644
index 000000000..2d33d32f3
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/hdfs/testdata/namenode.json
@@ -0,0 +1,132 @@
+{
+ "beans":[
+ {
+ "name":"Hadoop:service=NameNode,name=JvmMetrics",
+ "modelerType":"JvmMetrics",
+ "tag.Context":"jvm",
+ "tag.ProcessName":"NameNode",
+ "tag.SessionId":null,
+ "tag.Hostname":"dev-master-02.dev.local",
+ "MemNonHeapUsedM":66.170395,
+ "MemNonHeapCommittedM":67.75,
+ "MemNonHeapMaxM":-1.0,
+ "MemHeapUsedM":26.603287,
+ "MemHeapCommittedM":67.0,
+ "MemHeapMaxM":843.0,
+ "MemMaxM":843.0,
+ "GcCount":1699,
+ "GcTimeMillis":3483,
+ "GcNumWarnThresholdExceeded":0,
+ "GcNumInfoThresholdExceeded":0,
+ "GcTotalExtraSleepTime":1944,
+ "ThreadsNew":0,
+ "ThreadsRunnable":7,
+ "ThreadsBlocked":0,
+ "ThreadsWaiting":6,
+ "ThreadsTimedWaiting":34,
+ "ThreadsTerminated":0,
+ "LogFatal":0,
+ "LogError":0,
+ "LogWarn":3378983,
+ "LogInfo":3382077
+ },
+ {
+ "name":"Hadoop:service=NameNode,name=FSNamesystem",
+ "modelerType":"FSNamesystem",
+ "tag.Context":"dfs",
+ "tag.HAState":"active",
+ "tag.TotalSyncTimes":"98 ",
+ "tag.Hostname":"dev-master-02.dev.local",
+ "MissingBlocks":0,
+ "MissingReplOneBlocks":0,
+ "ExpiredHeartbeats":0,
+ "TransactionsSinceLastCheckpoint":1,
+ "TransactionsSinceLastLogRoll":1,
+ "LastWrittenTransactionId":624,
+ "LastCheckpointTime":1566814983890,
+ "CapacityTotal":107351072768,
+ "CapacityTotalGB":100.0,
+ "CapacityUsed":2372116480,
+ "CapacityUsedGB":2.0,
+ "CapacityRemaining":65861697536,
+ "ProvidedCapacityTotal":0,
+ "CapacityRemainingGB":61.0,
+ "CapacityUsedNonDFS":39117258752,
+ "TotalLoad":2,
+ "SnapshottableDirectories":0,
+ "Snapshots":0,
+ "NumEncryptionZones":0,
+ "LockQueueLength":0,
+ "BlocksTotal":15,
+ "NumFilesUnderConstruction":0,
+ "NumActiveClients":0,
+ "FilesTotal":12,
+ "PendingReplicationBlocks":0,
+ "PendingReconstructionBlocks":0,
+ "UnderReplicatedBlocks":0,
+ "LowRedundancyBlocks":0,
+ "CorruptBlocks":0,
+ "ScheduledReplicationBlocks":0,
+ "PendingDeletionBlocks":0,
+ "LowRedundancyReplicatedBlocks":0,
+ "CorruptReplicatedBlocks":0,
+ "MissingReplicatedBlocks":0,
+ "MissingReplicationOneBlocks":0,
+ "HighestPriorityLowRedundancyReplicatedBlocks":0,
+ "HighestPriorityLowRedundancyECBlocks":0,
+ "BytesInFutureReplicatedBlocks":0,
+ "PendingDeletionReplicatedBlocks":0,
+ "TotalReplicatedBlocks":15,
+ "LowRedundancyECBlockGroups":0,
+ "CorruptECBlockGroups":0,
+ "MissingECBlockGroups":0,
+ "BytesInFutureECBlockGroups":0,
+ "PendingDeletionECBlocks":0,
+ "TotalECBlockGroups":0,
+ "ExcessBlocks":0,
+ "NumTimedOutPendingReconstructions":0,
+ "PostponedMisreplicatedBlocks":0,
+ "PendingDataNodeMessageCount":0,
+ "MillisSinceLastLoadedEdits":0,
+ "BlockCapacity":2097152,
+ "NumLiveDataNodes":2,
+ "NumDeadDataNodes":0,
+ "NumDecomLiveDataNodes":0,
+ "NumDecomDeadDataNodes":0,
+ "VolumeFailuresTotal":0,
+ "EstimatedCapacityLostTotal":0,
+ "NumDecommissioningDataNodes":0,
+ "StaleDataNodes":0,
+ "NumStaleStorages":0,
+ "TotalSyncCount":2,
+ "NumInMaintenanceLiveDataNodes":0,
+ "NumInMaintenanceDeadDataNodes":0,
+ "NumEnteringMaintenanceDataNodes":0
+ },
+ {
+ "name":"Hadoop:service=NameNode,name=RpcActivityForPort9000",
+ "modelerType":"RpcActivityForPort9000",
+ "tag.port":"9000",
+ "tag.Context":"rpc",
+ "tag.NumOpenConnectionsPerUser":"{\"hadoop\":2}",
+ "tag.Hostname":"dev-master-02.dev.local",
+ "ReceivedBytes":240431351,
+ "SentBytes":25067414,
+ "RpcQueueTimeNumOps":585402,
+ "RpcQueueTimeAvgTime":0.05813953488372093,
+ "RpcProcessingTimeNumOps":585402,
+ "RpcProcessingTimeAvgTime":0.0,
+ "DeferredRpcProcessingTimeNumOps":0,
+ "DeferredRpcProcessingTimeAvgTime":0.0,
+ "RpcAuthenticationFailures":0,
+ "RpcAuthenticationSuccesses":0,
+ "RpcAuthorizationFailures":0,
+ "RpcAuthorizationSuccesses":14327,
+ "RpcClientBackoff":0,
+ "RpcSlowCalls":0,
+ "NumOpenConnections":2,
+ "CallQueueLength":0,
+ "NumDroppedConnections":0
+ }
+ ]
+} \ No newline at end of file
diff --git a/src/go/collectors/go.d.plugin/modules/hdfs/testdata/unknownnode.json b/src/go/collectors/go.d.plugin/modules/hdfs/testdata/unknownnode.json
new file mode 100644
index 000000000..7370a7a37
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/hdfs/testdata/unknownnode.json
@@ -0,0 +1,34 @@
+{
+ "beans":[
+ {
+ "name":"Hadoop:service=UnknownNode,name=JvmMetrics",
+ "modelerType":"JvmMetrics",
+ "tag.Context":"jvm",
+ "tag.ProcessName":"UnknownNode",
+ "tag.SessionId":null,
+ "tag.Hostname":"dev-slave-01.dev.local",
+ "MemNonHeapUsedM":53.67546,
+ "MemNonHeapCommittedM":54.9375,
+ "MemNonHeapMaxM":-1.0,
+ "MemHeapUsedM":18.885735,
+ "MemHeapCommittedM":60.5,
+ "MemHeapMaxM":843.0,
+ "MemMaxM":843.0,
+ "GcCount":155,
+ "GcTimeMillis":672,
+ "GcNumWarnThresholdExceeded":0,
+ "GcNumInfoThresholdExceeded":0,
+ "GcTotalExtraSleepTime":8783,
+ "ThreadsNew":1,
+ "ThreadsRunnable":2,
+ "ThreadsBlocked":3,
+ "ThreadsWaiting":4,
+ "ThreadsTimedWaiting":5,
+ "ThreadsTerminated":6,
+ "LogFatal":10,
+ "LogError":11,
+ "LogWarn":12,
+ "LogInfo":13
+ }
+ ]
+} \ No newline at end of file
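The three fixtures differ in which beans they expose: namenode.json carries an FSNamesystem bean, datanode.json an FSDatasetState bean, and unknownnode.json only JvmMetrics, which presumably exercises the path where the node type cannot be determined. A self-contained sketch of that kind of classification (illustrative only; the module's actual logic lives in collect.go):

package main

import (
	"encoding/json"
	"fmt"
)

// nodeType guesses the daemon type from which bean a /jmx payload exposes;
// a payload with neither bean, like unknownnode.json, stays "unknown".
func nodeType(beans []map[string]json.RawMessage) string {
	for _, b := range beans {
		switch string(b["modelerType"]) {
		case `"FSNamesystem"`:
			return "NameNode"
		case `"FSDatasetState"`:
			return "DataNode"
		}
	}
	return "unknown"
}

func main() {
	payloads := map[string]string{
		"namenode.json":    `{"beans":[{"modelerType":"FSNamesystem"}]}`,
		"datanode.json":    `{"beans":[{"modelerType":"FSDatasetState"}]}`,
		"unknownnode.json": `{"beans":[{"modelerType":"JvmMetrics"}]}`,
	}
	for name, p := range payloads {
		var jmx struct {
			Beans []map[string]json.RawMessage `json:"beans"`
		}
		if err := json.Unmarshal([]byte(p), &jmx); err != nil {
			panic(err)
		}
		fmt.Println(name, "->", nodeType(jmx.Beans))
	}
}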