summaryrefslogtreecommitdiffstats
path: root/web/gui/dashboard_info.js
diff options
context:
space:
mode:
Diffstat (limited to 'web/gui/dashboard_info.js')
-rw-r--r--web/gui/dashboard_info.js2932
1 files changed, 2657 insertions, 275 deletions
diff --git a/web/gui/dashboard_info.js b/web/gui/dashboard_info.js
index 09647b103..dfa3e84ba 100644
--- a/web/gui/dashboard_info.js
+++ b/web/gui/dashboard_info.js
@@ -20,8 +20,10 @@ netdataDashboard.menu = {
'services': {
title: 'systemd Services',
icon: '<i class="fas fa-cogs"></i>',
- info: 'Resources utilization of systemd services. netdata monitors all systemd services via CGROUPS ' +
- '(the resources accounting used by containers). '
+ info: 'Resources utilization of systemd services. '+
+ 'Netdata monitors all systemd services via '+
+ '<a href="https://en.wikipedia.org/wiki/Cgroups" target="_blank">cgroups</a> ' +
+ '(the resources accounting used by containers).'
},
'ap': {
@@ -47,7 +49,17 @@ netdataDashboard.menu = {
'net': {
title: 'Network Interfaces',
icon: '<i class="fas fa-sitemap"></i>',
- info: 'Performance metrics for network interfaces.'
+ info: '<p>Performance <a href="https://www.kernel.org/doc/html/latest/networking/statistics.html" target="_blank">metrics for network interfaces</a>.</p>'+
+ '<p>Netdata retrieves this data reading the <code>/proc/net/dev</code> file and <code>/sys/class/net/</code> directory.</p>'
+ },
+
+ 'Infiniband': {
+ title: 'Infiniband ports',
+ icon: '<i class="fas fa-sitemap"></i>',
+ info: '<p>Performance and exception statistics for '+
+ '<a href="https://en.wikipedia.org/wiki/InfiniBand" target="_blank">Infiniband</a> ports. '+
+ 'The individual port and hardware counter descriptions can be found in the '+
+ '<a href="https://community.mellanox.com/s/article/understanding-mlx5-linux-counters-and-status-parameters" target="_blank">Mellanox knowledge base</a>.</p>'
},
'wireless': {
@@ -61,7 +73,7 @@ netdataDashboard.menu = {
icon: '<i class="fas fa-cloud"></i>',
info: function (os) {
if (os === "linux")
- return 'Metrics for the networking stack of the system. These metrics are collected from <code>/proc/net/netstat</code>, apply to both IPv4 and IPv6 traffic and are related to operation of the kernel networking stack.';
+ return 'Metrics for the networking stack of the system. These metrics are collected from <code>/proc/net/netstat</code> or by attaching <code>kprobes</code> to kernel functions, apply to both IPv4 and IPv6 traffic and are related to operation of the kernel networking stack.';
else
return 'Metrics for the networking stack of the system.';
}
@@ -88,13 +100,26 @@ netdataDashboard.menu = {
'sctp': {
title: 'SCTP Networking',
icon: '<i class="fas fa-cloud"></i>',
- info: '<a href="https://en.wikipedia.org/wiki/Stream_Control_Transmission_Protocol" target="_blank">Stream Control Transmission Protocol (SCTP)</a> is a computer network protocol which operates at the transport layer and serves a role similar to the popular protocols TCP and UDP. SCTP provides some of the features of both UDP and TCP: it is message-oriented like UDP and ensures reliable, in-sequence transport of messages with congestion control like TCP. It differs from those protocols by providing multi-homing and redundant paths to increase resilience and reliability.'
+ info: '<p><a href="https://en.wikipedia.org/wiki/Stream_Control_Transmission_Protocol" target="_blank">Stream Control Transmission Protocol (SCTP)</a> '+
+ 'is a computer network protocol which operates at the transport layer and serves a role similar to the popular '+
+ 'protocols TCP and UDP. SCTP provides some of the features of both UDP and TCP: it is message-oriented like UDP '+
+ 'and ensures reliable, in-sequence transport of messages with congestion control like TCP. '+
+ 'It differs from those protocols by providing multi-homing and redundant paths to increase resilience and reliability.</p>'+
+ '<p>Netdata collects SCTP metrics reading the <code>/proc/net/sctp/snmp</code> file.</p>'
},
'ipvs': {
title: 'IP Virtual Server',
icon: '<i class="fas fa-eye"></i>',
- info: '<a href="http://www.linuxvirtualserver.org/software/ipvs.html" target="_blank">IPVS (IP Virtual Server)</a> implements transport-layer load balancing inside the Linux kernel, so called Layer-4 switching. IPVS running on a host acts as a load balancer at the front of a cluster of real servers, it can direct requests for TCP/UDP based services to the real servers, and makes services of the real servers to appear as a virtual service on a single IP address.'
+ info: '<p><a href="http://www.linuxvirtualserver.org/software/ipvs.html" target="_blank">IPVS (IP Virtual Server)</a> '+
+ 'implements transport-layer load balancing inside the Linux kernel, so called Layer-4 switching. '+
+ 'IPVS running on a host acts as a load balancer at the front of a cluster of real servers, '+
+ 'it can direct requests for TCP/UDP based services to the real servers, '+
+ 'and makes services of the real servers to appear as a virtual service on a single IP address.</p>'+
+ '<p>Netdata collects summary statistics, reading <code>/proc/net/ip_vs_stats</code>. '+
+ 'To display the statistics information of services and their servers, run <code>ipvsadm -Ln --stats</code> '+
+ 'or <code>ipvsadm -Ln --rate</code> for the rate statistics. '+
+ 'For details, see <a href="https://linux.die.net/man/8/ipvsadm" target="_blank">ipvsadm(8)</a>.</p>'
},
'netfilter': {
@@ -127,6 +152,22 @@ netdataDashboard.menu = {
info: 'Charts with performance information for all the system disks. Special care has been given to present disk performance metrics in a way compatible with <code>iostat -x</code>. netdata by default prevents rendering performance charts for individual partitions and unmounted virtual disks. Disabled charts can still be enabled by configuring the relative settings in the netdata configuration file.'
},
+ 'mount': {
+ title: 'Mount Points',
+ icon: '<i class="fas fa-hdd"></i>',
+ info: ''
+ },
+
+ 'mdstat': {
+ title: 'MD arrays',
+ icon: '<i class="fas fa-hdd"></i>',
+ info: '<p>RAID devices are virtual devices created from two or more real block devices. '+
+ '<a href="https://man7.org/linux/man-pages/man4/md.4.html" target="_blank">Linux Software RAID</a> devices are '+
+ 'implemented through the md (Multiple Devices) device driver.</p>'+
+ '<p>Netdata monitors the current status of MD arrays reading <a href="https://raid.wiki.kernel.org/index.php/Mdstat" target="_blank">/proc/mdstat</a> and '+
+ '<code>/sys/block/%s/md/mismatch_cnt</code> files.</p>'
+ },
+
'sensors': {
title: 'Sensors',
icon: '<i class="fas fa-leaf"></i>',
@@ -148,19 +189,29 @@ netdataDashboard.menu = {
'nfsd': {
title: 'NFS Server',
icon: '<i class="fas fa-folder-open"></i>',
- info: 'Performance metrics of the Network File Server. NFS is a distributed file system protocol, allowing a user on a client computer to access files over a network, much like local storage is accessed. NFS, like many other protocols, builds on the Open Network Computing Remote Procedure Call (ONC RPC) system. The NFS is an open standard defined in Request for Comments (RFC).'
+ info: 'Performance metrics of the Network File Server. '+
+ '<a href="https://en.wikipedia.org/wiki/Network_File_System" target="_blank">NFS</a> '+
+ 'is a distributed file system protocol, allowing a user on a client computer to access files over a network, '+
+ 'much like local storage is accessed. '+
+ 'NFS, like many other protocols, builds on the Open Network Computing Remote Procedure Call (ONC RPC) system.'
},
'nfs': {
title: 'NFS Client',
icon: '<i class="fas fa-folder-open"></i>',
- info: 'Performance metrics of the NFS operations of this system, acting as an NFS client.'
+ info: 'Performance metrics of the '+
+ '<a href="https://en.wikipedia.org/wiki/Network_File_System" target="_blank">NFS</a> '+
+ 'operations of this system, acting as an NFS client.'
},
'zfs': {
- title: 'ZFS filesystem',
+ title: 'ZFS Cache',
icon: '<i class="fas fa-folder-open"></i>',
- info: 'Performance metrics of the ZFS filesystem. The following charts visualize all metrics reported by <a href="https://github.com/zfsonlinux/zfs/blob/master/cmd/arcstat/arcstat" target="_blank">arcstat.py</a> and <a href="https://github.com/zfsonlinux/zfs/blob/master/cmd/arc_summary/arc_summary3" target="_blank">arc_summary.py</a>.'
+ info: 'Performance metrics of the '+
+ '<a href="https://en.wikipedia.org/wiki/ZFS#Caching_mechanisms" target="_blank">ZFS ARC and L2ARC</a>. '+
+ 'The following charts visualize all metrics reported by '+
+ '<a href="https://github.com/openzfs/zfs/blob/master/cmd/arcstat/arcstat.in" target="_blank">arcstat.py</a> and '+
+ '<a href="https://github.com/openzfs/zfs/blob/master/cmd/arc_summary/arc_summary3" target="_blank">arc_summary.py</a>.'
},
'zfspool': {
@@ -178,21 +229,37 @@ netdataDashboard.menu = {
'apps': {
title: 'Applications',
icon: '<i class="fas fa-heartbeat"></i>',
- info: 'Per application statistics are collected using netdata\'s <code>apps.plugin</code>. This plugin walks through all processes and aggregates statistics for applications of interest, defined in <code>/etc/netdata/apps_groups.conf</code>, which can be edited by running <code>$ /etc/netdata/edit-config apps_groups.conf</code> (the default is <a href="https://github.com/netdata/netdata/blob/master/collectors/apps.plugin/apps_groups.conf" target="_blank">here</a>). The plugin internally builds a process tree (much like <code>ps fax</code> does), and groups processes together (evaluating both child and parent processes) so that the result is always a chart with a predefined set of dimensions (of course, only application groups found running are reported). The reported values are compatible with <code>top</code>, although the netdata plugin counts also the resources of exited children (unlike <code>top</code> which shows only the resources of the currently running processes). So for processes like shell scripts, the reported values include the resources used by the commands these scripts run within each timeframe.',
+ info: 'Per application statistics are collected using '+
+ '<a href="https://learn.netdata.cloud/docs/agent/collectors/apps.plugin" target="_blank">apps.plugin</a>. '+
+ 'This plugin walks through all processes and aggregates statistics for '+
+ '<a href="https://learn.netdata.cloud/docs/agent/collectors/apps.plugin#configuration" target="_blank">application groups</a>. '+
+ 'The plugin also counts the resources of exited children. '+
+ 'So for processes like shell scripts, the reported values include the resources used by the commands '+
+ 'these scripts run within each timeframe.',
height: 1.5
},
- 'users': {
- title: 'Users',
+ 'groups': {
+ title: 'User Groups',
icon: '<i class="fas fa-user"></i>',
- info: 'Per user statistics are collected using netdata\'s <code>apps.plugin</code>. This plugin walks through all processes and aggregates statistics per user. The reported values are compatible with <code>top</code>, although the netdata plugin counts also the resources of exited children (unlike <code>top</code> which shows only the resources of the currently running processes). So for processes like shell scripts, the reported values include the resources used by the commands these scripts run within each timeframe.',
+ info: 'Per user group statistics are collected using '+
+ '<a href="https://learn.netdata.cloud/docs/agent/collectors/apps.plugin" target="_blank">apps.plugin</a>. '+
+ 'This plugin walks through all processes and aggregates statistics per user group. '+
+ 'The plugin also counts the resources of exited children. '+
+ 'So for processes like shell scripts, the reported values include the resources used by the commands '+
+ 'these scripts run within each timeframe.',
height: 1.5
},
- 'groups': {
- title: 'User Groups',
+ 'users': {
+ title: 'Users',
icon: '<i class="fas fa-users"></i>',
- info: 'Per user group statistics are collected using netdata\'s <code>apps.plugin</code>. This plugin walks through all processes and aggregates statistics per user group. The reported values are compatible with <code>top</code>, although the netdata plugin counts also the resources of exited children (unlike <code>top</code> which shows only the resources of the currently running processes). So for processes like shell scripts, the reported values include the resources used by the commands these scripts run within each timeframe.',
+ info: 'Per user statistics are collected using '+
+ '<a href="https://learn.netdata.cloud/docs/agent/collectors/apps.plugin" target="_blank">apps.plugin</a>. '+
+ 'This plugin walks through all processes and aggregates statistics per user. '+
+ 'The plugin also counts the resources of exited children. '+
+ 'So for processes like shell scripts, the reported values include the resources used by the commands '+
+ 'these scripts run within each timeframe.',
height: 1.5
},
@@ -424,37 +491,37 @@ netdataDashboard.menu = {
'couchdb': {
icon: '<i class="fas fa-database"></i>',
- info: 'Performance metrics for <b><a href="https://couchdb.apache.org/">CouchDB</a></b>, the open-source, JSON document-based database with an HTTP API and multi-master replication.'
+ info: 'Performance metrics for <b><a href="https://couchdb.apache.org/" target="_blank">CouchDB</a></b>, the open-source, JSON document-based database with an HTTP API and multi-master replication.'
},
'beanstalk': {
title: 'Beanstalkd',
icon: '<i class="fas fa-tasks"></i>',
- info: 'Provides statistics on the <b><a href="http://kr.github.io/beanstalkd/">beanstalkd</a></b> server and any tubes available on that server using data pulled from beanstalkc'
+ info: 'Provides statistics on the <b><a href="http://kr.github.io/beanstalkd/" target="_blank">beanstalkd</a></b> server and any tubes available on that server using data pulled from beanstalkc'
},
'rabbitmq': {
title: 'RabbitMQ',
icon: '<i class="fas fa-comments"></i>',
- info: 'Performance data for the <b><a href="https://www.rabbitmq.com/">RabbitMQ</a></b> open-source message broker.'
+ info: 'Performance data for the <b><a href="https://www.rabbitmq.com/" target="_blank">RabbitMQ</a></b> open-source message broker.'
},
'ceph': {
title: 'Ceph',
icon: '<i class="fas fa-database"></i>',
- info: 'Provides statistics on the <b><a href="http://ceph.com/">ceph</a></b> cluster server, the open-source distributed storage system.'
+ info: 'Provides statistics on the <b><a href="http://ceph.com/" target="_blank">ceph</a></b> cluster server, the open-source distributed storage system.'
},
'ntpd': {
title: 'ntpd',
icon: '<i class="fas fa-clock"></i>',
- info: 'Provides statistics for the internal variables of the Network Time Protocol daemon <b><a href="http://www.ntp.org/">ntpd</a></b> and optional including the configured peers (if enabled in the module configuration). The module presents the performance metrics as shown by <b><a href="http://doc.ntp.org/current-stable/ntpq.html">ntpq</a></b> (the standard NTP query program) using NTP mode 6 UDP packets to communicate with the NTP server.'
+ info: 'Provides statistics for the internal variables of the Network Time Protocol daemon <b><a href="http://www.ntp.org/" target="_blank">ntpd</a></b> and optionally including the configured peers (if enabled in the module configuration). The module presents the performance metrics as shown by <b><a href="http://doc.ntp.org/current-stable/ntpq.html" target="_blank">ntpq</a></b> (the standard NTP query program) using NTP mode 6 UDP packets to communicate with the NTP server.'
},
'spigotmc': {
title: 'Spigot MC',
icon: '<i class="fas fa-eye"></i>',
- info: 'Provides basic performance statistics for the <b><a href="https://www.spigotmc.org/">Spigot Minecraft</a></b> server.'
+ info: 'Provides basic performance statistics for the <b><a href="https://www.spigotmc.org/" target="_blank">Spigot Minecraft</a></b> server.'
},
'unbound': {
@@ -466,13 +533,13 @@ netdataDashboard.menu = {
'boinc': {
title: 'BOINC',
icon: '<i class="fas fa-microchip"></i>',
- info: 'Provides task counts for <b><a href="http://boinc.berkeley.edu/">BOINC</a></b> distributed computing clients.'
+ info: 'Provides task counts for <b><a href="http://boinc.berkeley.edu/" target="_blank">BOINC</a></b> distributed computing clients.'
},
'w1sensor': {
title: '1-Wire Sensors',
icon: '<i class="fas fa-thermometer-half"></i>',
- info: 'Data derived from <a href="https://en.wikipedia.org/wiki/1-Wire">1-Wire</a> sensors. Currently temperature sensors are automatically detected.'
+ info: 'Data derived from <a href="https://en.wikipedia.org/wiki/1-Wire" target="_blank">1-Wire</a> sensors. Currently temperature sensors are automatically detected.'
},
'logind': {
@@ -484,7 +551,7 @@ netdataDashboard.menu = {
'powersupply': {
title: 'Power Supply',
icon: '<i class="fas fa-battery-half"></i>',
- info: 'Statistics for the various system power supplies. Data collected from <a href="https://www.kernel.org/doc/Documentation/power/power_supply_class.txt">Linux power supply class</a>.'
+ info: 'Statistics for the various system power supplies. Data collected from <a href="https://www.kernel.org/doc/Documentation/power/power_supply_class.txt" target="_blank">Linux power supply class</a>.'
},
'xenstat': {
@@ -514,25 +581,25 @@ netdataDashboard.menu = {
'vsphere': {
title: 'vSphere',
icon: '<i class="fas fa-server"></i>',
- info: 'Performance statistics for ESXI hosts and virtual machines. Data collected from <a href="https://www.vmware.com/products/vcenter-server.html">VMware vCenter Server</a> using <code><a href="https://github.com/vmware/govmomi"> govmomi</a></code> library.'
+ info: 'Performance statistics for ESXI hosts and virtual machines. Data collected from <a href="https://www.vmware.com/products/vcenter-server.html" target="_blank">VMware vCenter Server</a> using <code><a href="https://github.com/vmware/govmomi" target="_blank"> govmomi</a></code> library.'
},
'vcsa': {
title: 'VCSA',
icon: '<i class="fas fa-server"></i>',
- info: 'vCenter Server Appliance health statistics. Data collected from <a href="https://vmware.github.io/vsphere-automation-sdk-rest/vsphere/index.html#SVC_com.vmware.appliance.health">Health API</a>.'
+ info: 'vCenter Server Appliance health statistics. Data collected from <a href="https://vmware.github.io/vsphere-automation-sdk-rest/vsphere/index.html#SVC_com.vmware.appliance.health" target="_blank">Health API</a>.'
},
'zookeeper': {
title: 'Zookeeper',
icon: '<i class="fas fa-database"></i>',
- info: 'Provides health statistics for <b><a href="https://zookeeper.apache.org/">Zookeeper</a></b> server. Data collected through the command port using <code><a href="https://zookeeper.apache.org/doc/r3.5.5/zookeeperAdmin.html#sc_zkCommands">mntr</a></code> command.'
+ info: 'Provides health statistics for <b><a href="https://zookeeper.apache.org/" target="_blank">Zookeeper</a></b> server. Data collected through the command port using <code><a href="https://zookeeper.apache.org/doc/r3.5.5/zookeeperAdmin.html#sc_zkCommands" target="_blank">mntr</a></code> command.'
},
'hdfs': {
title: 'HDFS',
icon: '<i class="fas fa-folder-open"></i>',
- info: 'Provides <b><a href="https://hadoop.apache.org/docs/r3.2.0/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html">Hadoop Distributed File System</a></b> performance statistics. Module collects metrics over <code>Java Management Extensions</code> through the web interface of an <code>HDFS</code> daemon.'
+ info: 'Provides <b><a href="https://hadoop.apache.org/docs/r3.2.0/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html" target="_blank">Hadoop Distributed File System</a></b> performance statistics. Module collects metrics over <code>Java Management Extensions</code> through the web interface of an <code>HDFS</code> daemon.'
},
'am2320': {
@@ -573,13 +640,13 @@ netdataDashboard.menu = {
'vernemq': {
title: 'VerneMQ',
icon: '<i class="fas fa-comments"></i>',
- info: 'Performance data for the <b><a href="https://vernemq.com/">VerneMQ</a></b> open-source MQTT broker.'
+ info: 'Performance data for the <b><a href="https://vernemq.com/" target="_blank">VerneMQ</a></b> open-source MQTT broker.'
},
'pulsar': {
title: 'Pulsar',
icon: '<i class="fas fa-comments"></i>',
- info: 'Summary, namespaces and topics performance data for the <b><a href="http://pulsar.apache.org/">Apache Pulsar</a></b> pub-sub messaging system.'
+ info: 'Summary, namespaces and topics performance data for the <b><a href="http://pulsar.apache.org/" target="_blank">Apache Pulsar</a></b> pub-sub messaging system.'
},
'anomalies': {
@@ -597,13 +664,13 @@ netdataDashboard.menu = {
'statsd': {
title: 'StatsD',
icon: '<i class="fas fa-chart-line"></i>',
- info:'StatsD is an industry-standard technology stack for monitoring applications and instrumenting any piece of software to deliver custom metrics. Netdata allows the user to organize the metrics in different charts and visualize any application metric easily. Read more on <a href="https://learn.netdata.cloud/docs/agent/collectors/statsd.plugin">Netdata Learn</a>.'
+ info:'StatsD is an industry-standard technology stack for monitoring applications and instrumenting any piece of software to deliver custom metrics. Netdata allows the user to organize the metrics in different charts and visualize any application metric easily. Read more on <a href="https://learn.netdata.cloud/docs/agent/collectors/statsd.plugin" target="_blank">Netdata Learn</a>.'
},
'supervisord': {
title: 'Supervisord',
icon: '<i class="fas fa-tasks"></i>',
- info: 'Detailed statistics for each group of processes controlled by <b><a href="http://supervisord.org/">Supervisor</a></b>. ' +
+ info: 'Detailed statistics for each group of processes controlled by <b><a href="http://supervisord.org/" target="_blank">Supervisor</a></b>. ' +
'Netdata collects these metrics using <a href="http://supervisord.org/api.html#supervisor.rpcinterface.SupervisorNamespaceRPCInterface.getAllProcessInfo" target="_blank"><code>getAllProcessInfo</code></a> method.'
},
@@ -631,6 +698,11 @@ netdataDashboard.menu = {
info: 'Z scores scores relating to key system metrics.'
},
+ 'anomaly_detection': {
+ title: 'Anomaly Detection',
+ icon: '<i class="fas fa-brain"></i>',
+ info: 'Charts relating to anomaly detection, increased <code>anomalous</code> dimensions or a higher than usual <code>anomaly_rate</code> could be signs of some abnormal behaviour. Read our <a href="https://learn.netdata.cloud/guides/monitor/anomaly-detection" target="_blank">anomaly detection guide</a> for more details.'
+ },
};
@@ -697,7 +769,11 @@ netdataDashboard.submenu = {
'mem.ksm': {
title: 'deduper (ksm)',
- info: 'Kernel Same-page Merging (KSM) performance monitoring, read from several files in <code>/sys/kernel/mm/ksm/</code>. KSM is a memory-saving de-duplication feature in the Linux kernel (since version 2.6.32). The KSM daemon ksmd periodically scans those areas of user memory which have been registered with it, looking for pages of identical content which can be replaced by a single write-protected page (which is automatically copied if a process later wants to update its content). KSM was originally developed for use with KVM (where it was known as Kernel Shared Memory), to fit more virtual machines into physical memory, by sharing the data common between them. But it can be useful to any application which generates many instances of the same data.'
+ info: '<a href="https://en.wikipedia.org/wiki/Kernel_same-page_merging" target="_blank">Kernel Same-page Merging</a> '+
+ '(KSM) performance monitoring, read from several files in <code>/sys/kernel/mm/ksm/</code>. '+
+ 'KSM is a memory-saving de-duplication feature in the Linux kernel. '+
+ 'The KSM daemon ksmd periodically scans those areas of user memory which have been registered with it, '+
+ 'looking for pages of identical content which can be replaced by a single write-protected page.'
},
'mem.hugepages': {
@@ -708,8 +784,51 @@ netdataDashboard.submenu = {
info: 'Non-Uniform Memory Access (NUMA) is a hierarchical memory design the memory access time is dependent on locality. Under NUMA, a processor can access its own local memory faster than non-local memory (memory local to another processor or memory shared between processors). The individual metrics are described in the <a href="https://www.kernel.org/doc/Documentation/numastat.txt" target="_blank">Linux kernel documentation</a>.'
},
+ 'mem.ecc': {
+ info: '<p><a href="https://en.wikipedia.org/wiki/ECC_memory" target="_blank">ECC memory</a> '+
+ 'is a type of computer data storage that uses an error correction code (ECC) to detect '+
+ 'and correct n-bit data corruption which occurs in memory. '+
+ 'Typically, ECC memory maintains a memory system immune to single-bit errors: '+
+ 'the data that is read from each word is always the same as the data that had been written to it, '+
+ 'even if one of the bits actually stored has been flipped to the wrong state.</p>'+
+ '<p>Memory errors can be classified into two types: '+
+ '<b>Soft errors</b>, which randomly corrupt bits but do not leave physical damage. '+
+ 'Soft errors are transient in nature and are not repeatable, can be because of electrical or '+
+ 'magnetic interference. '+
+ '<b>Hard errors</b>, which corrupt bits in a repeatable manner because '+
+ 'of a physical/hardware defect or an environmental problem.</p>'
+ },
+
+ 'mem.pagetype': {
+ info: 'Statistics of free memory available from '+
+ '<a href="https://en.wikipedia.org/wiki/Buddy_memory_allocation" target="_blank">memory buddy allocator</a>. '+
+ 'The buddy allocator is the system memory allocator. '+
+ 'The whole memory space is split in physical pages, which are grouped by '+
+ 'NUMA node, zone, '+
+ '<a href="https://lwn.net/Articles/224254/" target="_blank">migrate type</a>, and size of the block. '+
+ 'By keeping pages grouped based on their ability to move, '+
+ 'the kernel can reclaim pages within a page block to satisfy a high-order allocation. '+
+ 'When the kernel or an application requests some memory, the buddy allocator provides a page that matches closest the request.'
+ },
+
'ip.ecn': {
- info: '<a href="https://en.wikipedia.org/wiki/Explicit_Congestion_Notification" target="_blank">Explicit Congestion Notification (ECN)</a> is a TCP extension that allows end-to-end notification of network congestion without dropping packets. ECN is an optional feature that may be used between two ECN-enabled endpoints when the underlying network infrastructure also supports it.'
+ info: '<a href="https://en.wikipedia.org/wiki/Explicit_Congestion_Notification" target="_blank">Explicit Congestion Notification (ECN)</a> '+
+ 'is an extension to the IP and to the TCP that allows end-to-end notification of network congestion without dropping packets. '+
+ 'ECN is an optional feature that may be used between two ECN-enabled endpoints when '+
+ 'the underlying network infrastructure also supports it.'
+ },
+
+ 'ip.multicast': {
+ info: '<a href="https://en.wikipedia.org/wiki/Multicast" target="_blank">IP multicast</a> is a technique for '+
+ 'one-to-many communication over an IP network. '+
+ 'Multicast uses network infrastructure efficiently by requiring the source to send a packet only once, '+
+ 'even if it needs to be delivered to a large number of receivers. '+
+ 'The nodes in the network take care of replicating the packet to reach multiple receivers only when necessary.'
+ },
+ 'ip.broadcast': {
+ info: 'In computer networking, '+
+ '<a href="https://en.wikipedia.org/wiki/Broadcasting_(networking)" target="_blank">broadcasting</a> refers to transmitting a packet that will be received by every device on the network. '+
+ 'In practice, the scope of the broadcast is limited to a broadcast domain.'
},
'netfilter.conntrack': {
@@ -724,7 +843,15 @@ netdataDashboard.submenu = {
'netfilter.synproxy': {
title: 'DDoS protection',
- info: 'DDoS protection performance metrics. <a href="https://github.com/firehol/firehol/wiki/Working-with-SYNPROXY" target="_blank">SYNPROXY</a> is a TCP SYN packets proxy. It is used to protect any TCP server (like a web server) from SYN floods and similar DDoS attacks. It is a netfilter module, in the Linux kernel (since version 3.12). It is optimized to handle millions of packets per second utilizing all CPUs available without any concurrency locking between the connections. It can be used for any kind of TCP traffic (even encrypted), since it does not interfere with the content itself.'
+ info: 'DDoS protection performance metrics. <a href="https://github.com/firehol/firehol/wiki/Working-with-SYNPROXY" target="_blank">SYNPROXY</a> '+
+ 'is a TCP SYN packets proxy. '+
+ 'It is used to protect any TCP server (like a web server) from SYN floods and similar DDoS attacks. '+
+ 'SYNPROXY intercepts new TCP connections and handles the initial 3-way handshake using syncookies '+
+ 'instead of conntrack to establish the connection. '+
+ 'It is optimized to handle millions of packets per second utilizing all CPUs available without '+
+ 'any concurrency locking between the connections. '+
+ 'It can be used for any kind of TCP traffic (even encrypted), '+
+ 'since it does not interfere with the content itself.'
},
'ipfw.dynamic_rules': {
@@ -736,17 +863,43 @@ netdataDashboard.submenu = {
title: 'softnet',
info: function (os) {
if (os === 'linux')
- return 'Statistics for CPUs SoftIRQs related to network receive work. Break down per CPU core can be found at <a href="#menu_cpu_submenu_softnet_stat">CPU / softnet statistics</a>. <b>processed</b> states the number of packets processed, <b>dropped</b> is the number packets dropped because the network device backlog was full (to fix them on Linux use <code>sysctl</code> to increase <code>net.core.netdev_max_backlog</code>), <b>squeezed</b> is the number of packets dropped because the network device budget ran out (to fix them on Linux use <code>sysctl</code> to increase <code>net.core.netdev_budget</code> and/or <code>net.core.netdev_budget_usecs</code>). More information about identifying and troubleshooting network driver related issues can be found at <a href="https://access.redhat.com/sites/default/files/attachments/20150325_network_performance_tuning.pdf" target="_blank">Red Hat Enterprise Linux Network Performance Tuning Guide</a>.';
+ return '<p>Statistics for CPUs SoftIRQs related to network receive work. '+
+ 'Break down per CPU core can be found at <a href="#menu_cpu_submenu_softnet_stat">CPU / softnet statistics</a>. '+
+ 'More information about identifying and troubleshooting network driver related issues can be found at '+
+ '<a href="https://access.redhat.com/sites/default/files/attachments/20150325_network_performance_tuning.pdf" target="_blank">Red Hat Enterprise Linux Network Performance Tuning Guide</a>.</p>'+
+ '<p><b>Processed</b> - packets processed. '+
+ '<b>Dropped</b> - packets dropped because the network device backlog was full. '+
+ '<b>Squeezed</b> - number of times the network device budget was consumed or the time limit was reached, '+
+ 'but more work was available. '+
+ '<b>ReceivedRPS</b> - number of times this CPU has been woken up to process packets via an Inter-processor Interrupt. '+
+ '<b>FlowLimitCount</b> - number of times the flow limit has been reached (flow limiting is an optional '+
+ 'Receive Packet Steering feature).</p>';
else
return 'Statistics for CPUs SoftIRQs related to network receive work.';
}
},
+ 'system.clock synchronization': {
+ info: '<a href="https://en.wikipedia.org/wiki/Network_Time_Protocol" target="_blank">NTP</a> '+
+ 'lets you automatically sync your system time with a remote server. '+
+ 'This keeps your machine’s time accurate by syncing with servers that are known to have accurate times.'
+ },
+
'cpu.softnet_stat': {
title: 'softnet',
info: function (os) {
if (os === 'linux')
- return 'Statistics for per CPUs core SoftIRQs related to network receive work. Total for all CPU cores can be found at <a href="#menu_system_submenu_softnet_stat">System / softnet statistics</a>. <b>processed</b> states the number of packets processed, <b>dropped</b> is the number packets dropped because the network device backlog was full (to fix them on Linux use <code>sysctl</code> to increase <code>net.core.netdev_max_backlog</code>), <b>squeezed</b> is the number of packets dropped because the network device budget ran out (to fix them on Linux use <code>sysctl</code> to increase <code>net.core.netdev_budget</code> and/or <code>net.core.netdev_budget_usecs</code>). More information about identifying and troubleshooting network driver related issues can be found at <a href="https://access.redhat.com/sites/default/files/attachments/20150325_network_performance_tuning.pdf" target="_blank">Red Hat Enterprise Linux Network Performance Tuning Guide</a>.';
+ return '<p>Statistics for CPUs SoftIRQs related to network receive work. '+
+ 'Total for all CPU cores can be found at <a href="#menu_system_submenu_softnet_stat">System / softnet statistics</a>. '+
+ 'More information about identifying and troubleshooting network driver related issues can be found at '+
+ '<a href="https://access.redhat.com/sites/default/files/attachments/20150325_network_performance_tuning.pdf" target="_blank">Red Hat Enterprise Linux Network Performance Tuning Guide</a>.</p>'+
+ '<p><b>Processed</b> - packets processed. '+
+ '<b>Dropped</b> - packets dropped because the network device backlog was full. '+
+ '<b>Squeezed</b> - number of times the network device budget was consumed or the time limit was reached, '+
+ 'but more work was available. '+
+ '<b>ReceivedRPS</b> - number of times this CPU has been woken up to process packets via an Inter-processor Interrupt. '+
+ '<b>FlowLimitCount</b> - number of times the flow limit has been reached (flow limiting is an optional '+
+ 'Receive Packet Steering feature).</p>';
else
return 'Statistics for per CPUs core SoftIRQs related to network receive work. Total for all CPU cores can be found at <a href="#menu_system_submenu_softnet_stat">System / softnet statistics</a>.';
}
@@ -773,7 +926,7 @@ netdataDashboard.submenu = {
'couchdb.perdbstats': {
title: 'per db statistics',
- info: 'Statistics per database. This includes <a href="http://docs.couchdb.org/en/latest/api/database/common.html#get--db">3 size graphs per database</a>: active (the size of live data in the database), external (the uncompressed size of the database contents), and file (the size of the file on disk, exclusive of any views and indexes). It also includes the number of documents and number of deleted documents per database.'
+ info: 'Statistics per database. This includes <a href="http://docs.couchdb.org/en/latest/api/database/common.html#get--db" target="_blank">3 size graphs per database</a>: active (the size of live data in the database), external (the uncompressed size of the database contents), and file (the size of the file on disk, exclusive of any views and indexes). It also includes the number of documents and number of deleted documents per database.'
},
'couchdb.erlang': {
@@ -783,15 +936,151 @@ netdataDashboard.submenu = {
'ntpd.system': {
title: 'system',
- info: 'Statistics of the system variables as shown by the readlist billboard <code>ntpq -c rl</code>. System variables are assigned an association ID of zero and can also be shown in the readvar billboard <code>ntpq -c "rv 0"</code>. These variables are used in the <a href="http://doc.ntp.org/current-stable/discipline.html">Clock Discipline Algorithm</a>, to calculate the lowest and most stable offset.'
+ info: 'Statistics of the system variables as shown by the readlist billboard <code>ntpq -c rl</code>. System variables are assigned an association ID of zero and can also be shown in the readvar billboard <code>ntpq -c "rv 0"</code>. These variables are used in the <a href="http://doc.ntp.org/current-stable/discipline.html" target="_blank">Clock Discipline Algorithm</a>, to calculate the lowest and most stable offset.'
},
'ntpd.peers': {
title: 'peers',
info: 'Statistics of the peer variables for each peer configured in <code>/etc/ntp.conf</code> as shown by the readvar billboard <code>ntpq -c "rv &lt;association&gt;"</code>, while each peer is assigned a nonzero association ID as shown by <code>ntpq -c "apeers"</code>. The module periodically scans for new/changed peers (default: every 60s). <b>ntpd</b> selects the best possible peer from the available peers to synchronize the clock. A minimum of at least 3 peers is required to properly identify the best possible peer.'
- }
-};
+ },
+
+ 'mem.page_cache': {
+ title: 'page cache (eBPF)',
+ info: 'Monitor calls to functions used to manipulate <a href="https://en.wikipedia.org/wiki/Page_cache" target="_blank">Linux page cache</a>. When integration with apps is <a href="https://learn.netdata.cloud/guides/troubleshoot/monitor-debug-applications-ebpf" target="_blank">enabled</a>, Netdata also shows page cache manipulation per <a href="#menu_apps_submenu_page_cache">application</a>.'
+ },
+
+ 'apps.page_cache': {
+ title: 'page cache (eBPF)',
+ info: 'Netdata also gives a summary for these charts in <a href="#menu_mem_submenu_page_cache">Memory submenu</a>.'
+ },
+
+ 'filesystem.vfs': {
+ title: 'vfs (eBPF)',
+ info: 'Monitor calls to functions used to manipulate <a href="https://learn.netdata.cloud/docs/agent/collectors/ebpf.plugin#vfs" target="_blank">File Systems</a>. When integration with apps is <a href="https://learn.netdata.cloud/guides/troubleshoot/monitor-debug-applications-ebpf" target="_blank">enabled</a>, Netdata also shows Virtual File System per <a href="#menu_apps_submenu_vfs">application</a>.'
+ },
+
+ 'apps.vfs': {
+ title: 'vfs (eBPF)',
+ info: 'Netdata also gives a summary for these charts in <a href="#menu_filesystem_submenu_vfs">Filesystem submenu</a>.'
+ },
+
+ 'filesystem.ext4_latency': {
+ title: 'ext4 latency (eBPF)',
+ info: 'Latency is the time it takes for an event to be completed. We calculate the difference between the calling and return times, this spans disk I/O, file system operations (lock, I/O), run queue latency and all events related to the monitored action. Based on the eBPF <a href="http://www.brendangregg.com/blog/2016-10-06/linux-bcc-ext4dist-ext4slower.html" target="_blank">ext4dist</a> from BCC tools.'
+ },
+
+ 'filesystem.xfs_latency': {
+ title: 'xfs latency (eBPF)',
+ info: 'Latency is the time it takes for an event to be completed. We calculate the difference between the calling and return times, this spans disk I/O, file system operations (lock, I/O), run queue latency and all events related to the monitored action. Based on the eBPF <a href="https://github.com/iovisor/bcc/blob/master/tools/xfsdist_example.txt" target="_blank">xfsdist</a> from BCC tools.'
+ },
+
+ 'filesystem.nfs_latency': {
+ title: 'nfs latency (eBPF)',
+ info: 'Latency is the time it takes for an event to be completed. We calculate the difference between the calling and return times, this spans disk I/O, file system operations (lock, I/O), run queue latency and all events related to the monitored action. Based on the eBPF <a href="https://github.com/iovisor/bcc/blob/master/tools/nfsdist_example.txt" target="_blank">nfsdist</a> from BCC tools.'
+ },
+
+ 'filesystem.zfs_latency': {
+ title: 'zfs latency (eBPF)',
+ info: 'Latency is the time it takes for an event to be completed. We calculate the difference between the calling and return times, this spans disk I/O, file system operations (lock, I/O), run queue latency and all events related to the monitored action. Based on the eBPF <a href="https://github.com/iovisor/bcc/blob/master/tools/zfsdist_example.txt" target="_blank">zfsdist</a> from BCC tools.'
+ },
+
+ 'filesystem.btrfs_latency': {
+ title: 'btrfs latency (eBPF)',
+ info: 'Latency is the time it takes for an event to be completed. We calculate the difference between the calling and return times, take the logarithm of the result, and increment the respective bin by one. Based on the eBPF <a href="https://github.com/iovisor/bcc/blob/master/tools/btrfsdist_example.txt" target="_blank">btrfsdist</a> from BCC tools.'
+ },
+
+ 'filesystem.file_access': {
+ title: 'file access (eBPF)',
+ info: 'When integration with apps is <a href="https://learn.netdata.cloud/guides/troubleshoot/monitor-debug-applications-ebpf" target="_blank">enabled</a>, Netdata also shows file access per <a href="#menu_apps_submenu_file_access">application</a>.'
+ },
+
+ 'apps.file_access': {
+ title: 'file access (eBPF)',
+ info: 'Netdata also gives a summary for this chart on <a href="#menu_filesystem_submenu_file_access">Filesystem submenu</a> (more details on <a href="https://learn.netdata.cloud/docs/agent/collectors/ebpf.plugin#file" target="_blank">eBPF plugin file chart section</a>).'
+ },
+
+ 'ip.kernel': {
+ title: 'kernel functions (eBPF)',
+ info: 'Next charts are made when <code>ebpf.plugin</code> is running on your host. When integration with apps is <a href="https://learn.netdata.cloud/guides/troubleshoot/monitor-debug-applications-ebpf" target="_blank">enabled</a>, Netdata also shows calls for kernel functions per <a href="#menu_apps_submenu_net">application</a>.'
+ },
+
+ 'apps.net': {
+ title: 'network',
+ info: 'Netdata also gives a summary for eBPF charts in <a href="#menu_ip_submenu_kernel">Networking Stack submenu</a>.'
+ },
+
+ 'system.ipc semaphores': {
+ info: 'System V semaphores is an inter-process communication (IPC) mechanism. '+
+ 'It allows processes or threads within a process to synchronize their actions. '+
+ 'They are often used to monitor and control the availability of system resources such as shared memory segments. ' +
+ 'For details, see <a href="https://man7.org/linux/man-pages/man7/svipc.7.html" target="_blank">svipc(7)</a>. ' +
+ 'To see the host IPC semaphore information, run <code>ipcs -us</code>. For limits, run <code>ipcs -ls</code>.'
+ },
+
+ 'system.ipc shared memory': {
+ info: 'System V shared memory is an inter-process communication (IPC) mechanism. '+
+ 'It allows processes to communicate information by sharing a region of memory. '+
+ 'It is the fastest form of inter-process communication available since no kernel involvement occurs when data is passed between the processes (no copying). '+
+ 'Typically, processes must synchronize their access to a shared memory object, using, for example, POSIX semaphores. '+
+ 'For details, see <a href="https://man7.org/linux/man-pages/man7/svipc.7.html" target="_blank">svipc(7)</a>. '+
+ 'To see the host IPC shared memory information, run <code>ipcs -um</code>. For limits, run <code>ipcs -lm</code>.'
+ },
+
+ 'system.ipc message queues': {
+ info: 'System V message queues is an inter-process communication (IPC) mechanism. '+
+ 'It allows processes to exchange data in the form of messages. '+
+ 'For details, see <a href="https://man7.org/linux/man-pages/man7/svipc.7.html" target="_blank">svipc(7)</a>. ' +
+ 'To see the host IPC messages information, run <code>ipcs -uq</code>. For limits, run <code>ipcs -lq</code>.'
+ },
+
+ 'system.interrupts': {
+ info: '<a href="https://en.wikipedia.org/wiki/Interrupt" target="_blank"><b>Interrupts</b></a> are signals '+
+ 'sent to the CPU by external devices (normally I/O devices) or programs (running processes). '+
+ 'They tell the CPU to stop its current activities and execute the appropriate part of the operating system. '+
+ 'Interrupt types are '+
+ '<b>hardware</b> (generated by hardware devices to signal that they need some attention from the OS), '+
+ '<b>software</b> (generated by programs when they want to request a system call to be performed by the operating system), and '+
+ '<b>traps</b> (generated by the CPU itself to indicate that some error or condition occurred for which assistance from the operating system is needed).'
+ },
+
+ 'system.softirqs': {
+ info: 'Software interrupts (or "softirqs") are one of the oldest deferred-execution mechanisms in the kernel. '+
+ 'Several tasks among those executed by the kernel are not critical: '+
+ 'they can be deferred for a long period of time, if necessary. '+
+ 'The deferrable tasks can execute with all interrupts enabled '+
+ '(softirqs are patterned after hardware interrupts). '+
+ 'Taking them out of the interrupt handler helps keep kernel response time small.'
+ },
+
+ 'cpu.softirqs': {
+ info: 'Total number of software interrupts per CPU. '+
+ 'To see the total number for the system check the <a href="#menu_system_submenu_softirqs">softirqs</a> section.'
+ },
+
+ 'cpu.interrupts': {
+ info: 'Total number of interrupts per CPU. '+
+ 'To see the total number for the system check the <a href="#menu_system_submenu_interrupts">interrupts</a> section. '+
+ 'The last column in <code>/proc/interrupts</code> provides an interrupt description or the device name that registered the handler for that interrupt.'
+ },
+
+ 'cpu.throttling': {
+ info: 'CPU throttling is commonly used to automatically slow down the computer '+
+ 'when possible to use less energy and conserve battery.'
+ },
+ 'cpu.cpuidle': {
+ info: '<a href="https://en.wikipedia.org/wiki/Advanced_Configuration_and_Power_Interface#Processor_states" target="_blank">Idle States (C-states)</a> '+
+ 'are used to save power when the processor is idle.'
+ },
+
+ 'services.net': {
+ title: 'network (eBPF)',
+ },
+
+ 'services.page_cache': {
+ title: 'page cache (eBPF)',
+ },
+};
// ----------------------------------------------------------------------------
// chart
@@ -815,7 +1104,14 @@ netdataDashboard.context = {
void (os);
return 'Total CPU utilization (all cores). 100% here means there is no CPU idle time at all. You can get per core usage at the <a href="#menu_cpu">CPUs</a> section and per application usage at the <a href="#menu_apps">Applications Monitoring</a> section.'
+ netdataDashboard.sparkline('<br/>Keep an eye on <b>iowait</b> ', 'system.cpu', 'iowait', '%', '. If it is constantly high, your disks are a bottleneck and they slow your system down.')
- + netdataDashboard.sparkline('<br/>An important metric worth monitoring, is <b>softirq</b> ', 'system.cpu', 'softirq', '%', '. A constantly high percentage of softirq may indicate network driver issues.');
+ + netdataDashboard.sparkline(
+ '<br/>An important metric worth monitoring, is <b>softirq</b> ',
+ 'system.cpu',
+ 'softirq',
+ '%',
+ '. A constantly high percentage of softirq may indicate network driver issues. '+
+ 'The individual metrics can be found in the '+
+ '<a href="https://www.kernel.org/doc/html/latest/filesystems/proc.html#miscellaneous-kernel-statistics-in-proc-stat" target="_blank">kernel documentation</a>.');
},
valueRange: "[0, 100]"
},
@@ -826,14 +1122,14 @@ netdataDashboard.context = {
},
'system.cpu_pressure': {
- info: '<a href="https://www.kernel.org/doc/html/latest/accounting/psi.html">Pressure Stall Information</a> ' +
+ info: '<a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a> ' +
'identifies and quantifies the disruptions caused by resource contentions. ' +
'The "some" line indicates the share of time in which at least <b>some</b> tasks are stalled on CPU. ' +
'The ratios (in %) are tracked as recent trends over 10-, 60-, and 300-second windows.'
},
'system.memory_some_pressure': {
- info: '<a href="https://www.kernel.org/doc/html/latest/accounting/psi.html">Pressure Stall Information</a> ' +
+ info: '<a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a> ' +
'identifies and quantifies the disruptions caused by resource contentions. ' +
'The "some" line indicates the share of time in which at least <b>some</b> tasks are stalled on memory. ' +
'The "full" line indicates the share of time in which <b>all non-idle</b> tasks are stalled on memory simultaneously. ' +
@@ -842,7 +1138,7 @@ netdataDashboard.context = {
},
'system.io_some_pressure': {
- info: '<a href="https://www.kernel.org/doc/html/latest/accounting/psi.html">Pressure Stall Information</a> ' +
+ info: '<a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a> ' +
'identifies and quantifies the disruptions caused by resource contentions. ' +
'The "some" line indicates the share of time in which at least <b>some</b> tasks are stalled on I/O. ' +
'The "full" line indicates the share of time in which <b>all non-idle</b> tasks are stalled on I/O simultaneously. ' +
@@ -866,7 +1162,9 @@ netdataDashboard.context = {
},
'system.swapio': {
- info: 'Total Swap I/O. (netdata measures both <code>in</code> and <code>out</code>. If either of the metrics <code>in</code> or <code>out</code> is not shown in the chart, the reason is that the metric is zero. - you can change the page settings to always render all the available dimensions on all charts).'
+ info: '<p>System swap I/O.</p>'+
+ '<b>In</b> - pages the system has swapped in from disk to RAM. '+
+ '<b>Out</b> - pages the system has swapped out from RAM to disk.'
},
'system.pgfaults': {
@@ -879,33 +1177,67 @@ netdataDashboard.context = {
},
'system.clock_sync_state': {
- info: 'State map: 0 - not synchronized, 1 - synchronized'
+ info:'<p>The system clock synchronization state. '+
+ 'It is strongly recommended having the clock in sync with reliable NTP servers. Otherwise, '+
+ 'it leads to unpredictable problems. '+
+ 'It can take several minutes (usually up to 17) before NTP daemon selects a server to synchronize with.</p>'+
+ '<p><b>State map</b>: 0 - not synchronized, 1 - synchronized.</p>'
+ },
+
+ 'system.clock_sync_offset': {
+ info: 'A typical NTP client regularly polls one or more NTP servers. '+
+ 'The client must compute its '+
+ '<a href="https://en.wikipedia.org/wiki/Network_Time_Protocol#Clock_synchronization_algorithm" target="_blank">time offset</a> '+
+ 'and round-trip delay. '+
+ 'Time offset is the difference in absolute time between the two clocks.'
},
'system.forks': {
colors: '#5555DD',
- info: 'Number of new processes created.'
+ info: 'The number of new processes created.'
},
'system.intr': {
colors: '#DD5555',
- info: 'Total number of CPU interrupts. Check <code>system.interrupts</code> that gives more detail about each interrupt and also the <a href="#menu_cpu">CPUs</a> section where interrupts are analyzed per CPU core.'
+ info: 'Total number of CPU interrupts. Check <code>system.interrupts</code> that gives more detail about each interrupt and also the <a href="#menu_cpu">CPUs</a> section where interrupts are analyzed <a href="#menu_cpu_submenu_interrupts">per CPU core</a>.'
},
'system.interrupts': {
- info: 'CPU interrupts in detail. At the <a href="#menu_cpu">CPUs</a> section, interrupts are analyzed per CPU core.'
+ info: 'CPU interrupts in detail. At the <a href="#menu_cpu">CPUs</a> section, interrupts are analyzed <a href="#menu_cpu_submenu_interrupts">per CPU core</a>. '+
+ 'The last column in <code>/proc/interrupts</code> provides an interrupt description or the device name that registered the handler for that interrupt.'
+ },
+
+ 'system.hardirq_latency': {
+ info: 'Total time spent servicing hardware interrupts. Based on the eBPF <a href="https://github.com/iovisor/bcc/blob/master/tools/hardirqs_example.txt" target="_blank">hardirqs</a> from BCC tools.'
},
'system.softirqs': {
- info: 'CPU softirqs in detail. At the <a href="#menu_cpu">CPUs</a> section, softirqs are analyzed per CPU core.'
+ info: '<p>Total number of software interrupts in the system. '+
+ 'At the <a href="#menu_cpu">CPUs</a> section, softirqs are analyzed <a href="#menu_cpu_submenu_softirqs">per CPU core</a>.</p>'+
+ '<p><b>HI</b> - high priority tasklets. '+
+ '<b>TIMER</b> - tasklets related to timer interrupts. '+
+ '<b>NET_TX</b>, <b>NET_RX</b> - used for network transmit and receive processing. '+
+ '<b>BLOCK</b> - handles block I/O completion events. '+
+ '<b>IRQ_POLL</b> - used by the IO subsystem to increase performance (a NAPI like approach for block devices). '+
+ '<b>TASKLET</b> - handles regular tasklets. '+
+ '<b>SCHED</b> - used by the scheduler to perform load-balancing and other scheduling tasks. '+
+ '<b>HRTIMER</b> - used for high-resolution timers. '+
+ '<b>RCU</b> - performs read-copy-update (RCU) processing.</p>'
+
+ },
+
+ 'system.softirq_latency': {
+ info: 'Total time spent servicing software interrupts. Based on the eBPF <a href="https://github.com/iovisor/bcc/blob/master/tools/softirqs_example.txt" target="_blank">softirqs</a> from BCC tools.'
},
'system.processes': {
- info: 'System processes. <b>Running</b> are the processes in the CPU. <b>Blocked</b> are processes that are willing to enter the CPU, but they cannot, e.g. because they wait for disk activity.'
+ info: '<p>System processes.</p>'+
+ '<p><b>Running</b> - running or ready to run (runnable). '+
+ '<b>Blocked</b> - currently blocked, waiting for I/O to complete.</p>'
},
'system.active_processes': {
- info: 'All system processes.'
+ info: 'The total number of processes in the system.'
},
'system.ctxt': {
@@ -947,6 +1279,67 @@ netdataDashboard.context = {
info: 'System swap memory usage. Swap space is used when the amount of physical memory (RAM) is full. When the system needs more memory resources and the RAM is full, inactive pages in memory are moved to the swap space (usually a disk, a disk partition or a file).'
},
+ 'system.swapcalls': {
+ info: 'Monitor calls to functions <code>swap_readpage</code> and <code>swap_writepage</code>. When integration with apps is <a href="https://learn.netdata.cloud/guides/troubleshoot/monitor-debug-applications-ebpf" target="_blank">enabled</a>, Netdata also shows swap access per <a href="#menu_apps_submenu_swap">application</a>.'
+ },
+
+ 'system.ipc_semaphores': {
+ info: 'Number of allocated System V IPC semaphores. '+
+ 'The system-wide limit on the number of semaphores in all semaphore sets is specified in <code>/proc/sys/kernel/sem</code> file (2nd field).'
+ },
+
+ 'system.ipc_semaphore_arrays': {
+ info: 'Number of used System V IPC semaphore arrays (sets). Semaphores support semaphore sets where each one is a counting semaphore. '+
+ 'So when an application requests semaphores, the kernel releases them in sets. '+
+ 'The system-wide limit on the maximum number of semaphore sets is specified in <code>/proc/sys/kernel/sem</code> file (4th field).'
+ },
+
+ 'system.shared_memory_segments': {
+ info: 'Number of allocated System V IPC memory segments. '+
+ 'The system-wide maximum number of shared memory segments that can be created is specified in <code>/proc/sys/kernel/shmmni</code> file.'
+ },
+
+ 'system.shared_memory_bytes': {
+ info: 'Amount of memory currently used by System V IPC memory segments. '+
+ 'The run-time limit on the maximum shared memory segment size that can be created is specified in <code>/proc/sys/kernel/shmmax</code> file.'
+ },
+
+ 'system.shared_memory_calls': {
+ info: 'Monitor calls to functions <code>shmget</code>, <code>shmat</code>, <code>shmdt</code>, and <code>shmctl</code>. When integration with apps is <a href="https://learn.netdata.cloud/guides/troubleshoot/monitor-debug-applications-ebpf" target="_blank">enabled</a>, Netdata also shows shared memory system call usage <a href="#menu_apps_submenu_ipc_shared_memory">per application</a>.'
+ },
+
+ 'system.message_queue_messages': {
+ info: 'Number of messages that are currently present in System V IPC message queues.'
+ },
+
+ 'system.message_queue_bytes': {
+ info: 'Amount of memory currently used by messages in System V IPC message queues.'
+ },
+
+ 'system.uptime': {
+ info: 'The amount of time the system has been running, including time spent in suspend.'
+ },
+
+ 'system.process_thread': {
+ title : 'Task creation',
+ info: 'Number of times that either <a href="https://www.ece.uic.edu/~yshi1/linux/lkse/node4.html#SECTION00421000000000000000" target="_blank">do_fork</a>, or <code>kernel_clone</code> if you are running kernel newer than 5.9.16, is called to create a new task, which is the common name used to define processes and threads inside the kernel. Netdata identifies the threads by monitoring the tracepoint <code>sched_process_fork</code>. This chart is provided by eBPF plugin.'
+ },
+
+ 'system.exit': {
+ title : 'Exit monitoring',
+ info: 'Calls for the functions responsible for closing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">do_exit</a>) and releasing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">release_task</a>) tasks. This chart is provided by eBPF plugin.'
+ },
+
+ 'system.task_error': {
+ title : 'Task error',
+ info: 'Number of errors to create a new process or thread. This chart is provided by eBPF plugin.'
+ },
+
+ 'system.process_status': {
+ title : 'Task status',
+ info: 'Difference between the number of processes created and the number of threads created per period (<code>process</code> dimension). It also shows the number of possible zombie processes running on the system. This chart is provided by eBPF plugin.'
+ },
+
// ------------------------------------------------------------------------
// CPU charts
@@ -971,13 +1364,42 @@ netdataDashboard.context = {
commonMax: true
},
+ 'cpu.core_throttling': {
+ info: 'The number of adjustments made to the clock speed of the CPU based on its core temperature.'
+ },
+
+ 'cpu.package_throttling': {
+ info: 'The number of adjustments made to the clock speed of the CPU based on its package (chip) temperature.'
+ },
+
+ 'cpufreq.cpufreq': {
+ info: 'The frequency measures the number of cycles your CPU executes per second.'
+ },
+
+ 'cpuidle.cpuidle': {
+ info: 'The percentage of time spent in C-states.'
+ },
+
// ------------------------------------------------------------------------
// MEMORY
+ 'mem.ksm': {
+ info: '<p>Memory pages merging statistics. '+
+ 'A high ratio of <b>Sharing</b> to <b>Shared</b> indicates good sharing, '+
+ 'but a high ratio of <b>Unshared</b> to <b>Sharing</b> indicates wasted effort.</p>'+
+ '<p><b>Shared</b> - used shared pages. '+
+ '<b>Unshared</b> - memory no longer shared (pages are unique but repeatedly checked for merging). '+
+ '<b>Sharing</b> - memory currently shared (how many more sites are sharing the pages, i.e. how much saved). '+
+ '<b>Volatile</b> - volatile pages (changing too fast to be placed in a tree).</p>'
+ },
+
'mem.ksm_savings': {
heads: [
netdataDashboard.gaugeChart('Saved', '12%', 'savings', '#0099CC')
- ]
+ ],
+ info: '<p>The amount of memory saved by KSM.</p>'+
+ '<p><b>Savings</b> - saved memory. '+
+ '<b>Offered</b> - memory marked as mergeable.</p>'
},
'mem.ksm_ratios': {
@@ -996,7 +1418,9 @@ netdataDashboard.context = {
+ ' data-points="CHART_DURATION"'
+ ' role="application"></div>';
}
- ]
+ ],
+ info: 'The effectiveness of KSM. '+
+ 'This is the percentage of the mergeable pages that are currently merged.'
},
'mem.zram_usage': {
@@ -1026,7 +1450,12 @@ netdataDashboard.context = {
'mem.pgfaults': {
- info: 'A <a href="https://en.wikipedia.org/wiki/Page_fault" target="_blank">page fault</a> is a type of interrupt, called trap, raised by computer hardware when a running program accesses a memory page that is mapped into the virtual address space, but not actually loaded into main memory. If the page is loaded in memory at the time the fault is generated, but is not marked in the memory management unit as being loaded in memory, then it is called a <b>minor</b> or soft page fault. A <b>major</b> page fault is generated when the system needs to load the memory page from disk or swap memory.'
+ info: '<p>A <a href="https://en.wikipedia.org/wiki/Page_fault" target="_blank">page fault</a> is a type of interrupt, '+
+ 'called trap, raised by computer hardware when a running program accesses a memory page '+
+ 'that is mapped into the virtual address space, but not actually loaded into main memory.</p>'+
+ '<p><b>Minor</b> - the page is loaded in memory at the time the fault is generated, '+
+ 'but is not marked in the memory management unit as being loaded in memory. '+
+ '<b>Major</b> - generated when the system needs to load the memory page from disk or swap memory.</p>'
},
'mem.committed': {
@@ -1034,6 +1463,33 @@ netdataDashboard.context = {
info: 'Committed Memory, is the sum of all memory which has been allocated by processes.'
},
+ 'mem.oom_kill': {
+ info: 'The number of processes killed by '+
+ '<a href="https://en.wikipedia.org/wiki/Out_of_memory" target="_blank">Out of Memory</a> Killer. '+
+ 'The kernel\'s OOM killer is summoned when the system runs short of free memory and '+
+ 'is unable to proceed without killing one or more processes. '+
+ 'It tries to pick the process whose demise will free the most memory while '+
+ 'causing the least misery for users of the system. '+
+ 'This counter also includes processes within containers that have exceeded the memory limit.'
+ },
+
+ 'mem.numa': {
+ info: '<p>NUMA balancing statistics.</p>'+
+ '<p><b>Local</b> - pages successfully allocated on this node, by a process on this node. '+
+ '<b>Foreign</b> - pages initially intended for this node that were allocated to another node instead. '+
+ '<b>Interleave</b> - interleave policy pages successfully allocated to this node. '+
+ '<b>Other</b> - pages allocated on this node, by a process on another node. '+
+ '<b>PteUpdates</b> - base pages that were marked for NUMA hinting faults. '+
+ '<b>HugePteUpdates</b> - transparent huge pages that were marked for NUMA hinting faults. '+
+ 'In combination with <b>PteUpdates</b> the total address space that was marked can be calculated. '+
+ '<b>HintFaults</b> - NUMA hinting faults that were trapped. '+
+ '<b>HintFaultsLocal</b> - hinting faults that were to local nodes. '+
+ 'In combination with <b>HintFaults</b>, the percentage of local versus remote faults can be calculated. '+
+ 'A high percentage of local hinting faults indicates that the workload is closer to being converged. '+
+ '<b>PagesMigrated</b> - pages were migrated because they were misplaced. '+
+ 'As migration is a copying operation, it contributes the largest part of the overhead created by NUMA balancing.</p>'
+ },
+
'mem.available': {
info: 'Available Memory is estimated by the kernel, as the amount of RAM that can be used by userspace processes, without causing swapping.'
},
@@ -1043,11 +1499,19 @@ netdataDashboard.context = {
},
'mem.kernel': {
- info: 'The total amount of memory being used by the kernel. <b>Slab</b> is the amount of memory used by the kernel to cache data structures for its own use. <b>KernelStack</b> is the amount of memory allocated for each task done by the kernel. <b>PageTables</b> is the amount of memory dedicated to the lowest level of page tables (A page table is used to turn a virtual address into a physical memory address). <b>VmallocUsed</b> is the amount of memory being used as virtual address space.'
+ info: '<p>The total amount of memory being used by the kernel.</p>'+
+ '<p><b>Slab</b> - used by the kernel to cache data structures for its own use. '+
+ '<b>KernelStack</b> - allocated for each task done by the kernel. '+
+ '<b>PageTables</b> - dedicated to the lowest level of page tables (A page table is used to turn a virtual address into a physical memory address). '+
+ '<b>VmallocUsed</b> - being used as virtual address space. '+
+ '<b>Percpu</b> - allocated to the per-CPU allocator used to back per-CPU allocations (excludes the cost of metadata). '+
+ 'When you create a per-CPU variable, each processor on the system gets its own copy of that variable.</p>'
},
'mem.slab': {
- info: '<b>Reclaimable</b> is the amount of memory which the kernel can reuse. <b>Unreclaimable</b> can not be reused even when the kernel is lacking memory.'
+ info: '<p><a href="https://en.wikipedia.org/wiki/Slab_allocation" target="_blank">Slab memory</a> statistics.</p>'+
+ '<p><b>Reclaimable</b> - amount of memory which the kernel can reuse. '+
+ '<b>Unreclaimable</b> - can not be reused even when the kernel is lacking memory.</p>'
},
'mem.hugepages': {
@@ -1058,6 +1522,26 @@ netdataDashboard.context = {
info: 'Transparent HugePages (THP) is backing virtual memory with huge pages, supporting automatic promotion and demotion of page sizes. It works for all applications for anonymous memory mappings and tmpfs/shmem.'
},
+ 'mem.hwcorrupt': {
+ info: 'The amount of memory with physical corruption problems, identified by <a href="https://en.wikipedia.org/wiki/ECC_memory" target="_blank">ECC</a> and set aside by the kernel so it does not get used.'
+ },
+
+ 'mem.ecc_ce': {
+ info: 'The number of correctable (single-bit) ECC errors. '+
+ 'These errors do not affect the normal operation of the system '+
+ 'because they are still being corrected. '+
+ 'Periodic correctable errors may indicate that one of the memory modules is slowly failing.'
+ },
+
+ 'mem.ecc_ue': {
+ info: 'The number of uncorrectable (multi-bit) ECC errors. '+
+ 'An uncorrectable error is a fatal issue that will typically lead to an OS crash.'
+ },
+
+ 'mem.pagetype_global': {
+ info: 'The amount of memory available in blocks of certain size.'
+ },
+
'mem.cachestat_ratio': {
info: 'When the processor needs to read or write a location in main memory, it checks for a corresponding entry in the page cache. If the entry is there, a page cache hit has occurred and the read is from the cache. If the entry is not there, a page cache miss has occurred and the kernel allocates a new entry and copies in data from the disk. Netdata calculates the percentage of accessed files that are cached on memory. <a href="https://github.com/iovisor/bcc/blob/master/tools/cachestat.py#L126-L138" target="_blank">The ratio</a> is calculated counting the accessed cached pages (without counting dirty pages and pages added because of read misses) divided by total access without dirty pages.'
},
@@ -1091,131 +1575,967 @@ netdataDashboard.context = {
},
'filesystem.dc_hit_ratio': {
- info: 'Percentage of file accesses that were present in the directory cache. 100% means that every file that was accessed was present in the directory cache. If files are not present in the directory cache 1) they are not present in the file system, 2) the files were not accessed before. Read more about <a href="https://www.kernel.org/doc/htmldocs/filesystems/the_directory_cache.html" target="_blank">directory cache</a>.'
+ info: 'Percentage of file accesses that were present in the directory cache. 100% means that every file that was accessed was present in the directory cache. If files are not present in the directory cache 1) they are not present in the file system, 2) the files were not accessed before. Read more about <a href="https://www.kernel.org/doc/htmldocs/filesystems/the_directory_cache.html" target="_blank">directory cache</a>. When integration with apps is <a href="https://learn.netdata.cloud/guides/troubleshoot/monitor-debug-applications-ebpf" target="_blank">enabled</a>, Netdata also shows directory cache per <a href="#menu_apps_submenu_directory_cache__eBPF_">application</a>.'
},
'filesystem.dc_reference': {
info: 'Counters of file accesses. <code>Reference</code> is when there is a file access and the file is not present in the directory cache. <code>Miss</code> is when there is file access and the file is not found in the filesystem. <code>Slow</code> is when there is a file access and the file is present in the filesystem but not in the directory cache. Read more about <a href="https://www.kernel.org/doc/htmldocs/filesystems/the_directory_cache.html" target="_blank">directory cache</a>.'
},
+ 'md.health': {
+ info: 'Number of failed devices per MD array. '+
+ 'Netdata retrieves this data from the <b>[n/m]</b> field of the md status line. '+
+ 'It means that ideally the array would have <b>n</b> devices; however, currently <b>m</b> devices are in use. '+
+ '<code>failed disks</code> is <b>n-m</b>.'
+ },
+ 'md.disks': {
+ info: 'Number of devices in use and in the down state. '+
+ 'Netdata retrieves this data from the <b>[n/m]</b> field of the md status line. '+
+ 'It means that ideally the array would have <b>n</b> devices; however, currently <b>m</b> devices are in use. '+
+ '<code>inuse</code> is <b>m</b>, <code>down</code> is <b>n-m</b>.'
+ },
+ 'md.status': {
+ info: 'Completion progress of the ongoing operation.'
+ },
+ 'md.expected_time_until_operation_finish': {
+ info: 'Estimated time to complete the ongoing operation. '+
+ 'The time is only an approximation since the operation speed will vary according to other I/O demands.'
+ },
+ 'md.operation_speed': {
+ info: 'Speed of the ongoing operation. '+
+ 'The system-wide rebuild speed limits are specified in <code>/proc/sys/dev/raid/{speed_limit_min,speed_limit_max}</code> files. '+
+ 'These options are good for tweaking the rebuild process and may increase overall system load, CPU and memory usage.'
+ },
+ 'md.mismatch_cnt': {
+ info: 'When performing <b>check</b> and <b>repair</b>, and possibly when performing <b>resync</b>, md will count the number of errors that are found. '+
+ 'A count of mismatches is recorded in the <code>sysfs</code> file <code>md/mismatch_cnt</code>. '+
+ 'This value is the number of sectors that were re-written, or (for <b>check</b>) would have been re-written. '+
+ 'It may be larger than the number of actual errors by a factor of the number of sectors in a page. '+
+ 'Mismatches can not be interpreted very reliably on RAID1 or RAID10, especially when the device is used for swap. '+
+ 'On a truly clean RAID5 or RAID6 array, any mismatches should indicate a hardware problem at some level - '+
+ 'software issues should never cause such a mismatch. '+
+ 'For details, see <a href="https://man7.org/linux/man-pages/man4/md.4.html" target="_blank">md(4)</a>.'
+ },
+ 'md.flush': {
+ info: 'Number of flush counts per MD array. Based on the eBPF <a href="https://github.com/iovisor/bcc/blob/master/tools/mdflush_example.txt" target="_blank">mdflush</a> from BCC tools.'
+ },
+
// ------------------------------------------------------------------------
- // network interfaces
+ // IP
- 'net.drops': {
- info: 'Packets that have been dropped at the network interface level. These are the same counters reported by <code>ifconfig</code> as <code>RX dropped</code> (inbound) and <code>TX dropped</code> (outbound). <b>inbound</b> packets can be dropped at the network interface level due to <a href="#menu_system_submenu_softnet_stat">softnet backlog</a> overflow, bad / unintended VLAN tags, unknown or unregistered protocols, IPv6 frames when the server is not configured for IPv6. Check <a href="https://www.novell.com/support/kb/doc.php?id=7007165" target="_blank">this document</a> for more information.'
+ 'ip.inerrors': {
+ info: '<p>The number of errors encountered during the reception of IP packets.</p>' +
+ '<p><b>NoRoutes</b> - packets that were dropped because there was no route to send them. ' +
+ '<b>Truncated</b> - packets that were discarded because the datagram frame didn\'t carry enough data. ' +
+ '<b>Checksum</b> - packets that were dropped because they had wrong checksum.</p>'
},
- 'net.duplex': {
- info: 'State map: 0 - unknown, 1 - half duplex, 2 - full duplex'
+ 'ip.mcast': {
+ info: 'Total multicast traffic in the system.'
},
- 'net.operstate': {
- info: 'State map: 0 - unknown, 1 - notpresent, 2 - down, 3 - lowerlayerdown, 4 - testing, 5 - dormant, 6 - up'
+ 'ip.mcastpkts': {
+ info: 'Total transferred multicast packets in the system.'
},
- 'net.carrier': {
- info: 'State map: 0 - down, 1 - up'
+ 'ip.bcast': {
+ info: 'Total broadcast traffic in the system.'
},
- // ------------------------------------------------------------------------
- // IP
+ 'ip.bcastpkts': {
+ info: 'Total transferred broadcast packets in the system.'
+ },
- 'ip.inerrors': {
- info: 'Errors encountered during the reception of IP packets. ' +
- '<code>noroutes</code> (<code>InNoRoutes</code>) counts packets that were dropped because there was no route to send them. ' +
- '<code>truncated</code> (<code>InTruncatedPkts</code>) counts packets which is being discarded because the datagram frame didn\'t carry enough data. ' +
- '<code>checksum</code> (<code>InCsumErrors</code>) counts packets that were dropped because they had wrong checksum. '
+ 'ip.ecnpkts': {
+ info: '<p>Total number of received IP packets with ECN bits set in the system.</p>'+
+ '<p><b>CEP</b> - congestion encountered. '+
+ '<b>NoECTP</b> - non ECN-capable transport. '+
+ '<b>ECTP0</b> and <b>ECTP1</b> - ECN capable transport.</p>'
+ },
+
+ 'ip.tcpreorders': {
+ info: '<p>TCP prevents out-of-order packets by either sequencing them in the correct order or '+
+ 'by requesting the retransmission of out-of-order packets.</p>'+
+ '<p><b>Timestamp</b> - detected re-ordering using the timestamp option. '+
+ '<b>SACK</b> - detected re-ordering using Selective Acknowledgment algorithm. '+
+ '<b>FACK</b> - detected re-ordering using Forward Acknowledgment algorithm. '+
+ '<b>Reno</b> - detected re-ordering using Fast Retransmit algorithm.</p>'
+ },
+
+ 'ip.tcpofo': {
+ info: '<p>TCP maintains an out-of-order queue to keep the out-of-order packets in the TCP communication.</p>'+
+ '<p><b>InQueue</b> - the TCP layer receives an out-of-order packet and has enough memory to queue it. '+
+ '<b>Dropped</b> - the TCP layer receives an out-of-order packet but does not have enough memory, so drops it. '+
+ '<b>Merged</b> - the received out-of-order packet has an overlay with the previous packet. '+
+ 'The overlay part will be dropped. All these packets will also be counted into <b>InQueue</b>. '+
+ '<b>Pruned</b> - packets dropped from out-of-order queue because of socket buffer overrun.</p>'
+ },
+
+ 'ip.tcpsyncookies': {
+ info: '<p><a href="https://en.wikipedia.org/wiki/SYN_cookies" target="_blank">SYN cookies</a> '+
+ 'are used to mitigate SYN flood.</p>'+
+ '<p><b>Received</b> - after sending a SYN cookie, it came back to us and passed the check. '+
+ '<b>Sent</b> - an application was not able to accept a connection fast enough, so the kernel could not store '+
+ 'an entry in the queue for this connection. Instead of dropping it, it sent a SYN cookie to the client. '+
+ '<b>Failed</b> - the MSS decoded from the SYN cookie is invalid. When this counter is incremented, '+
+ 'the received packet won\'t be treated as a SYN cookie.</p>'
},
'ip.tcpmemorypressures': {
- info: 'Number of times a socket was put in <b>memory pressure</b> due to a non fatal memory allocation failure (the kernel attempts to work around this situation by reducing the send buffers, etc).'
+ info: 'The number of times a socket was put in memory pressure due to a non fatal memory allocation failure '+
+ '(the kernel attempts to work around this situation by reducing the send buffers, etc).'
},
'ip.tcpconnaborts': {
- info: 'TCP connection aborts. <b>baddata</b> (<code>TCPAbortOnData</code>) happens while the connection is on <code>FIN_WAIT1</code> and the kernel receives a packet with a sequence number beyond the last one for this connection - the kernel responds with <code>RST</code> (closes the connection). <b>userclosed</b> (<code>TCPAbortOnClose</code>) happens when the kernel receives data on an already closed connection and responds with <code>RST</code>. <b>nomemory</b> (<code>TCPAbortOnMemory</code> happens when there are too many orphaned sockets (not attached to an fd) and the kernel has to drop a connection - sometimes it will send an <code>RST</code>, sometimes it won\'t. <b>timeout</b> (<code>TCPAbortOnTimeout</code>) happens when a connection times out. <b>linger</b> (<code>TCPAbortOnLinger</code>) happens when the kernel killed a socket that was already closed by the application and lingered around for long enough. <b>failed</b> (<code>TCPAbortFailed</code>) happens when the kernel attempted to send an <code>RST</code> but failed because there was no memory available.'
+ info: '<p>TCP connection aborts.</p>'+
+ '<p><b>BadData</b> - happens while the connection is on FIN_WAIT1 and the kernel receives a packet '+
+ 'with a sequence number beyond the last one for this connection - '+
+ 'the kernel responds with RST (closes the connection). '+
+ '<b>UserClosed</b> - happens when the kernel receives data on an already closed connection and '+
+ 'responds with RST. '+
+ '<b>NoMemory</b> - happens when there are too many orphaned sockets (not attached to an fd) and '+
+ 'the kernel has to drop a connection - sometimes it will send an RST, sometimes it won\'t. '+
+ '<b>Timeout</b> - happens when a connection times out. '+
+ '<b>Linger</b> - happens when the kernel killed a socket that was already closed by the application and '+
+ 'lingered around for long enough. '+
+ '<b>Failed</b> - happens when the kernel attempted to send an RST but failed because there was no memory available.</p>'
+ },
+
+ 'ip.tcp_functions': {
+ title : 'TCP calls',
+ info: 'Successful or failed calls to functions <code>tcp_sendmsg</code>, <code>tcp_cleanup_rbuf</code>, and <code>tcp_close</code>.'
+ },
+
+ 'ip.total_tcp_bandwidth': {
+ title : 'TCP bandwidth',
+ info: 'Bytes sent and received by functions <code>tcp_sendmsg</code> and <code>tcp_cleanup_rbuf</code>. We use <code>tcp_cleanup_rbuf</code> instead of <code>tcp_recvmsg</code>, because the latter misses <code>tcp_read_sock()</code> traffic and we would also need to have more probes to get the socket and packet size.'
+ },
+
+ 'ip.tcp_error': {
+ title : 'TCP errors',
+ info: 'Failed calls to functions <code>tcp_sendmsg</code>, <code>tcp_cleanup_rbuf</code>, and <code>tcp_close</code>.'
+ },
+
+ 'ip.tcp_retransmit': {
+ title : 'TCP retransmit',
+ info: 'Number of packets retransmitted by function <code>tcp_retransmit_skb</code>.'
+ },
+
+ 'ip.udp_functions': {
+ title : 'UDP calls',
+ info: 'Successful or failed calls to functions <code>udp_sendmsg</code> and <code>udp_recvmsg</code>.'
+ },
+
+ 'ip.total_udp_bandwidth': {
+ title : 'UDP bandwidth',
+ info: 'Bytes sent and received by functions <code>udp_sendmsg</code> and <code>udp_recvmsg</code>.'
+ },
+
+ 'ip.udp_error': {
+ title : 'UDP errors',
+ info: 'Failed calls to functions <code>udp_sendmsg</code> and <code>udp_recvmsg</code>.'
},
+
'ip.tcp_syn_queue': {
- info: 'The <b>SYN queue</b> of the kernel tracks TCP handshakes until connections get fully established. ' +
+ info: '<p>The SYN queue of the kernel tracks TCP handshakes until connections get fully established. ' +
'It overflows when too many incoming TCP connection requests hang in the half-open state and the server ' +
- 'is not configured to fall back to SYN cookies*. Overflows are usually caused by SYN flood DoS attacks ' +
- '(i.e. someone sends lots of SYN packets and never completes the handshakes). ' +
- '<b>drops</b> (or <code>TcpExtTCPReqQFullDrop</code>) is the number of connections dropped because the ' +
- 'SYN queue was full and SYN cookies were disabled. ' +
- '<b>cookies</b> (or <code>TcpExtTCPReqQFullDoCookies</code>) is the number of SYN cookies sent because the ' +
- 'SYN queue was full.'
+ 'is not configured to fall back to SYN cookies. Overflows are usually caused by SYN flood DoS attacks.</p>' +
+ '<p><b>Drops</b> - number of connections dropped because the SYN queue was full and SYN cookies were disabled. ' +
+ '<b>Cookies</b> - number of SYN cookies sent because the SYN queue was full.</p>'
},
'ip.tcp_accept_queue': {
- info: 'The <b>accept queue</b> of the kernel holds the fully established TCP connections, waiting to be handled ' +
- 'by the listening application. <b>overflows</b> (or <code>ListenOverflows</code>) is the number of ' +
- 'established connections that could not be handled because the receive queue of the listening application ' +
- 'was full. <b>drops</b> (or <code>ListenDrops</code>) is the number of incoming ' +
- 'connections that could not be handled, including SYN floods, overflows, out of memory, security issues, ' +
- 'no route to destination, reception of related ICMP messages, socket is broadcast or multicast.'
+ info: '<p>The accept queue of the kernel holds the fully established TCP connections, waiting to be handled ' +
+ 'by the listening application.</p>'+
+ '<p><b>Overflows</b> - the number of established connections that could not be handled because '+
+ 'the receive queue of the listening application was full. '+
+ '<b>Drops</b> - number of incoming connections that could not be handled, including SYN floods, '+
+ 'overflows, out of memory, security issues, no route to destination, reception of related ICMP messages, '+
+ 'socket is broadcast or multicast.</p>'
},
// ------------------------------------------------------------------------
// IPv4
+ 'ipv4.packets': {
+ info: '<p>IPv4 packets statistics for this host.</p>'+
+ '<p><b>Received</b> - packets received by the IP layer. '+
+ 'This counter will be increased even if the packet is dropped later. '+
+ '<b>Sent</b> - packets sent via IP layer, for both single cast and multicast packets. '+
+ 'This counter does not include any packets counted in <b>Forwarded</b>. '+
+ '<b>Forwarded</b> - input packets for which this host was not their final IP destination, '+
+ 'as a result of which an attempt was made to find a route to forward them to that final destination. '+
+ 'In hosts which do not act as IP Gateways, this counter will include only those packets which were '+
+ '<a href="https://en.wikipedia.org/wiki/Source_routing" target="_blank">Source-Routed</a> '+
+ 'and the Source-Route option processing was successful. '+
+ '<b>Delivered</b> - packets delivered to the upper layer protocols, e.g. TCP, UDP, ICMP, and so on.</p>'
+ },
+
+ 'ipv4.fragsout': {
+ info: '<p><a href="https://en.wikipedia.org/wiki/IPv4#Fragmentation" target="_blank">IPv4 fragmentation</a> '+
+ 'statistics for this system.</p>'+
+ '<p><b>OK</b> - packets that have been successfully fragmented. '+
+ '<b>Failed</b> - packets that have been discarded because they needed to be fragmented '+
+ 'but could not be, e.g. because the <i>Don\'t Fragment</i> (DF) flag was set. '+
+ '<b>Created</b> - fragments that have been generated as a result of fragmentation.</p>'
+ },
+
+ 'ipv4.fragsin': {
+ info: '<p><a href="https://en.wikipedia.org/wiki/IPv4#Reassembly" target="_blank">IPv4 reassembly</a> '+
+ 'statistics for this system.</p>'+
+ '<p><b>OK</b> - packets that have been successfully reassembled. '+
+ '<b>Failed</b> - failures detected by the IP reassembly algorithm. '+
+ 'This is not necessarily a count of discarded IP fragments since some algorithms '+
+ 'can lose track of the number of fragments by combining them as they are received. '+
+ '<b>All</b> - received IP fragments which needed to be reassembled.</p>'
+ },
+
+ 'ipv4.errors': {
+ info: '<p>The number of discarded IPv4 packets.</p>'+
+ '<p><b>InDiscards</b>, <b>OutDiscards</b> - inbound and outbound packets which were chosen '+
+ 'to be discarded even though no errors had been '+
+ 'detected to prevent their being deliverable to a higher-layer protocol. '+
+ '<b>InHdrErrors</b> - input packets that have been discarded due to errors in their IP headers, including '+
+ 'bad checksums, version number mismatch, other format errors, time-to-live exceeded, '+
+ 'errors discovered in processing their IP options, etc. '+
+ '<b>OutNoRoutes</b> - packets that have been discarded because no route could be found '+
+ 'to transmit them to their destination. This includes any packets which a host cannot route '+
+ 'because all of its default gateways are down. '+
+ '<b>InAddrErrors</b> - input packets that have been discarded due to invalid IP address or '+
+ 'the destination IP address is not a local address and IP forwarding is not enabled. '+
+ '<b>InUnknownProtos</b> - input packets which were discarded because of an unknown or unsupported protocol.</p>'
+ },
+
+ 'ipv4.icmp': {
+ info: '<p>The number of transferred IPv4 ICMP messages.</p>'+
+ '<p><b>Received</b>, <b>Sent</b> - ICMP messages which the host received and attempted to send. '+
+ 'Both these counters include errors.</p>'
+ },
+
+ 'ipv4.icmp_errors': {
+ info: '<p>The number of IPv4 ICMP errors.</p>'+
+ '<p><b>InErrors</b> - received ICMP messages that were determined to have ICMP-specific errors, '+
+ 'e.g. bad ICMP checksums, bad length, etc. '+
+ '<b>OutErrors</b> - ICMP messages which this host did not send due to '+
+ 'problems discovered within ICMP such as a lack of buffers. '+
+ 'This counter does not include errors discovered outside the ICMP layer '+
+ 'such as the inability of IP to route the resultant datagram. '+
+ '<b>InCsumErrors</b> - received ICMP messages with bad checksum.</p>'
+ },
+
+ 'ipv4.icmpmsg': {
+ info: 'The number of transferred '+
+ '<a href="https://www.iana.org/assignments/icmp-parameters/icmp-parameters.xhtml" target="_blank">IPv4 ICMP control messages</a>.'
+ },
+
+ 'ipv4.udppackets': {
+ info: 'The number of transferred UDP packets.'
+ },
+
+ 'ipv4.udperrors': {
+ info: '<p>The number of errors encountered during transferring UDP packets.</p>'+
+ '<p><b>RcvbufErrors</b> - receive buffer is full. '+
+ '<b>SndbufErrors</b> - send buffer is full, no kernel memory available, or '+
+ 'the IP layer reported an error when trying to send the packet and no error queue has been setup. '+
+ '<b>InErrors</b> - that is an aggregated counter for all errors, excluding <b>NoPorts</b>. '+
+ '<b>NoPorts</b> - no application is listening at the destination port. '+
+ '<b>InCsumErrors</b> - a UDP checksum failure is detected. '+
+ '<b>IgnoredMulti</b> - ignored multicast packets.</p>'
+ },
+
+ 'ipv4.udplite': {
+ info: 'The number of transferred UDP-Lite packets.'
+ },
+
+ 'ipv4.udplite_errors': {
+ info: '<p>The number of errors encountered during transferring UDP-Lite packets.</p>'+
+ '<p><b>RcvbufErrors</b> - receive buffer is full. '+
+ '<b>SndbufErrors</b> - send buffer is full, no kernel memory available, or '+
+ 'the IP layer reported an error when trying to send the packet and no error queue has been setup. '+
+ '<b>InErrors</b> - that is an aggregated counter for all errors, excluding <b>NoPorts</b>. '+
+ '<b>NoPorts</b> - no application is listening at the destination port. '+
+ '<b>InCsumErrors</b> - a UDP checksum failure is detected. '+
+ '<b>IgnoredMulti</b> - ignored multicast packets.</p>'
+ },
+
+ 'ipv4.tcppackets': {
+ info: '<p>The number of packets transferred by the TCP layer.</p>'+
+ '<p><b>Received</b> - received packets, including those received in error, '+
+ 'such as checksum error, invalid TCP header, and so on. '+
+ '<b>Sent</b> - sent packets, excluding the retransmitted packets. '+
+ 'But it includes the SYN, ACK, and RST packets.</p>'
+ },
+
'ipv4.tcpsock': {
- info: 'The number of established TCP connections (known as <code>CurrEstab</code>). This is a snapshot of the established connections at the time of measurement (i.e. a connection established and a connection disconnected within the same iteration will not affect this metric).'
+ info: 'The number of TCP connections for which the current state is either ESTABLISHED or CLOSE-WAIT. '+
+ 'This is a snapshot of the established connections at the time of measurement '+
+ '(i.e. a connection established and a connection disconnected within the same iteration will not affect this metric).'
},
'ipv4.tcpopens': {
- info: '<b>active</b> or <code>ActiveOpens</code> is the number of outgoing TCP <b>connections attempted</b> by this host.'
- + ' <b>passive</b> or <code>PassiveOpens</code> is the number of incoming TCP <b>connections accepted</b> by this host.'
+ info: '<p>TCP connection statistics.</p>'+
+ '<p><b>Active</b> - number of outgoing TCP connections attempted by this host. '+
+ '<b>Passive</b> - number of incoming TCP connections accepted by this host.</p>'
},
'ipv4.tcperrors': {
- info: '<code>InErrs</code> is the number of TCP segments received in error (including header too small, checksum errors, sequence errors, bad packets - for both IPv4 and IPv6).'
- + ' <code>InCsumErrors</code> is the number of TCP segments received with checksum errors (for both IPv4 and IPv6).'
- + ' <code>RetransSegs</code> is the number of TCP segments retransmitted.'
+ info: '<p>TCP errors.</p>'+
+ '<p><b>InErrs</b> - TCP segments received in error '+
+ '(including header too small, checksum errors, sequence errors, bad packets - for both IPv4 and IPv6). '+
+ '<b>InCsumErrors</b> - TCP segments received with checksum errors (for both IPv4 and IPv6). '+
+ '<b>RetransSegs</b> - TCP segments retransmitted.</p>'
},
'ipv4.tcphandshake': {
- info: '<code>EstabResets</code> is the number of established connections resets (i.e. connections that made a direct transition from <code>ESTABLISHED</code> or <code>CLOSE_WAIT</code> to <code>CLOSED</code>).'
- + ' <code>OutRsts</code> is the number of TCP segments sent, with the <code>RST</code> flag set (for both IPv4 and IPv6).'
- + ' <code>AttemptFails</code> is the number of times TCP connections made a direct transition from either <code>SYN_SENT</code> or <code>SYN_RECV</code> to <code>CLOSED</code>, plus the number of times TCP connections made a direct transition from the <code>SYN_RECV</code> to <code>LISTEN</code>.'
- + ' <code>TCPSynRetrans</code> shows retries for new outbound TCP connections, which can indicate general connectivity issues or backlog on the remote host.'
+ info: '<p>TCP handshake statistics.</p>'+
+ '<p><b>EstabResets</b> - established connections resets '+
+ '(i.e. connections that made a direct transition from ESTABLISHED or CLOSE_WAIT to CLOSED). '+
+ '<b>OutRsts</b> - TCP segments sent, with the RST flag set (for both IPv4 and IPv6). '+
+ '<b>AttemptFails</b> - number of times TCP connections made a direct transition from either '+
+ 'SYN_SENT or SYN_RECV to CLOSED, plus the number of times TCP connections made a direct transition '+
+ 'from the SYN_RECV to LISTEN. '+
+ '<b>SynRetrans</b> - shows retries for new outbound TCP connections, '+
+ 'which can indicate general connectivity issues or backlog on the remote host.</p>'
+ },
+
+ 'ipv4.sockstat_sockets': {
+ info: 'The total number of used sockets for all '+
+ '<a href="https://man7.org/linux/man-pages/man7/address_families.7.html" target="_blank">address families</a> '+
+ 'in this system.'
+ },
+
+ 'ipv4.sockstat_tcp_sockets': {
+ info: '<p>The number of TCP sockets in the system in certain '+
+ '<a href="https://en.wikipedia.org/wiki/Transmission_Control_Protocol#Protocol_operation" target="_blank">states</a>.</p>'+
+ '<p><b>Alloc</b> - in any TCP state. '+
+ '<b>Orphan</b> - no longer attached to a socket descriptor in any user processes, '+
+ 'but for which the kernel is still required to maintain state in order to complete the transport protocol. '+
+ '<b>InUse</b> - in any TCP state, excluding TIME-WAIT and CLOSED. '+
+ '<b>TimeWait</b> - in the TIME-WAIT state.</p>'
+ },
+
+ 'ipv4.sockstat_tcp_mem': {
+ info: 'The amount of memory used by allocated TCP sockets.'
+ },
+
+ 'ipv4.sockstat_udp_sockets': {
+ info: 'The number of used UDP sockets.'
+ },
+
+ 'ipv4.sockstat_udp_mem': {
+ info: 'The amount of memory used by allocated UDP sockets.'
+ },
+
+ 'ipv4.sockstat_udplite_sockets': {
+ info: 'The number of used UDP-Lite sockets.'
+ },
+
+ 'ipv4.sockstat_raw_sockets': {
+ info: 'The number of used <a href="https://en.wikipedia.org/wiki/Network_socket#Types" target="_blank">raw sockets</a>.'
+ },
+
+ 'ipv4.sockstat_frag_sockets': {
+ info: 'The number of entries in hash tables that are used for packet reassembly.'
+ },
+
+ 'ipv4.sockstat_frag_mem': {
+ info: 'The amount of memory used for packet reassembly.'
+ },
+
+ // ------------------------------------------------------------------------
+ // IPv6
+
+ 'ipv6.packets': {
+ info: '<p>IPv6 packet statistics for this host.</p>'+
+ '<p><b>Received</b> - packets received by the IP layer. '+
+ 'This counter will be increased even if the packet is dropped later. '+
+ '<b>Sent</b> - packets sent via IP layer, for both single cast and multicast packets. '+
+ 'This counter does not include any packets counted in <b>Forwarded</b>. '+
+ '<b>Forwarded</b> - input packets for which this host was not their final IP destination, '+
+ 'as a result of which an attempt was made to find a route to forward them to that final destination. '+
+ 'In hosts which do not act as IP Gateways, this counter will include only those packets which were '+
+ '<a href="https://en.wikipedia.org/wiki/Source_routing" target="_blank">Source-Routed</a> '+
+ 'and the Source-Route option processing was successful. '+
+ '<b>Delivers</b> - packets delivered to the upper layer protocols, e.g. TCP, UDP, ICMP, and so on.</p>'
+ },
+
+ 'ipv6.fragsout': {
+ info: '<p><a href="https://en.wikipedia.org/wiki/IP_fragmentation" target="_blank">IPv6 fragmentation</a> '+
+ 'statistics for this system.</p>'+
+ '<p><b>OK</b> - packets that have been successfully fragmented. '+
+ '<b>Failed</b> - packets that have been discarded because they needed to be fragmented '+
+ 'but could not be, e.g. because the <i>Don\'t Fragment</i> (DF) flag was set. '+
+ '<b>All</b> - fragments that have been generated as a result of fragmentation.</p>'
+ },
+
+ 'ipv6.fragsin': {
+ info: '<p><a href="https://en.wikipedia.org/wiki/IP_fragmentation" target="_blank">IPv6 reassembly</a> '+
+ 'statistics for this system.</p>'+
+ '<p><b>OK</b> - packets that have been successfully reassembled. '+
+ '<b>Failed</b> - failures detected by the IP reassembly algorithm. '+
+ 'This is not necessarily a count of discarded IP fragments since some algorithms '+
+ 'can lose track of the number of fragments by combining them as they are received. '+
+ '<b>Timeout</b> - reassembly timeouts detected. '+
+ '<b>All</b> - received IP fragments which needed to be reassembled.</p>'
+ },
+
+ 'ipv6.errors': {
+ info: '<p>The number of discarded IPv6 packets.</p>'+
+ '<p><b>InDiscards</b>, <b>OutDiscards</b> - packets which were chosen to be discarded even though '+
+ 'no errors had been detected to prevent their being deliverable to a higher-layer protocol. '+
+ '<b>InHdrErrors</b> - errors in IP headers, including bad checksums, version number mismatch, '+
+ 'other format errors, time-to-live exceeded, etc. '+
+ '<b>InAddrErrors</b> - invalid IP address or the destination IP address is not a local address and '+
+ 'IP forwarding is not enabled. '+
+ '<b>InUnknownProtos</b> - unknown or unsupported protocol. '+
+ '<b>InTooBigErrors</b> - the size exceeded the link MTU. '+
+ '<b>InTruncatedPkts</b> - packet frame did not carry enough data. '+
+ '<b>InNoRoutes</b> - no route could be found while forwarding. '+
+ '<b>OutNoRoutes</b> - no route could be found for packets generated by this host.</p>'
+ },
+
+ 'ipv6.udppackets': {
+ info: 'The number of transferred UDP packets.'
+ },
+
+ 'ipv6.udperrors': {
+ info: '<p>The number of errors encountered during transferring UDP packets.</p>'+
+ '<p><b>RcvbufErrors</b> - receive buffer is full. '+
+ '<b>SndbufErrors</b> - send buffer is full, no kernel memory available, or '+
+ 'the IP layer reported an error when trying to send the packet and no error queue has been setup. '+
+ '<b>InErrors</b> - that is an aggregated counter for all errors, excluding <b>NoPorts</b>. '+
+ '<b>NoPorts</b> - no application is listening at the destination port. '+
+ '<b>InCsumErrors</b> - a UDP checksum failure is detected. '+
+ '<b>IgnoredMulti</b> - ignored multicast packets.</p>'
+ },
+
+ 'ipv6.udplitepackets': {
+ info: 'The number of transferred UDP-Lite packets.'
+ },
+
+ 'ipv6.udpliteerrors': {
+ info: '<p>The number of errors encountered during transferring UDP-Lite packets.</p>'+
+ '<p><b>RcvbufErrors</b> - receive buffer is full. '+
+ '<b>SndbufErrors</b> - send buffer is full, no kernel memory available, or '+
+ 'the IP layer reported an error when trying to send the packet and no error queue has been setup. '+
+ '<b>InErrors</b> - that is an aggregated counter for all errors, excluding <b>NoPorts</b>. '+
+ '<b>NoPorts</b> - no application is listening at the destination port. '+
+ '<b>InCsumErrors</b> - a UDP checksum failure is detected.</p>'
+ },
+
+ 'ipv6.mcast': {
+ info: 'Total IPv6 multicast traffic.'
+ },
+
+ 'ipv6.bcast': {
+ info: 'Total IPv6 broadcast traffic.'
+ },
+
+ 'ipv6.mcastpkts': {
+ info: 'Total transferred IPv6 multicast packets.'
+ },
+
+ 'ipv6.icmp': {
+ info: '<p>The number of transferred ICMPv6 messages.</p>'+
+ '<p><b>Received</b>, <b>Sent</b> - ICMP messages which the host received and attempted to send. '+
+ 'Both these counters include errors.</p>'
+ },
+
+ 'ipv6.icmpredir': {
+ info: 'The number of transferred ICMPv6 Redirect messages. '+
+ 'These messages inform a host to update its routing information (to send packets on an alternative route).'
+ },
+
+ 'ipv6.icmpechos': {
+ info: 'The number of ICMPv6 Echo messages.'
+ },
+
+ 'ipv6.icmperrors': {
+ info: '<p>The number of ICMPv6 errors and '+
+ '<a href="https://www.rfc-editor.org/rfc/rfc4443.html#section-3" target="_blank">error messages</a>.</p>'+
+ '<p><b>InErrors</b>, <b>OutErrors</b> - bad ICMP messages (bad ICMP checksums, bad length, etc.). '+
+ '<b>InCsumErrors</b> - wrong checksum.</p>'
+ },
+
+ 'ipv6.groupmemb': {
+ info: '<p>The number of transferred ICMPv6 Group Membership messages.</p>'+
+ '<p> Multicast routers send Group Membership Query messages to learn which groups have members on each of their '+
+ 'attached physical networks. Host computers respond by sending a Group Membership Report for each '+
+ 'multicast group joined by the host. A host computer can also send a Group Membership Report when '+
+ 'it joins a new multicast group. '+
+ 'Group Membership Reduction messages are sent when a host computer leaves a multicast group.</p>'
+ },
+
+ 'ipv6.icmprouter': {
+ info: '<p>The number of transferred ICMPv6 '+
+ '<a href="https://en.wikipedia.org/wiki/Neighbor_Discovery_Protocol" target="_blank">Router Discovery</a> messages.</p>'+
+ '<p>Router <b>Solicitations</b> message is sent from a computer host to any routers on the local area network '+
+ 'to request that they advertise their presence on the network. '+
+ 'Router <b>Advertisement</b> message is sent by a router on the local area network to announce its IP address '+
+ 'as available for routing.</p>'
+ },
+
+ 'ipv6.icmpneighbor': {
+ info: '<p>The number of transferred ICMPv6 '+
+ '<a href="https://en.wikipedia.org/wiki/Neighbor_Discovery_Protocol" target="_blank">Neighbor Discovery</a> messages.</p>'+
+ '<p>Neighbor <b>Solicitations</b> are used by nodes to determine the link layer address '+
+ 'of a neighbor, or to verify that a neighbor is still reachable via a cached link layer address. '+
+ 'Neighbor <b>Advertisements</b> are used by nodes to respond to a Neighbor Solicitation message.</p>'
+ },
+
+ 'ipv6.icmpmldv2': {
+ info: 'The number of transferred ICMPv6 '+
+ '<a href="https://en.wikipedia.org/wiki/Multicast_Listener_Discovery" target="_blank">Multicast Listener Discovery</a> (MLD) messages.'
+ },
+
+ 'ipv6.icmptypes': {
+ info: 'The number of transferred ICMPv6 messages of '+
+ '<a href="https://en.wikipedia.org/wiki/Internet_Control_Message_Protocol_for_IPv6#Types" target="_blank">certain types</a>.'
+ },
+
+ 'ipv6.ect': {
+ info: '<p>Total number of received IPv6 packets with ECN bits set in the system.</p>'+
+ '<p><b>CEP</b> - congestion encountered. '+
+ '<b>NoECTP</b> - non ECN-capable transport. '+
+ '<b>ECTP0</b> and <b>ECTP1</b> - ECN capable transport.</p>'
+ },
+
+ 'ipv6.sockstat6_tcp_sockets': {
+ info: 'The number of TCP sockets in any '+
+ '<a href="https://en.wikipedia.org/wiki/Transmission_Control_Protocol#Protocol_operation" target="_blank">state</a>, '+
+ 'excluding TIME-WAIT and CLOSED.'
+ },
+
+ 'ipv6.sockstat6_udp_sockets': {
+ info: 'The number of used UDP sockets.'
+ },
+
+ 'ipv6.sockstat6_udplite_sockets': {
+ info: 'The number of used UDP-Lite sockets.'
+ },
+
+ 'ipv6.sockstat6_raw_sockets': {
+ info: 'The number of used <a href="https://en.wikipedia.org/wiki/Network_socket#Types" target="_blank">raw sockets</a>.'
},
+ 'ipv6.sockstat6_frag_sockets': {
+ info: 'The number of entries in hash tables that are used for packet reassembly.'
+ },
+
+
// ------------------------------------------------------------------------
- // APPS
+ // SCTP
+
+ 'sctp.established': {
+ info: 'The number of associations for which the current state is either '+
+ 'ESTABLISHED, SHUTDOWN-RECEIVED or SHUTDOWN-PENDING.'
+ },
+
+ 'sctp.transitions': {
+ info: '<p>The number of times that associations have made a direct transition between states.</p>'+
+ '<p><b>Active</b> - from COOKIE-ECHOED to ESTABLISHED. The upper layer initiated the association attempt. '+
+ '<b>Passive</b> - from CLOSED to ESTABLISHED. The remote endpoint initiated the association attempt. '+
+ '<b>Aborted</b> - from any state to CLOSED using the primitive ABORT. Ungraceful termination of the association. '+
+ '<b>Shutdown</b> - from SHUTDOWN-SENT or SHUTDOWN-ACK-SENT to CLOSED. Graceful termination of the association.</p>'
+ },
+
+ 'sctp.packets': {
+ info: '<p>The number of transferred SCTP packets.</p>'+
+ '<p><b>Received</b> - includes duplicate packets. '+
+ '<b>Sent</b> - includes retransmitted DATA chunks.</p>'
+ },
+ 'sctp.packet_errors': {
+ info: '<p>The number of errors encountered during receiving SCTP packets.</p>'+
+ '<p><b>Invalid</b> - packets for which the receiver was unable to identify an appropriate association. '+
+ '<b>Checksum</b> - packets with an invalid checksum.</p>'
+ },
+
+ 'sctp.fragmentation': {
+ info: '<p>The number of fragmented and reassembled SCTP messages.</p>'+
+ '<p><b>Reassembled</b> - reassembled user messages, after conversion into DATA chunks. '+
+ '<b>Fragmented</b> - user messages that have to be fragmented because of the MTU.</p>'
+ },
+
+ 'sctp.chunks': {
+ info: 'The number of transferred control, ordered, and unordered DATA chunks. '+
+ 'Retransmissions and duplicates are not included.'
+ },
+
+ // ------------------------------------------------------------------------
+ // Netfilter Connection Tracker
+
+ 'netfilter.conntrack_sockets': {
+ info: 'The number of entries in the conntrack table.'
+ },
+
+ 'netfilter.conntrack_new': {
+ info: '<p>Packet tracking statistics.</p>'+
+ '<p><b>New</b> - conntrack entries added which were not expected before. '+
+ '<b>Ignore</b> - packets seen which are already connected to a conntrack entry. '+
+ '<b>Invalid</b> - packets seen which can not be tracked.</p>'
+ },
+
+ 'netfilter.conntrack_changes': {
+ info: '<p>The number of changes in conntrack tables.</p>'+
+ '<p><b>Inserted</b>, <b>Deleted</b> - conntrack entries which were inserted or removed. '+
+ '<b>Delete-list</b> - conntrack entries which were put to dying list.</p>'
+ },
+
+ 'netfilter.conntrack_expect': {
+ info: '<p>The number of events in the "expect" table. '+
+ 'Connection tracking expectations are the mechanism used to "expect" RELATED connections to existing ones. '+
+ 'An expectation is a connection that is expected to happen in a period of time.</p>'+
+ '<p><b>Created</b>, <b>Deleted</b> - conntrack entries which were inserted or removed. '+
+ '<b>New</b> - conntrack entries added after an expectation for them was already present.</p>'
+ },
+
+ 'netfilter.conntrack_search': {
+ info: '<p>Conntrack table lookup statistics.</p>'+
+ '<p><b>Searched</b> - conntrack table lookups performed. '+
+ '<b>Restarted</b> - conntrack table lookups which had to be restarted due to hashtable resizes. '+
+ '<b>Found</b> - conntrack table lookups which were successful.</p>'
+ },
+
+ 'netfilter.conntrack_errors': {
+ info: '<p>Conntrack errors.</p>'+
+ '<p><b>IcmpError</b> - packets which could not be tracked due to error situation. '+
+ '<b>InsertFailed</b> - entries for which list insertion was attempted but failed '+
+ '(happens if the same entry is already present). '+
+ '<b>Drop</b> - packets dropped due to conntrack failure. '+
+ 'Either new conntrack entry allocation failed, or protocol helper dropped the packet. '+
+ '<b>EarlyDrop</b> - dropped conntrack entries to make room for new ones, if maximum table size was reached.</p>'
+ },
+
+ 'netfilter.synproxy_syn_received': {
+ info: 'The number of initial TCP SYN packets received from clients.'
+ },
+
+ 'netfilter.synproxy_conn_reopened': {
+ info: 'The number of reopened connections by new TCP SYN packets directly from the TIME-WAIT state.'
+ },
+
+ 'netfilter.synproxy_cookies': {
+ info: '<p>SYNPROXY cookie statistics.</p>'+
+ '<p><b>Valid</b>, <b>Invalid</b> - result of cookie validation in TCP ACK packets received from clients. '+
+ '<b>Retransmits</b> - TCP SYN packets retransmitted to the server. '+
+ 'It happens when the client repeats TCP ACK and the connection to the server is not yet established.</p>'
+ },
+
+ // ------------------------------------------------------------------------
+ // APPS (Applications, Groups, Users)
+
+ // APPS cpu
'apps.cpu': {
- height: 2.0
+ info: 'Total CPU utilization (all cores). It includes user, system and guest time.'
+ },
+ 'groups.cpu': {
+ info: 'Total CPU utilization (all cores). It includes user, system and guest time.'
+ },
+ 'users.cpu': {
+ info: 'Total CPU utilization (all cores). It includes user, system and guest time.'
+ },
+
+ 'apps.cpu_user': {
+ info: 'The amount of time the CPU was busy executing code in '+
+ '<a href="https://en.wikipedia.org/wiki/CPU_modes#Mode_types" target="_blank">user mode</a> (all cores).'
+ },
+ 'groups.cpu_user': {
+ info: 'The amount of time the CPU was busy executing code in '+
+ '<a href="https://en.wikipedia.org/wiki/CPU_modes#Mode_types" target="_blank">user mode</a> (all cores).'
+ },
+ 'users.cpu_user': {
+ info: 'The amount of time the CPU was busy executing code in '+
+ '<a href="https://en.wikipedia.org/wiki/CPU_modes#Mode_types" target="_blank">user mode</a> (all cores).'
+ },
+
+ 'apps.cpu_system': {
+ info: 'The amount of time the CPU was busy executing code in '+
+ '<a href="https://en.wikipedia.org/wiki/CPU_modes#Mode_types" target="_blank">kernel mode</a> (all cores).'
+ },
+ 'groups.cpu_system': {
+ info: 'The amount of time the CPU was busy executing code in '+
+ '<a href="https://en.wikipedia.org/wiki/CPU_modes#Mode_types" target="_blank">kernel mode</a> (all cores).'
+ },
+ 'users.cpu_system': {
+ info: 'The amount of time the CPU was busy executing code in '+
+ '<a href="https://en.wikipedia.org/wiki/CPU_modes#Mode_types" target="_blank">kernel mode</a> (all cores).'
+ },
+
+ 'apps.cpu_guest': {
+ info: 'The amount of time spent running a virtual CPU for a guest operating system (all cores).'
+ },
+ 'groups.cpu_guest': {
+ info: 'The amount of time spent running a virtual CPU for a guest operating system (all cores).'
+ },
+ 'users.cpu_guest': {
+ info: 'The amount of time spent running a virtual CPU for a guest operating system (all cores).'
+ },
+
+ // APPS disk
+ 'apps.preads': {
+ info: 'The amount of data that has been read from the storage layer. '+
+ 'Actual physical disk I/O was required.'
+ },
+ 'groups.preads': {
+ info: 'The amount of data that has been read from the storage layer. '+
+ 'Actual physical disk I/O was required.'
+ },
+ 'users.preads': {
+ info: 'The amount of data that has been read from the storage layer. '+
+ 'Actual physical disk I/O was required.'
},
+ 'apps.pwrites': {
+ info: 'The amount of data that has been written to the storage layer. '+
+ 'Actual physical disk I/O was required.'
+ },
+ 'groups.pwrites': {
+ info: 'The amount of data that has been written to the storage layer. '+
+ 'Actual physical disk I/O was required.'
+ },
+ 'users.pwrites': {
+ info: 'The amount of data that has been written to the storage layer. '+
+ 'Actual physical disk I/O was required.'
+ },
+
+ 'apps.lreads': {
+ info: 'The amount of data that has been read from the storage layer. '+
+ 'It includes things such as terminal I/O and is unaffected by whether or '+
+ 'not actual physical disk I/O was required '+
+ '(the read might have been satisfied from pagecache).'
+ },
+ 'groups.lreads': {
+ info: 'The amount of data that has been read from the storage layer. '+
+ 'It includes things such as terminal I/O and is unaffected by whether or '+
+ 'not actual physical disk I/O was required '+
+ '(the read might have been satisfied from pagecache).'
+ },
+ 'users.lreads': {
+ info: 'The amount of data that has been read from the storage layer. '+
+ 'It includes things such as terminal I/O and is unaffected by whether or '+
+ 'not actual physical disk I/O was required '+
+ '(the read might have been satisfied from pagecache).'
+ },
+
+ 'apps.lwrites': {
+ info: 'The amount of data that has been written or shall be written to the storage layer. '+
+ 'It includes things such as terminal I/O and is unaffected by whether or '+
+ 'not actual physical disk I/O was required.'
+ },
+ 'groups.lwrites': {
+ info: 'The amount of data that has been written or shall be written to the storage layer. '+
+ 'It includes things such as terminal I/O and is unaffected by whether or '+
+ 'not actual physical disk I/O was required.'
+ },
+ 'users.lwrites': {
+ info: 'The amount of data that has been written or shall be written to the storage layer. '+
+ 'It includes things such as terminal I/O and is unaffected by whether or '+
+ 'not actual physical disk I/O was required.'
+ },
+
+ 'apps.files': {
+ info: 'The number of open files and directories.'
+ },
+ 'groups.files': {
+ info: 'The number of open files and directories.'
+ },
+ 'users.files': {
+ info: 'The number of open files and directories.'
+ },
+
+ // APPS mem
'apps.mem': {
info: 'Real memory (RAM) used by applications. This does not include shared memory.'
},
+ 'groups.mem': {
+ info: 'Real memory (RAM) used per user group. This does not include shared memory.'
+ },
+ 'users.mem': {
+ info: 'Real memory (RAM) used per user. This does not include shared memory.'
+ },
'apps.vmem': {
- info: 'Virtual memory allocated by applications. Please check <a href="https://github.com/netdata/netdata/tree/master/daemon#virtual-memory" target="_blank">this article</a> for more information.'
+ info: 'Virtual memory allocated by applications. '+
+ 'Check <a href="https://github.com/netdata/netdata/tree/master/daemon#virtual-memory" target="_blank">this article</a> for more information.'
+ },
+ 'groups.vmem': {
+ info: 'Virtual memory allocated per user group since the Netdata restart. Please check <a href="https://github.com/netdata/netdata/tree/master/daemon#virtual-memory" target="_blank">this article</a> for more information.'
+ },
+ 'users.vmem': {
+ info: 'Virtual memory allocated per user since the Netdata restart. Please check <a href="https://github.com/netdata/netdata/tree/master/daemon#virtual-memory" target="_blank">this article</a> for more information.'
},
- 'apps.preads': {
- height: 2.0
+ 'apps.minor_faults': {
+ info: 'The number of <a href="https://en.wikipedia.org/wiki/Page_fault#Minor" target="_blank">minor faults</a> '+
+ 'which have not required loading a memory page from the disk. '+
+ 'Minor page faults occur when a process needs data that is in memory and is assigned to another process. '+
+ 'They share memory pages between multiple processes – '+
+ 'no additional data needs to be read from disk to memory.'
+ },
+ 'groups.minor_faults': {
+ info: 'The number of <a href="https://en.wikipedia.org/wiki/Page_fault#Minor" target="_blank">minor faults</a> '+
+ 'which have not required loading a memory page from the disk. '+
+ 'Minor page faults occur when a process needs data that is in memory and is assigned to another process. '+
+ 'They share memory pages between multiple processes – '+
+ 'no additional data needs to be read from disk to memory.'
+ },
+ 'users.minor_faults': {
+ info: 'The number of <a href="https://en.wikipedia.org/wiki/Page_fault#Minor" target="_blank">minor faults</a> '+
+ 'which have not required loading a memory page from the disk. '+
+ 'Minor page faults occur when a process needs data that is in memory and is assigned to another process. '+
+ 'They share memory pages between multiple processes – '+
+ 'no additional data needs to be read from disk to memory.'
},
- 'apps.pwrites': {
- height: 2.0
+ // APPS processes
+ 'apps.threads': {
+ info: 'The number of <a href="https://en.wikipedia.org/wiki/Thread_(computing)" target="_blank">threads</a>.'
+ },
+ 'groups.threads': {
+ info: 'The number of <a href="https://en.wikipedia.org/wiki/Thread_(computing)" target="_blank">threads</a>.'
+ },
+ 'users.threads': {
+ info: 'The number of <a href="https://en.wikipedia.org/wiki/Thread_(computing)" target="_blank">threads</a>.'
+ },
+
+ 'apps.processes': {
+ info: 'The number of <a href="https://en.wikipedia.org/wiki/Process_(computing)" target="_blank">processes</a>.'
+ },
+ 'groups.processes': {
+ info: 'The number of <a href="https://en.wikipedia.org/wiki/Process_(computing)" target="_blank">processes</a>.'
+ },
+ 'users.processes': {
+ info: 'The number of <a href="https://en.wikipedia.org/wiki/Process_(computing)" target="_blank">processes</a>.'
},
'apps.uptime': {
- info: 'Carried over process group uptime since the Netdata restart. The period of time within which at least one process in the group was running.'
+ info: 'The period of time within which at least one process in the group has been running.'
+ },
+ 'groups.uptime': {
+ info: 'The period of time within which at least one process in the group has been running.'
+ },
+ 'users.uptime': {
+ info: 'The period of time within which at least one process in the group has been running.'
+ },
+
+ 'apps.uptime_min': {
+ info: 'The shortest uptime among processes in the group.'
+ },
+ 'groups.uptime_min': {
+ info: 'The shortest uptime among processes in the group.'
+ },
+ 'users.uptime_min': {
+ info: 'The shortest uptime among processes in the group.'
+ },
+
+ 'apps.uptime_avg': {
+ info: 'The average uptime of processes in the group.'
+ },
+ 'groups.uptime_avg': {
+ info: 'The average uptime of processes in the group.'
+ },
+ 'users.uptime_avg': {
+ info: 'The average uptime of processes in the group.'
+ },
+
+ 'apps.uptime_max': {
+ info: 'The longest uptime among processes in the group.'
+ },
+ 'groups.uptime_max': {
+ info: 'The longest uptime among processes in the group.'
+ },
+ 'users.uptime_max': {
+ info: 'The longest uptime among processes in the group.'
+ },
+
+ 'apps.pipes': {
+ info: 'The number of open '+
+ '<a href="https://en.wikipedia.org/wiki/Anonymous_pipe#Unix" target="_blank">pipes</a>. '+
+ 'A pipe is a unidirectional data channel that can be used for interprocess communication.'
+ },
+ 'groups.pipes': {
+ info: 'The number of open '+
+ '<a href="https://en.wikipedia.org/wiki/Anonymous_pipe#Unix" target="_blank">pipes</a>. '+
+ 'A pipe is a unidirectional data channel that can be used for interprocess communication.'
+ },
+ 'users.pipes': {
+ info: 'The number of open '+
+ '<a href="https://en.wikipedia.org/wiki/Anonymous_pipe#Unix" target="_blank">pipes</a>. '+
+ 'A pipe is a unidirectional data channel that can be used for interprocess communication.'
},
+ // APPS swap
+ 'apps.swap': {
+ info: 'The amount of swapped-out virtual memory by anonymous private pages. '+
+ 'This does not include shared swap memory.'
+ },
+ 'groups.swap': {
+ info: 'The amount of swapped-out virtual memory by anonymous private pages. '+
+ 'This does not include shared swap memory.'
+ },
+ 'users.swap': {
+ info: 'The amount of swapped-out virtual memory by anonymous private pages. '+
+ 'This does not include shared swap memory.'
+ },
+
+ 'apps.major_faults': {
+ info: 'The number of <a href="https://en.wikipedia.org/wiki/Page_fault#Major" target="_blank">major faults</a> '+
+ 'which have required loading a memory page from the disk. '+
+ 'Major page faults occur because of the absence of the required page from the RAM. '+
+ 'They are expected when a process starts or needs to read in additional data and '+
+ 'in these cases do not indicate a problem condition. '+
+ 'However, a major page fault can also be the result of reading memory pages that have been written out '+
+ 'to the swap file, which could indicate a memory shortage.'
+ },
+ 'groups.major_faults': {
+ info: 'The number of <a href="https://en.wikipedia.org/wiki/Page_fault#Major" target="_blank">major faults</a> '+
+ 'which have required loading a memory page from the disk. '+
+ 'Major page faults occur because of the absence of the required page from the RAM. '+
+ 'They are expected when a process starts or needs to read in additional data and '+
+ 'in these cases do not indicate a problem condition. '+
+ 'However, a major page fault can also be the result of reading memory pages that have been written out '+
+ 'to the swap file, which could indicate a memory shortage.'
+ },
+ 'users.major_faults': {
+ info: 'The number of <a href="https://en.wikipedia.org/wiki/Page_fault#Major" target="_blank">major faults</a> '+
+ 'which have required loading a memory page from the disk. '+
+ 'Major page faults occur because of the absence of the required page from the RAM. '+
+ 'They are expected when a process starts or needs to read in additional data and '+
+ 'in these cases do not indicate a problem condition. '+
+ 'However, a major page fault can also be the result of reading memory pages that have been written out '+
+ 'to the swap file, which could indicate a memory shortage.'
+ },
+
+ // APPS net
+ 'apps.sockets': {
+ info: 'The number of open sockets. '+
+ 'Sockets are a way to enable inter-process communication between programs running on a server, '+
+ 'or between programs running on separate servers. This includes both network and UNIX sockets.'
+ },
+ 'groups.sockets': {
+ info: 'The number of open sockets. '+
+ 'Sockets are a way to enable inter-process communication between programs running on a server, '+
+ 'or between programs running on separate servers. This includes both network and UNIX sockets.'
+ },
+ 'users.sockets': {
+ info: 'The number of open sockets. '+
+ 'Sockets are a way to enable inter-process communication between programs running on a server, '+
+ 'or between programs running on separate servers. This includes both network and UNIX sockets.'
+ },
+
+ // Apps eBPF stuff
+
'apps.file_open': {
- info: 'Calls to the internal function <code>do_sys_open</code> ( For kernels newer than <code>5.5.19</code> we add a kprobe to <code>do_sys_openat2</code>. ), which is the common function called from' +
+ info: 'Calls to the internal function <code>do_sys_open</code> (for kernels newer than <code>5.5.19</code> we add a kprobe to <code>do_sys_openat2</code>), which is the common function called from' +
' <a href="https://www.man7.org/linux/man-pages/man2/open.2.html" target="_blank">open(2)</a> ' +
' and <a href="https://www.man7.org/linux/man-pages/man2/openat.2.html" target="_blank">openat(2)</a>. '
},
'apps.file_open_error': {
- info: 'Failed calls to the internal function <code>do_sys_open</code> ( For kernels newer than <code>5.5.19</code> we add a kprobe to <code>do_sys_openat2</code>. ).'
+ info: 'Failed calls to the internal function <code>do_sys_open</code> (for kernels newer than <code>5.5.19</code> we add a kprobe to <code>do_sys_openat2</code>).'
},
'apps.file_closed': {
@@ -1256,47 +2576,55 @@ netdataDashboard.context = {
},
'apps.process_create': {
- info: 'Calls to either <a href="https://www.ece.uic.edu/~yshi1/linux/lkse/node4.html#SECTION00421000000000000000" target="_blank">do_fork</a>, or <code>kernel_clone</code> if you are running kernel newer than 5.9.16, to create a new task, which is the common name used to define process and tasks inside the kernel. Netdata identifies the process by counting the number of calls to <a href="https://linux.die.net/man/2/clone" target="_blank">sys_clone</a> that do not have the flag <code>CLONE_THREAD</code> set.'
+ info: 'Calls to either <a href="https://programming.vip/docs/the-execution-procedure-of-do_fork-function-in-linux.html" target="_blank">do_fork</a>, or <code>kernel_clone</code> if you are running kernel newer than 5.9.16, to create a new task, which is the common name used to define process and tasks inside the kernel. This chart is provided by eBPF plugin.'
},
'apps.thread_create': {
- info: 'Calls to either <a href="https://www.ece.uic.edu/~yshi1/linux/lkse/node4.html#SECTION00421000000000000000" target="_blank">do_fork</a>, or <code>kernel_clone</code> if you are running kernel newer than 5.9.16, to create a new task, which is the common name used to define process and tasks inside the kernel. Netdata identifies the threads by counting the number of calls to <a href="https://linux.die.net/man/2/clone" target="_blank">sys_clone</a> that have the flag <code>CLONE_THREAD</code> set.'
+ info: 'Calls to either <a href="https://programming.vip/docs/the-execution-procedure-of-do_fork-function-in-linux.html" target="_blank">do_fork</a>, or <code>kernel_clone</code> if you are running kernel newer than 5.9.16, to create a new task, which is the common name used to define process and tasks inside the kernel. Netdata identifies the threads by monitoring the tracepoint <code>sched_process_fork</code>. This chart is provided by eBPF plugin.'
+ },
+
+ 'apps.task_exit': {
+ info: 'Calls to the function responsible for closing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">do_exit</a>) tasks. This chart is provided by eBPF plugin.'
},
'apps.task_close': {
- info: 'Calls to the functions responsible for closing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">do_exit</a>) and releasing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">release_task</a>) tasks.'
+ info: 'Calls to the function responsible for releasing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">release_task</a>) tasks. This chart is provided by eBPF plugin.'
+ },
+
+ 'apps.task_error': {
+ info: 'The number of errors encountered while creating a new process or thread. This chart is provided by eBPF plugin.'
},
- 'apps.bandwidth_sent': {
+ 'apps.total_bandwidth_sent': {
info: 'Bytes sent by functions <code>tcp_sendmsg</code> and <code>udp_sendmsg</code>.'
},
- 'apps.bandwidth_recv': {
+ 'apps.total_bandwidth_recv': {
info: 'Bytes received by functions <code>tcp_cleanup_rbuf</code> and <code>udp_recvmsg</code>. We use <code>tcp_cleanup_rbuf</code> instead <code>tcp_recvmsg</code>, because this last misses <code>tcp_read_sock()</code> traffic and we would also need to have more probes to get the socket and package size.'
},
'apps.bandwidth_tcp_send': {
- info: 'Calls for function <code>tcp_sendmsg</code>.'
+ info: 'The function <code>tcp_sendmsg</code> is used to collect number of bytes sent from TCP connections.'
},
'apps.bandwidth_tcp_recv': {
- info: 'Calls for functions <code>tcp_cleanup_rbuf</code>. We use <code>tcp_cleanup_rbuf</code> instead <code>tcp_recvmsg</code>, because this last misses <code>tcp_read_sock()</code> traffic and we would also need to have more probes to get the socket and package size.'
+ info: 'The function <code>tcp_cleanup_rbuf</code> is used to collect number of bytes received from TCP connections.'
},
'apps.bandwidth_tcp_retransmit': {
- info: 'Calls for functions <code>tcp_retransmit_skb</code>.'
+ info: 'The function <code>tcp_retransmit_skb</code> is called when the host did not receive the expected return from a packet sent.'
},
'apps.bandwidth_udp_send': {
- info: 'Calls for function <code>udp_sendmsg</code>.'
+ info: 'The function <code>udp_sendmsg</code> is used to collect number of bytes sent from UDP connections.'
},
'apps.bandwidth_udp_recv': {
- info: 'Calls for function <code>udp_recvmsg</code>.'
+ info: 'The function <code>udp_recvmsg</code> is used to collect number of bytes received from UDP connections.'
},
'apps.dc_hit_ratio': {
- info: 'Percentage of file accesses that were present in the directory cache. 100% means that every file that was accessed was present in the directory cache. If files are not present in the directory cache 1) they are not present in the file system, 2) the files were not accessed before. Read more about <a href="https://www.kernel.org/doc/htmldocs/filesystems/the_directory_cache.html" target="_blank">directory cache</a>.'
+ info: 'Percentage of file accesses that were present in the directory cache. 100% means that every file that was accessed was present in the directory cache. If files are not present in the directory cache 1) they are not present in the file system, 2) the files were not accessed before. Read more about <a href="https://www.kernel.org/doc/htmldocs/filesystems/the_directory_cache.html" target="_blank">directory cache</a>. Netdata also gives a summary for these charts in <a href="#menu_filesystem_submenu_directory_cache__eBPF_">Filesystem submenu</a>.'
},
'apps.dc_reference': {
@@ -1312,60 +2640,6 @@ netdataDashboard.context = {
},
// ------------------------------------------------------------------------
- // USERS
-
- 'users.cpu': {
- height: 2.0
- },
-
- 'users.mem': {
- info: 'Real memory (RAM) used per user. This does not include shared memory.'
- },
-
- 'users.vmem': {
- info: 'Virtual memory allocated per user. Please check <a href="https://github.com/netdata/netdata/tree/master/daemon#virtual-memory" target="_blank">this article</a> for more information.'
- },
-
- 'users.preads': {
- height: 2.0
- },
-
- 'users.pwrites': {
- height: 2.0
- },
-
- 'users.uptime': {
- info: 'Carried over process group uptime since the Netdata restart. The period of time within which at least one process in the group was running.'
- },
-
- // ------------------------------------------------------------------------
- // GROUPS
-
- 'groups.cpu': {
- height: 2.0
- },
-
- 'groups.mem': {
- info: 'Real memory (RAM) used per user group. This does not include shared memory.'
- },
-
- 'groups.vmem': {
- info: 'Virtual memory allocated per user group since the Netdata restart. Please check <a href="https://github.com/netdata/netdata/tree/master/daemon#virtual-memory" target="_blank">this article</a> for more information.'
- },
-
- 'groups.preads': {
- height: 2.0
- },
-
- 'groups.pwrites': {
- height: 2.0
- },
-
- 'groups.uptime': {
- info: 'Carried over process group uptime. The period of time within which at least one process in the group was running.'
- },
-
- // ------------------------------------------------------------------------
// NETWORK QoS
'tc.qos': {
@@ -1428,7 +2702,146 @@ netdataDashboard.context = {
else
return '';
}
- ]
+ ],
+ info: 'The amount of traffic transferred by the network interface.'
+ },
+ 'net.packets': {
+ info: 'The number of packets transferred by the network interface. '+
+ 'Received <a href="https://en.wikipedia.org/wiki/Multicast" target="_blank">multicast</a> counter is '+
+ 'commonly calculated at the device level (unlike <b>received</b>) and therefore may include packets which did not reach the host.'
+ },
+ 'net.errors': {
+ info: '<p>The number of errors encountered by the network interface.</p>'+
+ '<p><b>Inbound</b> - bad packets received on this interface. '+
+ 'It includes dropped packets due to invalid length, CRC, frame alignment, and other errors. '+
+ '<b>Outbound</b> - transmit problems. '+
+ 'It includes frames transmission errors due to loss of carrier, FIFO underrun/underflow, heartbeat, '+
+ 'late collisions, and other problems.</p>'
+ },
+ 'net.fifo': {
+ info: '<p>The number of FIFO errors encountered by the network interface.</p>'+
+ '<p><b>Inbound</b> - packets dropped because they did not fit into buffers provided by the host, '+
+ 'e.g. packets larger than MTU or next buffer in the ring was not available for a scatter transfer. '+
+ '<b>Outbound</b> - frame transmission errors due to device FIFO underrun/underflow. '+
+ 'This condition occurs when the device begins transmission of a frame '+
+ 'but is unable to deliver the entire frame to the transmitter in time for transmission.</p>'
+ },
+ 'net.drops': {
+ info: '<p>The number of packets that have been dropped at the network interface level.</p>'+
+ '<p><b>Inbound</b> - packets received but not processed, e.g. due to '+
+ '<a href="#menu_system_submenu_softnet_stat">softnet backlog</a> overflow, bad/unintended VLAN tags, '+
+ 'unknown or unregistered protocols, IPv6 frames when the server is not configured for IPv6. '+
+ '<b>Outbound</b> - packets dropped on their way to transmission, e.g. due to lack of resources.</p>'
+ },
+ 'net.compressed': {
+ info: 'The number of correctly transferred compressed packets by the network interface. '+
+ 'These counters are only meaningful for interfaces which support packet compression (e.g. CSLIP, PPP).'
+ },
+ 'net.events': {
+ info: '<p>The number of errors encountered by the network interface.</p>'+
+ '<p><b>Frames</b> - aggregated counter for dropped packets due to '+
+ 'invalid length, FIFO overflow, CRC, and frame alignment errors. '+
+ '<b>Collisions</b> - '+
+ '<a href="https://en.wikipedia.org/wiki/Collision_(telecommunications)" target="_blank">collisions</a> during packet transmissions. '+
+ '<b>Carrier</b> - aggregated counter for frame transmission errors due to '+
+ 'excessive collisions, loss of carrier, device FIFO underrun/underflow, Heartbeat/SQE Test errors, and late collisions.</p>'
+ },
+ 'net.duplex': {
+ info: '<p>The interface\'s latest or current '+
+ '<a href="https://en.wikipedia.org/wiki/Duplex_(telecommunications)" target="_blank">duplex</a> that the network adapter '+
+ '<a href="https://en.wikipedia.org/wiki/Autonegotiation" target="_blank">negotiated</a> with the device it is connected to.</p>'+
+ '<p><b>Unknown</b> - the duplex mode can not be determined. '+
+ '<b>Half duplex</b> - the communication is one direction at a time. '+
+ '<b>Full duplex</b> - the interface is able to send and receive data simultaneously.</p>'+
+ '<p><b>State map</b>: 0 - unknown, 1 - half, 2 - full.</p>'
+ },
+ 'net.operstate': {
+ info: '<p>The current '+
+ '<a href="https://datatracker.ietf.org/doc/html/rfc2863" target="_blank">operational state</a> of the interface.</p>'+
+ '<p><b>Unknown</b> - the state can not be determined. '+
+ '<b>NotPresent</b> - the interface has missing (typically, hardware) components. '+
+ '<b>Down</b> - the interface is unable to transfer data on L1, e.g. ethernet is not plugged or interface is administratively down. '+
+ '<b>LowerLayerDown</b> - the interface is down due to state of lower-layer interface(s). '+
+ '<b>Testing</b> - the interface is in testing mode, e.g. cable test. It can’t be used for normal traffic until tests complete. '+
+ '<b>Dormant</b> - the interface is L1 up, but waiting for an external event, e.g. for a protocol to establish. '+
+ '<b>Up</b> - the interface is ready to pass packets and can be used.</p>'+
+ '<p><b>State map</b>: 0 - unknown, 1 - notpresent, 2 - down, 3 - lowerlayerdown, 4 - testing, 5 - dormant, 6 - up.</p>'
+ },
+ 'net.carrier': {
+ info: '<p>The current physical link state of the interface.</p>'+
+ '<p><b>State map</b>: 0 - down, 1 - up.</p>'
+ },
+ 'net.speed': {
+ info: 'The interface\'s latest or current speed that the network adapter '+
+ '<a href="https://en.wikipedia.org/wiki/Autonegotiation" target="_blank">negotiated</a> with the device it is connected to. '+
+ 'This does not give the max supported speed of the NIC.'
+ },
+ 'net.mtu': {
+ info: 'The interface\'s currently configured '+
+ '<a href="https://en.wikipedia.org/wiki/Maximum_transmission_unit" target="_blank">Maximum transmission unit</a> (MTU) value. '+
+ 'MTU is the size of the largest protocol data unit that can be communicated in a single network layer transaction.'
+ },
+
+ // ------------------------------------------------------------------------
+ // WIRELESS NETWORK INTERFACES
+
+ 'wireless.link_quality': {
+ info: 'Overall quality of the link. '+
+ 'May be based on the level of contention or interference, the bit or frame error rate, '+
+ 'how good the received signal is, some timing synchronisation, or other hardware metric.'
+ },
+
+ 'wireless.signal_level': {
+ info: 'Received signal strength '+
+ '(<a href="https://en.wikipedia.org/wiki/Received_signal_strength_indication" target="_blank">RSSI</a>).'
+ },
+
+ 'wireless.noise_level': {
+ info: 'Background noise level (when no packet is transmitted).'
+ },
+
+ 'wireless.discarded_packets': {
+ info: '<p>The number of discarded packets.</p>'+
+ '<p><b>NWID</b> - received packets with a different NWID or ESSID. '+
+ 'Used to detect configuration problems or adjacent network existence (on the same frequency). '+
+ '<b>Crypt</b> - received packets that the hardware was unable to decrypt. '+
+ 'This can be used to detect invalid encryption settings. '+
+ '<b>Frag</b> - received packets for which the hardware was not able to properly re-assemble '+
+ 'the link layer fragments (most likely one was missing). '+
+ '<b>Retry</b> - packets that the hardware failed to deliver. '+
+ 'Most MAC protocols will retry the packet a number of times before giving up. '+
+ '<b>Misc</b> - other packets lost in relation with specific wireless operations.</p>'
+ },
+
+ 'wireless.missed_beacons': {
+ info: 'The number of periodic '+
+ '<a href="https://en.wikipedia.org/wiki/Beacon_frame" target="_blank">beacons</a> '+
+ 'from the Cell or the Access Point that have been missed. '+
+ 'Beacons are sent at regular intervals to maintain the cell coordination, '+
+ 'failure to receive them usually indicates that the card is out of range.'
+ },
+
+ // ------------------------------------------------------------------------
+ // INFINIBAND
+
+ 'ib.bytes': {
+ info: 'The amount of traffic transferred by the port.'
+ },
+
+ 'ib.packets': {
+ info: 'The number of packets transferred by the port.'
+ },
+
+ 'ib.errors': {
+ info: 'The number of errors encountered by the port.'
+ },
+
+ 'ib.hwerrors': {
+ info: 'The number of hardware errors encountered by the port.'
+ },
+
+ 'ib.hwpackets': {
+ info: 'The number of hardware packets transferred by the port.'
},
// ------------------------------------------------------------------------
@@ -1448,6 +2861,19 @@ netdataDashboard.context = {
},
// ------------------------------------------------------------------------
+ // IPVS
+ 'ipvs.sockets': {
+ info: 'Total created connections for all services and their servers. '+
+ 'To see the IPVS connection table, run <code>ipvsadm -Lnc</code>.'
+ },
+ 'ipvs.packets': {
+ info: 'Total transferred packets for all services and their servers.'
+ },
+ 'ipvs.net': {
+ info: 'Total network traffic for all services and their servers.'
+ },
+
+ // ------------------------------------------------------------------------
// DISKS
'disk.util': {
@@ -1473,13 +2899,29 @@ netdataDashboard.context = {
netdataDashboard.gaugeChart('Read', '12%', 'reads'),
netdataDashboard.gaugeChart('Write', '12%', 'writes')
],
- info: 'Amount of data transferred to and from disk.'
+ info: 'The amount of data transferred to and from disk.'
+ },
+
+ 'disk_ext.io': {
+ info: 'The amount of discarded data that are no longer in use by a mounted file system.'
},
'disk.ops': {
info: 'Completed disk I/O operations. Keep in mind the number of operations requested might be higher, since the system is able to merge adjacent to each other (see merged operations chart).'
},
+ 'disk_ext.ops': {
+ info: '<p>The number (after merges) of completed discard/flush requests.</p>'+
+ '<p><b>Discard</b> commands inform disks which blocks of data are no longer considered to be in use and therefore can be erased internally. '+
+ 'They are useful for solid-state drives (SSDs) and thinly-provisioned storage. '+
+ 'Discarding/trimming enables the SSD to handle garbage collection more efficiently, '+
+ 'which would otherwise slow future write operations to the involved blocks down.</p>'+
+ '<p><b>Flush</b> operations transfer all modified in-core data (i.e., modified buffer cache pages) to the disk device '+
+ 'so that all changed information can be retrieved even if the system crashes or is rebooted. '+
+ 'Flush requests are executed by disks. Flush requests are not tracked for partitions. '+
+ 'Before being merged, flush operations are counted as writes.</p>'
+ },
+
'disk.qops': {
info: 'I/O operations currently in progress. This metric is a snapshot - it is not an average over the last interval.'
},
@@ -1489,25 +2931,32 @@ netdataDashboard.context = {
info: 'The sum of the duration of all completed I/O operations. This number can exceed the interval if the disk is able to execute I/O operations in parallel.'
},
'disk_ext.iotime': {
- height: 0.5
+ height: 0.5,
+ info: 'The sum of the duration of all completed discard/flush operations. This number can exceed the interval if the disk is able to execute discard/flush operations in parallel.'
},
'disk.mops': {
height: 0.5,
info: 'The number of merged disk operations. The system is able to merge adjacent I/O operations, for example two 4KB reads can become one 8KB read before given to disk.'
},
'disk_ext.mops': {
- height: 0.5
+ height: 0.5,
+ info: 'The number of merged discard disk operations. Discard operations which are adjacent to each other may be merged for efficiency.'
},
'disk.svctm': {
height: 0.5,
info: 'The average service time for completed I/O operations. This metric is calculated using the total busy time of the disk and the number of completed operations. If the disk is able to execute multiple parallel operations the reporting average service time will be misleading.'
},
+ 'disk.latency_io': {
+ height: 0.5,
+ info: 'Disk I/O latency is the time it takes for an I/O request to be completed. Latency is the single most important metric to focus on when it comes to storage performance, under most circumstances. For hard drives, an average latency somewhere between 10 to 20 ms can be considered acceptable. For SSD (Solid State Drives), depending on the workload it should never reach higher than 1-3 ms. In most cases, workloads will experience less than 1ms latency numbers. The dimensions refer to time intervals. This chart is based on the <a href="https://github.com/cloudflare/ebpf_exporter/blob/master/examples/bio-tracepoints.yaml" target="_blank">bio_tracepoints</a> tool of the ebpf_exporter.'
+ },
'disk.avgsz': {
height: 0.5,
info: 'The average I/O operation size.'
},
'disk_ext.avgsz': {
- height: 0.5
+ height: 0.5,
+ info: 'The average discard operation size.'
},
'disk.await': {
height: 0.5,
@@ -1515,21 +2964,296 @@ netdataDashboard.context = {
},
'disk_ext.await': {
height: 0.5,
- info: 'The average time for extended I/O requests issued to the device to be served. This includes the time spent by the requests in queue and the time spent servicing them.'
+ info: 'The average time for discard/flush requests issued to the device to be served. This includes the time spent by the requests in queue and the time spent servicing them.'
},
'disk.space': {
info: 'Disk space utilization. reserved for root is automatically reserved by the system to prevent the root user from getting out of space.'
},
'disk.inodes': {
- info: 'inodes (or index nodes) are filesystem objects (e.g. files and directories). On many types of file system implementations, the maximum number of inodes is fixed at filesystem creation, limiting the maximum number of files the filesystem can hold. It is possible for a device to run out of inodes. When this happens, new files cannot be created on the device, even though there may be free space available.'
+ info: 'Inodes (or index nodes) are filesystem objects (e.g. files and directories). On many types of file system implementations, the maximum number of inodes is fixed at filesystem creation, limiting the maximum number of files the filesystem can hold. It is possible for a device to run out of inodes. When this happens, new files cannot be created on the device, even though there may be free space available.'
+ },
+
+ 'disk.bcache_hit_ratio': {
+ info: '<p><b>Bcache (block cache)</b> is a cache in the block layer of Linux kernel, '+
+ 'which is used for accessing secondary storage devices. '+
+ 'It allows one or more fast storage devices, such as flash-based solid-state drives (SSDs), '+
+ 'to act as a cache for one or more slower storage devices, such as hard disk drives (HDDs).</p>'+
+ '<p>Percentage of data requests that were fulfilled right from the block cache. '+
+ 'Hits and misses are counted per individual IO as bcache sees them. '+
+ 'A partial hit is counted as a miss.</p>'
+ },
+ 'disk.bcache_rates': {
+ info: 'Throttling rates. '+
+ 'To avoid congestion, bcache tracks latency to the cache device, and gradually throttles traffic if the latency exceeds a threshold. ' +
+ 'If the writeback percentage is nonzero, bcache tries to keep around this percentage of the cache dirty by '+
+ 'throttling background writeback and using a PD controller to smoothly adjust the rate.'
+ },
+ 'disk.bcache_size': {
+ info: 'The amount of dirty data for this backing device in the cache.'
+ },
+ 'disk.bcache_usage': {
+ info: 'The percentage of cache device which does not contain dirty data, and could potentially be used for writeback.'
+ },
+ 'disk.bcache_cache_read_races': {
+ info: '<b>Read races</b> happen when a bucket was reused and invalidated while data was being read from the cache. '+
+ 'When this occurs the data is reread from the backing device. '+
+ '<b>IO errors</b> are decayed by the half life. '+
+ 'If the decaying count reaches the limit, dirty data is written out and the cache is disabled.'
+ },
+ 'disk.bcache': {
+ info: 'Hits and misses are counted per individual IO as bcache sees them; a partial hit is counted as a miss. '+
+ 'Collisions happen when data was going to be inserted into the cache from a cache miss, '+
+ 'but raced with a write and data was already present. '+
+ 'Cache miss reads are rounded up to the readahead size, but without overlapping existing cache entries.'
+ },
+ 'disk.bcache_bypass': {
+ info: 'Hits and misses for IO that is intended to skip the cache.'
+ },
+ 'disk.bcache_cache_alloc': {
+ info: '<p>Working set size.</p>'+
+ '<p><b>Unused</b> is the percentage of the cache that does not contain any data. '+
+ '<b>Dirty</b> is the data that is modified in the cache but not yet written to the permanent storage. '+
+ '<b>Clean</b> data matches the data stored on the permanent storage. '+
+ '<b>Metadata</b> is bcache\'s metadata overhead.</p>'
+ },
+
+ // ------------------------------------------------------------------------
+ // NFS client
+
+ 'nfs.net': {
+ info: 'The number of received UDP and TCP packets.'
+ },
+
+ 'nfs.rpc': {
+ info: '<p>Remote Procedure Call (RPC) statistics.</p>'+
+ '<p><b>Calls</b> - all RPC calls. '+
+ '<b>Retransmits</b> - retransmitted calls. '+
+ '<b>AuthRefresh</b> - authentication refresh calls (validating credentials with the server).</p>'
+ },
+
+ 'nfs.proc2': {
+ info: 'NFSv2 RPC calls. The individual metrics are described in '+
+ '<a href="https://datatracker.ietf.org/doc/html/rfc1094#section-2.2" target="_blank">RFC1094</a>.'
+ },
+
+ 'nfs.proc3': {
+ info: 'NFSv3 RPC calls. The individual metrics are described in '+
+ '<a href="https://datatracker.ietf.org/doc/html/rfc1813#section-3" target="_blank">RFC1813</a>.'
+ },
+
+ 'nfs.proc4': {
+ info: 'NFSv4 RPC calls. The individual metrics are described in '+
+ '<a href="https://datatracker.ietf.org/doc/html/rfc8881#section-18" target="_blank">RFC8881</a>.'
+ },
+
+ // ------------------------------------------------------------------------
+ // NFS server
+
+ 'nfsd.readcache': {
+ info: '<p>Reply cache statistics. '+
+ 'The reply cache keeps track of responses to recently performed non-idempotent transactions, and '+
+ 'in case of a replay, the cached response is sent instead of attempting to perform the operation again.</p>'+
+ '<p><b>Hits</b> - client did not receive a reply and re-transmitted its request. This event is undesirable. '+
+ '<b>Misses</b> - an operation that requires caching (non-idempotent). '+
+ '<b>Nocache</b> - an operation that does not require caching (idempotent).</p>'
+ },
+
+ 'nfsd.filehandles': {
+ info: '<p>File handle statistics. '+
+ 'File handles are small pieces of memory that keep track of what file is opened.</p>'+
+ '<p><b>Stale</b> - happens when a file handle references a location that has been recycled. '+
+ 'This also occurs when the server loses connection and '+
+ 'applications are still using files that are no longer accessible.</p>'
+ },
+
+ 'nfsd.io': {
+ info: 'The amount of data transferred to and from disk.'
+ },
+
+ 'nfsd.threads': {
+ info: 'The number of threads used by the NFS daemon.'
+ },
+
+ 'nfsd.readahead': {
+ info: '<p>Read-ahead cache statistics. '+
+ 'NFS read-ahead predictively requests blocks from a file in advance of I/O requests by the application. '+
+ 'It is designed to improve client sequential read throughput.</p>'+
+ '<p><b>10%</b>-<b>100%</b> - histogram of the depth at which the block was found. '+
+ 'This means how far the cached block is from the original block that was first requested. '+
+ '<b>Misses</b> - not found in the read-ahead cache.</p>'
+ },
+
+ 'nfsd.net': {
+ info: 'The number of received UDP and TCP packets.'
+ },
+
+ 'nfsd.rpc': {
+ info: '<p>Remote Procedure Call (RPC) statistics.</p>'+
+ '<p><b>Calls</b> - all RPC calls. '+
+ '<b>BadAuth</b> - bad authentication. '+
+ 'It does not count if you try to mount from a machine that is not in your exports file. '+
+ '<b>BadFormat</b> - other errors.</p>'
+ },
+
+ 'nfsd.proc2': {
+ info: 'NFSv2 RPC calls. The individual metrics are described in '+
+ '<a href="https://datatracker.ietf.org/doc/html/rfc1094#section-2.2" target="_blank">RFC1094</a>.'
+ },
+
+ 'nfsd.proc3': {
+ info: 'NFSv3 RPC calls. The individual metrics are described in '+
+ '<a href="https://datatracker.ietf.org/doc/html/rfc1813#section-3" target="_blank">RFC1813</a>.'
+ },
+
+ 'nfsd.proc4': {
+ info: 'NFSv4 RPC calls. The individual metrics are described in '+
+ '<a href="https://datatracker.ietf.org/doc/html/rfc8881#section-18" target="_blank">RFC8881</a>.'
+ },
+
+ 'nfsd.proc4ops': {
+ info: 'NFSv4 RPC operations. The individual metrics are described in '+
+ '<a href="https://datatracker.ietf.org/doc/html/rfc8881#section-18" target="_blank">RFC8881</a>.'
+ },
+
+ // ------------------------------------------------------------------------
+ // ZFS
+
+ 'zfs.arc_size': {
+ info: '<p>The size of the ARC.</p>'+
+ '<p><b>Arcsz</b> - actual size. '+
+ '<b>Target</b> - target size that the ARC is attempting to maintain (adaptive). '+
+ '<b>Min</b> - minimum size limit. When the ARC is asked to shrink, it will stop shrinking at this value. '+
+ '<b>Max</b> - maximum size limit.</p>'
+ },
+
+ 'zfs.l2_size': {
+ info: '<p>The size of the L2ARC.</p>'+
+ '<p><b>Actual</b> - size of compressed data. '+
+ '<b>Size</b> - size of uncompressed data.</p>'
+ },
+
+ 'zfs.reads': {
+ info: '<p>The number of read requests.</p>'+
+ '<p><b>ARC</b> - all prefetch and demand requests. '+
+ '<b>Demand</b> - triggered by an application request. '+
+ '<b>Prefetch</b> - triggered by the prefetch mechanism, not directly from an application request. '+
+ '<b>Metadata</b> - metadata read requests. '+
+ '<b>L2</b> - L2ARC read requests.</p>'
+ },
+
+ 'zfs.bytes': {
+ info: 'The amount of data transferred to and from the L2ARC cache devices.'
+ },
+
+ 'zfs.hits': {
+ info: '<p>Hit rate of the ARC read requests.</p>'+
+ '<p><b>Hits</b> - a data block was in the ARC DRAM cache and returned. '+
+ '<b>Misses</b> - a data block was not in the ARC DRAM cache. '+
+ 'It will be read from the L2ARC cache devices (if available and the data is cached on them) or the pool disks.</p>'
+ },
+
+ 'zfs.dhits': {
+ info: '<p>Hit rate of the ARC data and metadata demand read requests. '+
+ 'Demand requests are triggered by an application request.</p>'+
+ '<p><b>Hits</b> - a data block was in the ARC DRAM cache and returned. '+
+ '<b>Misses</b> - a data block was not in the ARC DRAM cache. '+
+ 'It will be read from the L2ARC cache devices (if available and the data is cached on them) or the pool disks.</p>'
+ },
+
+ 'zfs.phits': {
+ info: '<p>Hit rate of the ARC data and metadata prefetch read requests. '+
+ 'Prefetch requests are triggered by the prefetch mechanism, not directly from an application request.</p>'+
+ '<p><b>Hits</b> - a data block was in the ARC DRAM cache and returned. '+
+ '<b>Misses</b> - a data block was not in the ARC DRAM cache. '+
+ 'It will be read from the L2ARC cache devices (if available and the data is cached on them) or the pool disks.</p>'
+ },
+
+ 'zfs.mhits': {
+ info: '<p>Hit rate of the ARC metadata read requests.</p>'+
+ '<p><b>Hits</b> - a data block was in the ARC DRAM cache and returned. '+
+ '<b>Misses</b> - a data block was not in the ARC DRAM cache. '+
+ 'It will be read from the L2ARC cache devices (if available and the data is cached on them) or the pool disks.</p>'
+ },
+
+ 'zfs.l2hits': {
+ info: '<p>Hit rate of the L2ARC lookups.</p>'+
+ '<p><b>Hits</b> - a data block was in the L2ARC cache and returned. '+
+ '<b>Misses</b> - a data block was not in the L2ARC cache. '+
+ 'It will be read from the pool disks.</p>'
+ },
+
+ 'zfs.demand_data_hits': {
+ info: '<p>Hit rate of the ARC data demand read requests. '+
+ 'Demand requests are triggered by an application request.</p>'+
+ '<p><b>Hits</b> - a data block was in the ARC DRAM cache and returned. '+
+ '<b>Misses</b> - a data block was not in the ARC DRAM cache. '+
+ 'It will be read from the L2ARC cache devices (if available and the data is cached on them) or the pool disks.</p>'
+ },
+
+ 'zfs.prefetch_data_hits': {
+ info: '<p>Hit rate of the ARC data prefetch read requests. '+
+ 'Prefetch requests are triggered by the prefetch mechanism, not directly from an application request.</p>'+
+ '<p><b>Hits</b> - a data block was in the ARC DRAM cache and returned. '+
+ '<b>Misses</b> - a data block was not in the ARC DRAM cache. '+
+ 'It will be read from the L2ARC cache devices (if available and the data is cached on them) or the pool disks.</p>'
+ },
+
+ 'zfs.list_hits': {
+ info: 'MRU (most recently used) and MFU (most frequently used) cache list hits. '+
+ 'MRU and MFU lists contain metadata for requested blocks which are cached. '+
+ 'Ghost lists contain metadata of the evicted pages on disk.'
+ },
+
+ 'zfs.arc_size_breakdown': {
+ info: 'The size of MRU (most recently used) and MFU (most frequently used) cache.'
+ },
+
+ 'zfs.memory_ops': {
+ info: '<p>Memory operation statistics.</p>'+
+ '<p><b>Direct</b> - synchronous memory reclaim. Data is evicted from the ARC and free slabs reaped. '+
+ '<b>Throttled</b> - number of times that ZFS had to limit the ARC growth. '+
+ 'A constant increase of this value can indicate excessive pressure to evict data from the ARC. '+
+ '<b>Indirect</b> - asynchronous memory reclaim. It reaps free slabs from the ARC cache.</p>'
+ },
+
+ 'zfs.important_ops': {
+ info: '<p>Eviction and insertion operation statistics.</p>'+
+ '<p><b>EvictSkip</b> - skipped data eviction operations. '+
+ '<b>Deleted</b> - old data is evicted (deleted) from the cache. '+
+ '<b>MutexMiss</b> - an attempt to get hash or data block mutex when it is locked during eviction. '+
+ '<b>HashCollisions</b> - occurs when two distinct data block numbers have the same hash value.</p>'
+ },
+
+ 'zfs.actual_hits': {
+ info: '<p>MRU and MFU cache hit rate.</p>'+
+ '<p><b>Hits</b> - a data block was in the ARC DRAM cache and returned. '+
+ '<b>Misses</b> - a data block was not in the ARC DRAM cache. '+
+ 'It will be read from the L2ARC cache devices (if available and the data is cached on them) or the pool disks.</p>'
+ },
+
+ 'zfs.hash_elements': {
+ info: '<p>Data Virtual Address (DVA) hash table element statistics.</p>'+
+ '<p><b>Current</b> - current number of elements. '+
+ '<b>Max</b> - maximum number of elements seen.</p>'
+ },
+
+ 'zfs.hash_chains': {
+ info: '<p>Data Virtual Address (DVA) hash table chain statistics. '+
+ 'A chain is formed when two or more distinct data block numbers have the same hash value.</p>'+
+ '<p><b>Current</b> - current number of chains. '+
+ '<b>Max</b> - longest length seen for a chain. '+
+ 'If the value is high, performance may degrade as the hash locks are held longer while the chains are walked.</p>'
},
// ------------------------------------------------------------------------
// ZFS pools
'zfspool.state': {
- info: 'ZFS pool state. The overall health of a pool, as reported by <code>zpool status</code>, is determined by the aggregate state of all devices within the pool. ' +
- 'For details, see <a href="https://openzfs.github.io/openzfs-docs/man/8/zpoolconcepts.8.html?#Device_Failure_and_Recovery" target="_blank"> ZFS documentation</a>.'
+ info: 'ZFS pool state. '+
+ 'The overall health of a pool, as reported by <code>zpool status</code>, '+
+ 'is determined by the aggregate state of all devices within the pool. ' +
+ 'For states description, '+
+ 'see <a href="https://openzfs.github.io/openzfs-docs/man/7/zpoolconcepts.7.html#Device_Failure_and_Recovery" target="_blank"> ZFS documentation</a>.'
},
// ------------------------------------------------------------------------
@@ -1679,6 +3403,25 @@ netdataDashboard.context = {
'Assuming non-superuser accounts are being used to connect to Postgres (so <i>superuser_reserved_connections</i> are subtracted from <i>max_connections</i>).<br/>' +
'For more information see <a href="https://www.postgresql.org/docs/current/runtime-config-connection.html" target="_blank">Connections and Authentication</a>.'
},
+ 'postgres.forced_autovacuum': {
+ info: 'Percent towards forced autovacuum for one or more tables.<ul>' +
+ '<li><strong>percent_towards_forced_autovacuum:</strong> a forced autovacuum will run once this value reaches 100.</li>' +
+ '</ul>' +
+ 'For more information see <a href="https://www.postgresql.org/docs/current/routine-vacuuming.html" target="_blank">Preventing Transaction ID Wraparound Failures</a>.'
+ },
+ 'postgres.tx_wraparound_oldest_current_xid': {
+ info: 'The oldest current transaction id (xid).<ul>' +
+ '<li><strong>oldest_current_xid:</strong> oldest current transaction id.</li>' +
+ '</ul>' +
+ 'If for some reason autovacuum fails to clear old XIDs from a table, the system will begin to emit warning messages when the database\'s oldest XIDs reach eleven million transactions from the wraparound point.<br/>' +
+ 'For more information see <a href="https://www.postgresql.org/docs/current/routine-vacuuming.html" target="_blank">Preventing Transaction ID Wraparound Failures</a>.'
+ },
+ 'postgres.percent_towards_wraparound': {
+ info: 'Percent towards transaction wraparound.<ul>' +
+ '<li><strong>percent_towards_wraparound:</strong> transaction wraparound may occur when this value reaches 100.</li>' +
+ '</ul>' +
+ 'For more information see <a href="https://www.postgresql.org/docs/current/routine-vacuuming.html" target="_blank">Preventing Transaction ID Wraparound Failures</a>.'
+ },
// ------------------------------------------------------------------------
@@ -1916,7 +3659,10 @@ netdataDashboard.context = {
+ ' data-colors="' + NETDATA.colors[4] + '"'
+ ' role="application"></div>';
}
- ]
+ ],
+ info: 'Total CPU utilization within the configured or system-wide (if not set) limits. '+
+ 'When the CPU utilization of a cgroup exceeds the limit for the configured period, '+
+ 'the tasks belonging to its hierarchy will be throttled and are not allowed to run again until the next period.'
},
'cgroup.cpu': {
@@ -1938,7 +3684,26 @@ netdataDashboard.context = {
} else
return '';
}
- ]
+ ],
+ info: 'Total CPU utilization within the system-wide CPU resources (all cores). '+
+ 'The amount of time spent by tasks of the cgroup in '+
+ '<a href="https://en.wikipedia.org/wiki/CPU_modes#Mode_types" target="_blank">user and kernel</a> modes.'
+ },
+
+ 'cgroup.cpu_per_core': {
+ info: 'Total CPU utilization per core within the system-wide CPU resources.'
+ },
+
+ 'cgroup.cpu_pressure': {
+ info: 'CPU <a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a>. '+
+ '<b>Some</b> indicates the share of time in which at least some tasks are stalled on CPU. '+
+ 'The ratios (in %) are tracked as recent trends over 10-, 60-, and 300-second windows.'
+ },
+
+ 'cgroup.mem_utilization': {
+ info: 'RAM utilization within the configured or system-wide (if not set) limits. '+
+ 'When the RAM utilization of a cgroup exceeds the limit, '+
+ 'OOM killer will start killing the tasks belonging to the cgroup.'
},
'cgroup.mem_usage_limit': {
@@ -1961,7 +3726,10 @@ netdataDashboard.context = {
+ ' data-colors="' + NETDATA.colors[1] + '"'
+ ' role="application"></div>';
}
- ]
+ ],
+ info: 'RAM usage within the configured or system-wide (if not set) limits. '+
+ 'When the RAM usage of a cgroup exceeds the limit, '+
+ 'OOM killer will start killing the tasks belonging to the cgroup.'
},
'cgroup.mem_usage': {
@@ -1983,7 +3751,67 @@ netdataDashboard.context = {
} else
return '';
}
- ]
+ ],
+ info: 'The amount of used RAM and swap memory.'
+ },
+
+ 'cgroup.mem': {
+ info: 'Memory usage statistics. '+
+ 'The individual metrics are described in the memory.stat section for '+
+ '<a href="https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/memory.html#per-memory-cgroup-local-status" target="_blank">cgroup-v1 </a>'+
+ 'and '+
+ '<a href="https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#memory-interface-files" target="_blank">cgroup-v2</a>.'
+ },
+
+ 'cgroup.mem_failcnt': {
+ info: 'The number of times memory usage hit the limit.'
+ },
+
+ 'cgroup.writeback': {
+ info: '<b>Dirty</b> is the amount of memory waiting to be written to disk. <b>Writeback</b> is how much memory is actively being written to disk.'
+ },
+
+ 'cgroup.mem_activity': {
+ info: '<p>Memory accounting statistics.</p>'+
+ '<p><b>In</b> - a page is accounted as either mapped anon page (RSS) or cache page (Page Cache) to the cgroup. '+
+ '<b>Out</b> - a page is unaccounted from the cgroup.</p>'
+ },
+
+ 'cgroup.pgfaults': {
+ info: '<p>Memory <a href="https://en.wikipedia.org/wiki/Page_fault" target="_blank">page fault</a> statistics.</p>'+
+ '<p><b>Pgfault</b> - all page faults. '+
+ '<b>Swap</b> - major page faults.</p>'
+ },
+
+ 'cgroup.memory_pressure': {
+ info: 'Memory <a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a>. '+
+ '<b>Some</b> indicates the share of time in which at least some tasks are stalled on memory. '+
+ 'The ratios (in %) are tracked as recent trends over 10-, 60-, and 300-second windows.'
+ },
+
+ 'cgroup.memory_full_pressure': {
+ info: 'Memory <a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a>. '+
+ '<b>Full</b> indicates the share of time in which all non-idle tasks are stalled on memory simultaneously. '+
+ 'In this state actual CPU cycles are going to waste, '+
+ 'and a workload that spends extended time in this state is considered to be thrashing. '+
+ 'The ratios (in %) are tracked as recent trends over 10-, 60-, and 300-second windows.'
+ },
+
+ 'cgroup.io': {
+ info: 'The amount of data transferred to and from specific devices as seen by the CFQ scheduler. '+
+ 'It is not updated when the CFQ scheduler is operating on a request queue.'
+ },
+
+ 'cgroup.serviced_ops': {
+ info: 'The number of I/O operations performed on specific devices as seen by the CFQ scheduler.'
+ },
+
+ 'cgroup.queued_ops': {
+ info: 'The number of requests queued for I/O operations.'
+ },
+
+ 'cgroup.merged_ops': {
+ info: 'The number of BIO requests merged into requests for I/O operations.'
},
'cgroup.throttle_io': {
@@ -2018,7 +3846,447 @@ netdataDashboard.context = {
+ ' data-colors="' + NETDATA.colors[3] + '"'
+ ' role="application"></div>';
}
- ]
+ ],
+ info: 'The amount of data transferred to and from specific devices as seen by the throttling policy.'
+ },
+
+ 'cgroup.throttle_serviced_ops': {
+ info: 'The number of I/O operations performed on specific devices as seen by the throttling policy.'
+ },
+
+ 'cgroup.io_pressure': {
+ info: 'I/O <a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a>. '+
+ '<b>Some</b> indicates the share of time in which at least some tasks are stalled on I/O. '+
+ 'The ratios (in %) are tracked as recent trends over 10-, 60-, and 300-second windows.'
+ },
+
+ 'cgroup.io_full_pressure': {
+ info: 'I/O <a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a>. '+
+ '<b>Full</b> indicates the share of time in which all non-idle tasks are stalled on I/O simultaneously. '+
+ 'In this state actual CPU cycles are going to waste, '+
+ 'and a workload that spends extended time in this state is considered to be thrashing. '+
+ 'The ratios (in %) are tracked as recent trends over 10-, 60-, and 300-second windows.'
+ },
+
+ 'cgroup.swap_read': {
+ info: 'The function <code>swap_readpage</code> is called when the kernel reads a page from swap memory. This chart is provided by eBPF plugin.'
+ },
+
+ 'cgroup.swap_write': {
+ info: 'The function <code>swap_writepage</code> is called when the kernel writes a page to swap memory. This chart is provided by eBPF plugin.'
+ },
+
+ 'cgroup.fd_open': {
+ info: 'Calls to the internal function <code>do_sys_open</code> (for kernels newer than <code>5.5.19</code> we add a kprobe to <code>do_sys_openat2</code>), which is the common function called from' +
+ ' <a href="https://www.man7.org/linux/man-pages/man2/open.2.html" target="_blank">open(2)</a> ' +
+ ' and <a href="https://www.man7.org/linux/man-pages/man2/openat.2.html" target="_blank">openat(2)</a>. '
+ },
+
+ 'cgroup.fd_open_error': {
+ info: 'Failed calls to the internal function <code>do_sys_open</code> (for kernels newer than <code>5.5.19</code> we add a kprobe to <code>do_sys_openat2</code>).'
+ },
+
+ 'cgroup.fd_close': {
+ info: 'Calls to the internal function <a href="https://elixir.bootlin.com/linux/v5.10/source/fs/file.c#L665" target="_blank">__close_fd</a> or <a href="https://elixir.bootlin.com/linux/v5.11/source/fs/file.c#L617" target="_blank">close_fd</a> according to your kernel version, which is called from' +
+ ' <a href="https://www.man7.org/linux/man-pages/man2/close.2.html" target="_blank">close(2)</a>. '
+ },
+
+ 'cgroup.fd_close_error': {
+ info: 'Failed calls to the internal function <a href="https://elixir.bootlin.com/linux/v5.10/source/fs/file.c#L665" target="_blank">__close_fd</a> or <a href="https://elixir.bootlin.com/linux/v5.11/source/fs/file.c#L617" target="_blank">close_fd</a> according to your kernel version.'
+ },
+
+ 'cgroup.vfs_unlink': {
+ info: 'Calls to the function <a href="https://www.kernel.org/doc/htmldocs/filesystems/API-vfs-unlink.html" target="_blank">vfs_unlink</a>. This chart does not show all events that remove files from the filesystem, because filesystems can create their own functions to remove files.'
+ },
+
+ 'cgroup.vfs_write': {
+ info: 'Successful calls to the function <a href="https://topic.alibabacloud.com/a/kernel-state-file-operation-__-work-information-kernel_8_8_20287135.html" target="_blank">vfs_write</a>. This chart may not show all filesystem events if it uses other functions to store data on disk.'
+ },
+
+ 'cgroup.vfs_write_error': {
+ info: 'Failed calls to the function <a href="https://topic.alibabacloud.com/a/kernel-state-file-operation-__-work-information-kernel_8_8_20287135.html" target="_blank">vfs_write</a>. This chart may not show all filesystem events if it uses other functions to store data on disk.'
+ },
+
+ 'cgroup.vfs_read': {
+ info: 'Successful calls to the function <a href="https://topic.alibabacloud.com/a/kernel-state-file-operation-__-work-information-kernel_8_8_20287135.html" target="_blank">vfs_read</a>. This chart may not show all filesystem events if it uses other functions to store data on disk.'
+ },
+
+ 'cgroup.vfs_read_error': {
+ info: 'Failed calls to the function <a href="https://topic.alibabacloud.com/a/kernel-state-file-operation-__-work-information-kernel_8_8_20287135.html" target="_blank">vfs_read</a>. This chart may not show all filesystem events if it uses other functions to store data on disk.'
+ },
+
+ 'cgroup.vfs_write_bytes': {
+ info: 'Total of bytes successfully written using the function <a href="https://topic.alibabacloud.com/a/kernel-state-file-operation-__-work-information-kernel_8_8_20287135.html" target="_blank">vfs_write</a>.'
+ },
+
+ 'cgroup.vfs_read_bytes': {
+ info: 'Total of bytes successfully read using the function <a href="https://topic.alibabacloud.com/a/kernel-state-file-operation-__-work-information-kernel_8_8_20287135.html" target="_blank">vfs_read</a>.'
+ },
+
+ 'cgroup.process_create': {
+ info: 'Calls to either <a href="https://programming.vip/docs/the-execution-procedure-of-do_fork-function-in-linux.html" target="_blank">do_fork</a>, or <code>kernel_clone</code> if you are running a kernel newer than 5.9.16, to create a new task, which is the common name used to define processes and threads inside the kernel. Netdata identifies the process by counting the number of calls to <a href="https://linux.die.net/man/2/clone" target="_blank">sys_clone</a> that do not have the flag <code>CLONE_THREAD</code> set.'
+ },
+
+ 'cgroup.thread_create': {
+ info: 'Calls to either <a href="https://programming.vip/docs/the-execution-procedure-of-do_fork-function-in-linux.html" target="_blank">do_fork</a>, or <code>kernel_clone</code> if you are running a kernel newer than 5.9.16, to create a new task, which is the common name used to define processes and threads inside the kernel. Netdata identifies the threads by counting the number of calls to <a href="https://linux.die.net/man/2/clone" target="_blank">sys_clone</a> that have the flag <code>CLONE_THREAD</code> set.'
+ },
+
+ 'cgroup.task_exit': {
+ info: 'Calls to the function responsible for closing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">do_exit</a>) tasks.'
+ },
+
+ 'cgroup.task_close': {
+ info: 'Calls to the functions responsible for releasing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">release_task</a>) tasks.'
+ },
+
+ 'cgroup.task_error': {
+ info: 'Number of errors to create a new process or thread. This chart is provided by eBPF plugin.'
+ },
+
+
+ 'cgroup.dc_ratio': {
+ info: 'Percentage of file accesses that were present in the directory cache. 100% means that every file that was accessed was present in the directory cache. If files are not present in the directory cache, either 1) they are not present in the file system, or 2) the files were not accessed before. Read more about <a href="https://www.kernel.org/doc/htmldocs/filesystems/the_directory_cache.html" target="_blank">directory cache</a>. Netdata also gives a summary for these charts in <a href="#menu_filesystem_submenu_directory_cache__eBPF_">Filesystem submenu</a>.'
+ },
+
+ 'cgroup.dc_reference': {
+ info: 'Counters of file accesses. <code>Reference</code> is when there is a file access, see the <code>filesystem.dc_reference</code> chart for more context. Read more about <a href="https://www.kernel.org/doc/htmldocs/filesystems/the_directory_cache.html" target="_blank">directory cache</a>.'
+ },
+
+ 'cgroup.dc_not_cache': {
+ info: 'Counters of file accesses. <code>Slow</code> is when there is a file access and the file is not present in the directory cache, see the <code>filesystem.dc_reference</code> chart for more context. Read more about <a href="https://www.kernel.org/doc/htmldocs/filesystems/the_directory_cache.html" target="_blank">directory cache</a>.'
+ },
+
+ 'cgroup.dc_not_found': {
+ info: 'Counters of file accesses. <code>Miss</code> is when there is file access and the file is not found in the filesystem, see the <code>filesystem.dc_reference</code> chart for more context. Read more about <a href="https://www.kernel.org/doc/htmldocs/filesystems/the_directory_cache.html" target="_blank">directory cache</a>.'
+ },
+
+ 'cgroup.shmget': {
+ info: 'Number of times the syscall <code>shmget</code> is called. Netdata also gives a summary for these charts in <a href="#menu_system_submenu_ipc_shared_memory">System overview</a>.'
+ },
+
+ 'cgroup.shmat': {
+ info: 'Number of times the syscall <code>shmat</code> is called.'
+ },
+
+ 'cgroup.shmdt': {
+ info: 'Number of times the syscall <code>shmdt</code> is called.'
+ },
+
+ 'cgroup.shmctl': {
+ info: 'Number of times the syscall <code>shmctl</code> is called.'
+ },
+
+ 'cgroup.net_bytes_send': {
+ info: 'Bytes sent by functions <code>tcp_sendmsg</code>.'
+ },
+
+ 'cgroup.net_bytes_recv': {
+ info: 'Bytes received by the function <code>tcp_cleanup_rbuf</code>. We use <code>tcp_cleanup_rbuf</code> instead of <code>tcp_recvmsg</code>, because the latter misses <code>tcp_read_sock()</code> traffic and we would also need to have more probes to get the socket and packet size.'
+ },
+
+ 'cgroup.net_tcp_send': {
+ info: 'The function <code>tcp_sendmsg</code> is used to collect number of bytes sent from TCP connections.'
+ },
+
+ 'cgroup.net_tcp_recv': {
+ info: 'The function <code>tcp_cleanup_rbuf</code> is used to collect number of bytes received from TCP connections.'
+ },
+
+ 'cgroup.net_retransmit': {
+ info: 'The function <code>tcp_retransmit_skb</code> is called when the host did not receive the expected return from a packet sent.'
+ },
+
+ 'cgroup.net_udp_send': {
+ info: 'The function <code>udp_sendmsg</code> is used to collect number of bytes sent from UDP connections.'
+ },
+
+ 'cgroup.net_udp_recv': {
+ info: 'The function <code>udp_recvmsg</code> is used to collect number of bytes received from UDP connections.'
+ },
+
+ 'cgroup.cachestat_ratio': {
+ info: 'When the processor needs to read or write a location in main memory, it checks for a corresponding entry in the page cache. If the entry is there, a page cache hit has occurred and the read is from the cache. If the entry is not there, a page cache miss has occurred and the kernel allocates a new entry and copies in data from the disk. Netdata calculates the percentage of accessed files that are cached on memory. <a href="https://github.com/iovisor/bcc/blob/master/tools/cachestat.py#L126-L138" target="_blank">The ratio</a> is calculated counting the accessed cached pages (without counting dirty pages and pages added because of read misses) divided by total access without dirty pages.'
+ },
+
+ 'cgroup.cachestat_dirties': {
+ info: 'Number of <a href="https://en.wikipedia.org/wiki/Page_cache#Memory_conservation" target="_blank">dirty(modified) pages</a> cache. Pages in the page cache modified after being brought in are called dirty pages. Since non-dirty pages in the page cache have identical copies in <a href="https://en.wikipedia.org/wiki/Secondary_storage" target="_blank">secondary storage</a> (e.g. hard disk drive or solid-state drive), discarding and reusing their space is much quicker than paging out application memory, and is often preferred over flushing the dirty pages into secondary storage and reusing their space.'
+ },
+
+ 'cgroup.cachestat_hits': {
+ info: 'When the processor needs to read or write a location in main memory, it checks for a corresponding entry in the page cache. If the entry is there, a page cache hit has occurred and the read is from the cache. Hits show pages accessed that were not modified (we are excluding dirty pages), this counting also excludes the recent pages inserted for read.'
+ },
+
+ 'cgroup.cachestat_misses': {
+ info: 'When the processor needs to read or write a location in main memory, it checks for a corresponding entry in the page cache. If the entry is not there, a page cache miss has occurred and the kernel allocates a new entry and copies in data from the disk. Misses count page insertions to the memory not related to writing.'
+ },
+
+ // ------------------------------------------------------------------------
+ // containers (systemd)
+
+ 'services.cpu': {
+ info: 'Total CPU utilization within the system-wide CPU resources (all cores). '+
+ 'The amount of time spent by tasks of the cgroup in '+
+ '<a href="https://en.wikipedia.org/wiki/CPU_modes#Mode_types" target="_blank">user and kernel</a> modes.'
+ },
+
+ 'services.mem_usage': {
+ info: 'The amount of used RAM.'
+ },
+
+ 'services.mem_rss': {
+ info: 'The amount of used '+
+ '<a href="https://en.wikipedia.org/wiki/Resident_set_size" target="_blank">RSS</a> memory. '+
+ 'It includes transparent hugepages.'
+ },
+
+ 'services.mem_mapped': {
+ info: 'The size of '+
+ '<a href="https://en.wikipedia.org/wiki/Memory-mapped_file" target="_blank">memory-mapped</a> files.'
+ },
+
+ 'services.mem_cache': {
+ info: 'The amount of used '+
+ '<a href="https://en.wikipedia.org/wiki/Page_cache" target="_blank">page cache</a> memory.'
+ },
+
+ 'services.mem_writeback': {
+ info: 'The amount of file/anon cache that is '+
+ '<a href="https://en.wikipedia.org/wiki/Cache_(computing)#Writing_policies" target="_blank">queued for syncing</a> '+
+ 'to disk.'
+ },
+
+ 'services.mem_pgfault': {
+ info: 'The number of '+
+ '<a href="https://en.wikipedia.org/wiki/Page_fault#Types" target="_blank">page faults</a>. '+
+ 'It includes both minor and major page faults.'
+ },
+
+ 'services.mem_pgmajfault': {
+ info: 'The number of '+
+ '<a href="https://en.wikipedia.org/wiki/Page_fault#Major" target="_blank">major</a> '+
+ 'page faults.'
+ },
+
+ 'services.mem_pgpgin': {
+ info: 'The amount of memory charged to the cgroup. '+
+ 'The charging event happens each time a page is accounted as either '+
+ 'mapped anon page(RSS) or cache page(Page Cache) to the cgroup.'
+ },
+
+ 'services.mem_pgpgout': {
+ info: 'The amount of memory uncharged from the cgroup. '+
+ 'The uncharging event happens each time a page is unaccounted from the cgroup.'
+ },
+
+ 'services.mem_failcnt': {
+ info: 'The number of times memory usage hit the limit.'
+ },
+
+ 'services.swap_usage': {
+ info: 'The amount of used '+
+ '<a href="https://en.wikipedia.org/wiki/Memory_paging#Unix_and_Unix-like_systems" target="_blank">swap</a> '+
+ 'memory.'
+ },
+
+ 'services.io_read': {
+ info: 'The amount of data transferred from specific devices as seen by the CFQ scheduler. '+
+ 'It is not updated when the CFQ scheduler is operating on a request queue.'
+ },
+
+ 'services.io_write': {
+ info: 'The amount of data transferred to specific devices as seen by the CFQ scheduler. '+
+ 'It is not updated when the CFQ scheduler is operating on a request queue.'
+ },
+
+ 'services.io_ops_read': {
+ info: 'The number of read operations performed on specific devices as seen by the CFQ scheduler.'
+ },
+
+ 'services.io_ops_write': {
+ info: 'The number of write operations performed on specific devices as seen by the CFQ scheduler.'
+ },
+
+ 'services.throttle_io_read': {
+ info: 'The amount of data transferred from specific devices as seen by the throttling policy.'
+ },
+
+ 'services.throttle_io_write': {
+ info: 'The amount of data transferred to specific devices as seen by the throttling policy.'
+ },
+
+ 'services.throttle_io_ops_read': {
+ info: 'The number of read operations performed on specific devices as seen by the throttling policy.'
+ },
+
+ 'services.throttle_io_ops_write': {
+ info: 'The number of write operations performed on specific devices as seen by the throttling policy.'
+ },
+
+ 'services.queued_io_ops_read': {
+ info: 'The number of queued read requests.'
+ },
+
+ 'services.queued_io_ops_write': {
+ info: 'The number of queued write requests.'
+ },
+
+ 'services.merged_io_ops_read': {
+ info: 'The number of read requests merged.'
+ },
+
+ 'services.merged_io_ops_write': {
+ info: 'The number of write requests merged.'
+ },
+
+ 'services.swap_read': {
+ info: 'The function <code>swap_readpage</code> is called when the kernel reads a page from swap memory. This chart is provided by eBPF plugin.'
+ },
+
+ 'services.swap_write': {
+ info: 'The function <code>swap_writepage</code> is called when the kernel writes a page to swap memory. This chart is provided by eBPF plugin.'
+ },
+
+ 'services.fd_open': {
+ info: 'Calls to the internal function <code>do_sys_open</code> (for kernels newer than <code>5.5.19</code> we add a kprobe to <code>do_sys_openat2</code>), which is the common function called from' +
+ ' <a href="https://www.man7.org/linux/man-pages/man2/open.2.html" target="_blank">open(2)</a> ' +
+ ' and <a href="https://www.man7.org/linux/man-pages/man2/openat.2.html" target="_blank">openat(2)</a>. '
+ },
+
+ 'services.fd_open_error': {
+ info: 'Failed calls to the internal function <code>do_sys_open</code> (for kernels newer than <code>5.5.19</code> we add a kprobe to <code>do_sys_openat2</code>).'
+ },
+
+ 'services.fd_close': {
+ info: 'Calls to the internal function <a href="https://elixir.bootlin.com/linux/v5.10/source/fs/file.c#L665" target="_blank">__close_fd</a> or <a href="https://elixir.bootlin.com/linux/v5.11/source/fs/file.c#L617" target="_blank">close_fd</a> according to your kernel version, which is called from' +
+ ' <a href="https://www.man7.org/linux/man-pages/man2/close.2.html" target="_blank">close(2)</a>. '
+ },
+
+ 'services.fd_close_error': {
+ info: 'Failed calls to the internal function <a href="https://elixir.bootlin.com/linux/v5.10/source/fs/file.c#L665" target="_blank">__close_fd</a> or <a href="https://elixir.bootlin.com/linux/v5.11/source/fs/file.c#L617" target="_blank">close_fd</a> according to your kernel version.'
+ },
+
+ 'services.vfs_unlink': {
+ info: 'Calls to the function <a href="https://www.kernel.org/doc/htmldocs/filesystems/API-vfs-unlink.html" target="_blank">vfs_unlink</a>. This chart does not show all events that remove files from the filesystem, because filesystems can create their own functions to remove files.'
+ },
+
+ 'services.vfs_write': {
+ info: 'Successful calls to the function <a href="https://topic.alibabacloud.com/a/kernel-state-file-operation-__-work-information-kernel_8_8_20287135.html" target="_blank">vfs_write</a>. This chart may not show all filesystem events if it uses other functions to store data on disk.'
+ },
+
+ 'services.vfs_write_error': {
+ info: 'Failed calls to the function <a href="https://topic.alibabacloud.com/a/kernel-state-file-operation-__-work-information-kernel_8_8_20287135.html" target="_blank">vfs_write</a>. This chart may not show all filesystem events if it uses other functions to store data on disk.'
+ },
+
+ 'services.vfs_read': {
+ info: 'Successful calls to the function <a href="https://topic.alibabacloud.com/a/kernel-state-file-operation-__-work-information-kernel_8_8_20287135.html" target="_blank">vfs_read</a>. This chart may not show all filesystem events if it uses other functions to store data on disk.'
+ },
+
+ 'services.vfs_read_error': {
+ info: 'Failed calls to the function <a href="https://topic.alibabacloud.com/a/kernel-state-file-operation-__-work-information-kernel_8_8_20287135.html" target="_blank">vfs_read</a>. This chart may not show all filesystem events if it uses other functions to store data on disk.'
+ },
+
+ 'services.vfs_write_bytes': {
+ info: 'Total of bytes successfully written using the function <a href="https://topic.alibabacloud.com/a/kernel-state-file-operation-__-work-information-kernel_8_8_20287135.html" target="_blank">vfs_write</a>.'
+ },
+
+ 'services.vfs_read_bytes': {
+ info: 'Total of bytes successfully read using the function <a href="https://topic.alibabacloud.com/a/kernel-state-file-operation-__-work-information-kernel_8_8_20287135.html" target="_blank">vfs_read</a>.'
+ },
+
+ 'services.process_create': {
+ info: 'Calls to either <a href="https://programming.vip/docs/the-execution-procedure-of-do_fork-function-in-linux.html" target="_blank">do_fork</a>, or <code>kernel_clone</code> if you are running a kernel newer than 5.9.16, to create a new task, which is the common name used to define processes and threads inside the kernel. Netdata identifies the process by counting the number of calls to <a href="https://linux.die.net/man/2/clone" target="_blank">sys_clone</a> that do not have the flag <code>CLONE_THREAD</code> set.'
+ },
+
+ 'services.thread_create': {
+ info: 'Calls to either <a href="https://programming.vip/docs/the-execution-procedure-of-do_fork-function-in-linux.html" target="_blank">do_fork</a>, or <code>kernel_clone</code> if you are running a kernel newer than 5.9.16, to create a new task, which is the common name used to define processes and threads inside the kernel. Netdata identifies the threads by counting the number of calls to <a href="https://linux.die.net/man/2/clone" target="_blank">sys_clone</a> that have the flag <code>CLONE_THREAD</code> set.'
+ },
+
+ 'services.task_exit': {
+ info: 'Calls to the functions responsible for closing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">do_exit</a>) tasks.'
+ },
+
+ 'services.task_close': {
+ info: 'Calls to the functions responsible for releasing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">release_task</a>) tasks.'
+ },
+
+ 'services.task_error': {
+ info: 'Number of errors to create a new process or thread. This chart is provided by eBPF plugin.'
+ },
+
+ 'services.dc_ratio': {
+ info: 'Percentage of file accesses that were present in the directory cache. 100% means that every file that was accessed was present in the directory cache. If files are not present in the directory cache, either 1) they are not present in the file system, or 2) the files were not accessed before. Read more about <a href="https://www.kernel.org/doc/htmldocs/filesystems/the_directory_cache.html" target="_blank">directory cache</a>. Netdata also gives a summary for these charts in <a href="#menu_filesystem_submenu_directory_cache__eBPF_">Filesystem submenu</a>.'
+ },
+
+ 'services.dc_reference': {
+ info: 'Counters of file accesses. <code>Reference</code> is when there is a file access, see the <code>filesystem.dc_reference</code> chart for more context. Read more about <a href="https://www.kernel.org/doc/htmldocs/filesystems/the_directory_cache.html" target="_blank">directory cache</a>.'
+ },
+
+ 'services.dc_not_cache': {
+ info: 'Counters of file accesses. <code>Slow</code> is when there is a file access and the file is not present in the directory cache, see the <code>filesystem.dc_reference</code> chart for more context. Read more about <a href="https://www.kernel.org/doc/htmldocs/filesystems/the_directory_cache.html" target="_blank">directory cache</a>.'
+ },
+
+ 'services.dc_not_found': {
+ info: 'Counters of file accesses. <code>Miss</code> is when there is file access and the file is not found in the filesystem, see the <code>filesystem.dc_reference</code> chart for more context. Read more about <a href="https://www.kernel.org/doc/htmldocs/filesystems/the_directory_cache.html" target="_blank">directory cache</a>.'
+ },
+
+ 'services.shmget': {
+ info: 'Number of times the syscall <code>shmget</code> is called. Netdata also gives a summary for these charts in <a href="#menu_system_submenu_ipc_shared_memory">System overview</a>.'
+ },
+
+ 'services.shmat': {
+ info: 'Number of times the syscall <code>shmat</code> is called.'
+ },
+
+ 'services.shmdt': {
+ info: 'Number of times the syscall <code>shmdt</code> is called.'
+ },
+
+ 'services.shmctl': {
+ info: 'Number of times the syscall <code>shmctl</code> is called.'
+ },
+
+ 'services.net_bytes_send': {
+ info: 'Bytes sent by functions <code>tcp_sendmsg</code>.'
+ },
+
+ 'services.net_bytes_recv': {
+ info: 'Bytes received by the function <code>tcp_cleanup_rbuf</code>. We use <code>tcp_cleanup_rbuf</code> instead of <code>tcp_recvmsg</code>, because the latter misses <code>tcp_read_sock()</code> traffic and we would also need to have more probes to get the socket and packet size.'
+ },
+
+ 'services.net_tcp_send': {
+ info: 'The function <code>tcp_sendmsg</code> is used to collect number of bytes sent from TCP connections.'
+ },
+
+ 'services.net_tcp_recv': {
+ info: 'The function <code>tcp_cleanup_rbuf</code> is used to collect number of bytes received from TCP connections.'
+ },
+
+ 'services.net_retransmit': {
+ info: 'The function <code>tcp_retransmit_skb</code> is called when the host did not receive the expected return from a packet sent.'
+ },
+
+ 'services.net_udp_send': {
+ info: 'The function <code>udp_sendmsg</code> is used to collect number of bytes sent from UDP connections.'
+ },
+
+ 'services.net_udp_recv': {
+ info: 'The function <code>udp_recvmsg</code> is used to collect number of bytes received from UDP connections.'
+ },
+
+ 'services.cachestat_ratio': {
+ info: 'When the processor needs to read or write a location in main memory, it checks for a corresponding entry in the page cache. If the entry is there, a page cache hit has occurred and the read is from the cache. If the entry is not there, a page cache miss has occurred and the kernel allocates a new entry and copies in data from the disk. Netdata calculates the percentage of accessed files that are cached on memory. <a href="https://github.com/iovisor/bcc/blob/master/tools/cachestat.py#L126-L138" target="_blank">The ratio</a> is calculated counting the accessed cached pages (without counting dirty pages and pages added because of read misses) divided by total access without dirty pages.'
+ },
+
+ 'services.cachestat_dirties': {
+ info: 'Number of <a href="https://en.wikipedia.org/wiki/Page_cache#Memory_conservation" target="_blank">dirty(modified) pages</a> cache. Pages in the page cache modified after being brought in are called dirty pages. Since non-dirty pages in the page cache have identical copies in <a href="https://en.wikipedia.org/wiki/Secondary_storage" target="_blank">secondary storage</a> (e.g. hard disk drive or solid-state drive), discarding and reusing their space is much quicker than paging out application memory, and is often preferred over flushing the dirty pages into secondary storage and reusing their space.'
+ },
+
+ 'services.cachestat_hits': {
+ info: 'When the processor needs to read or write a location in main memory, it checks for a corresponding entry in the page cache. If the entry is there, a page cache hit has occurred and the read is from the cache. Hits show pages accessed that were not modified (we are excluding dirty pages), this counting also excludes the recent pages inserted for read.'
+ },
+
+ 'services.cachestat_misses': {
+ info: 'When the processor needs to read or write a location in main memory, it checks for a corresponding entry in the page cache. If the entry is not there, a page cache miss has occurred and the kernel allocates a new entry and copies in data from the disk. Misses count page insertions to the memory not related to writing.'
},
// ------------------------------------------------------------------------
@@ -2411,7 +4679,7 @@ netdataDashboard.context = {
'web_log.squid_code': {
info: 'These are combined squid result status codes. A break down per component is given in the following charts. ' +
- 'Check the <a href="http://wiki.squid-cache.org/SquidFaq/SquidLogs">squid documentation about them</a>.'
+ 'Check the <a href="http://wiki.squid-cache.org/SquidFaq/SquidLogs" target="_blank">squid documentation about them</a>.'
},
'web_log.squid_handling_opts': {
@@ -2652,11 +4920,11 @@ netdataDashboard.context = {
},
'couchdb.replicator_jobs': {
- info: 'Detailed breakdown of any replication jobs in progress on this node. For more information, see the <a href="http://docs.couchdb.org/en/latest/replication/replicator.html">replicator documentation</a>.'
+ info: 'Detailed breakdown of any replication jobs in progress on this node. For more information, see the <a href="http://docs.couchdb.org/en/latest/replication/replicator.html" target="_blank">replicator documentation</a>.'
},
'couchdb.open_files': {
- info: 'Count of all files held open by CouchDB. If this value seems pegged at 1024 or 4096, your server process is probably hitting the open file handle limit and <a href="http://docs.couchdb.org/en/latest/maintenance/performance.html#pam-and-ulimit">needs to be increased.</a>'
+ info: 'Count of all files held open by CouchDB. If this value seems pegged at 1024 or 4096, your server process is probably hitting the open file handle limit and <a href="http://docs.couchdb.org/en/latest/maintenance/performance.html#pam-and-ulimit" target="_blank">needs to be increased.</a>'
},
'btrfs.disk': {
@@ -2773,7 +5041,7 @@ netdataDashboard.context = {
},
'ntpd.sys_tc': {
- info: 'Time constants and poll intervals are expressed as exponents of 2. The default poll exponent of 6 corresponds to a poll interval of 64 s. For typical Internet paths, the optimum poll interval is about 64 s. For fast LANs with modern computers, a poll exponent of 4 (16 s) is appropriate. The <a href="http://doc.ntp.org/current-stable/poll.html">poll process</a> sends NTP packets at intervals determined by the clock discipline algorithm.',
+ info: 'Time constants and poll intervals are expressed as exponents of 2. The default poll exponent of 6 corresponds to a poll interval of 64 s. For typical Internet paths, the optimum poll interval is about 64 s. For fast LANs with modern computers, a poll exponent of 4 (16 s) is appropriate. The <a href="http://doc.ntp.org/current-stable/poll.html" target="_blank">poll process</a> sends NTP packets at intervals determined by the clock discipline algorithm.',
height: 0.5
},
@@ -2799,7 +5067,7 @@ netdataDashboard.context = {
},
'ntpd.peer_xleave': {
- info: 'This variable is used in interleaved mode (used only in NTP symmetric and broadcast modes). See <a href="http://doc.ntp.org/current-stable/xleave.html">NTP Interleaved Modes</a>.'
+ info: 'This variable is used in interleaved mode (used only in NTP symmetric and broadcast modes). See <a href="http://doc.ntp.org/current-stable/xleave.html" target="_blank">NTP Interleaved Modes</a>.'
},
'ntpd.peer_rootdelay': {
@@ -2942,19 +5210,34 @@ netdataDashboard.context = {
// Power Supplies
'powersupply.capacity': {
- info: undefined
+ info: 'The current battery charge.'
},
'powersupply.charge': {
- info: undefined
+ info: '<p>The battery charge in Amp-hours.</p>'+
+ '<p><b>now</b> - actual charge value. '+
+ '<b>full</b>, <b>empty</b> - last remembered value of charge when battery became full/empty. '+
+ 'It also could mean "value of charge when battery considered full/empty at given conditions (temperature, age)". '+
+ 'I.e. these attributes represent real thresholds, not design values. ' +
+ '<b>full_design</b>, <b>empty_design</b> - design charge values, when battery considered full/empty.</p>'
},
'powersupply.energy': {
- info: undefined
+ info: '<p>The battery charge in Watt-hours.</p>'+
+ '<p><b>now</b> - actual charge value. '+
+ '<b>full</b>, <b>empty</b> - last remembered value of charge when battery became full/empty. '+
+ 'It also could mean "value of charge when battery considered full/empty at given conditions (temperature, age)". '+
+ 'I.e. these attributes represent real thresholds, not design values. ' +
+ '<b>full_design</b>, <b>empty_design</b> - design charge values, when battery considered full/empty.</p>'
},
'powersupply.voltage': {
- info: undefined
+ info: '<p>The power supply voltage.</p>'+
+ '<p><b>now</b> - current voltage. '+
+ '<b>max</b>, <b>min</b> - voltage values that hardware could only guess (measure and retain) the thresholds '+
+ 'of a given power supply. '+
+ '<b>max_design</b>, <b>min_design</b> - design values for maximal and minimal power supply voltages. '+
+ 'Maximal/minimal means values of voltages when battery considered "full"/"empty" at normal conditions.</p>'
},
// ------------------------------------------------------------------------
@@ -2975,8 +5258,8 @@ netdataDashboard.context = {
'<code>shared</code> is sum of all shared metrics for all powered-on virtual machines, plus amount for vSphere services on the host. ' +
'<code>sharedcommon</code> is amount of machine memory that is shared by all powered-on virtual machines and vSphere services on the host. ' +
'<code>shared</code> - <code>sharedcommon</code> = machine memory (host memory) savings (KB). ' +
- 'For details see <a href="https://docs.vmware.com/en/VMware-vSphere/6.5/com.vmware.vsphere.resmgmt.doc/GUID-BFDC988B-F53D-4E97-9793-A002445AFAE1.html">Measuring and Differentiating Types of Memory Usage</a> and ' +
- '<a href="https://www.vmware.com/support/developer/converter-sdk/conv51_apireference/memory_counters.html">Memory Counters</a> articles.'
+ 'For details see <a href="https://docs.vmware.com/en/VMware-vSphere/6.5/com.vmware.vsphere.resmgmt.doc/GUID-BFDC988B-F53D-4E97-9793-A002445AFAE1.html" target="_blank">Measuring and Differentiating Types of Memory Usage</a> and ' +
+ '<a href="https://www.vmware.com/support/developer/converter-sdk/conv51_apireference/memory_counters.html" target="_blank">Memory Counters</a> articles.'
},
'vsphere.host_mem_swap_rate': {
@@ -3000,8 +5283,8 @@ netdataDashboard.context = {
'<code>consumed</code> = <code>granted</code> - <code>memory saved due to memory sharing</code>. ' +
'<code>active</code> is amount of memory that is actively used, as estimated by VMkernel based on recently touched memory pages. ' +
'<code>shared</code> is amount of guest “physical” memory shared with other virtual machines (through the VMkernel’s transparent page-sharing mechanism, a RAM de-duplication technique). ' +
- 'For details see <a href="https://docs.vmware.com/en/VMware-vSphere/6.5/com.vmware.vsphere.resmgmt.doc/GUID-BFDC988B-F53D-4E97-9793-A002445AFAE1.html">Measuring and Differentiating Types of Memory Usage</a> and ' +
- '<a href="https://www.vmware.com/support/developer/converter-sdk/conv51_apireference/memory_counters.html">Memory Counters</a> articles.'
+ 'For details see <a href="https://docs.vmware.com/en/VMware-vSphere/6.5/com.vmware.vsphere.resmgmt.doc/GUID-BFDC988B-F53D-4E97-9793-A002445AFAE1.html" target="_blank">Measuring and Differentiating Types of Memory Usage</a> and ' +
+ '<a href="https://www.vmware.com/support/developer/converter-sdk/conv51_apireference/memory_counters.html" target="_blank">Memory Counters</a> articles.'
},
@@ -3152,7 +5435,7 @@ netdataDashboard.context = {
'squidlog.cache_result_code_requests': {
info: 'The Squid result code is composed of several tags (separated by underscore characters) which describe the response sent to the client. ' +
- 'Check the <a href="https://wiki.squid-cache.org/SquidFaq/SquidLogs#Squid_result_codes">squid documentation</a> about them.'
+ 'Check the <a href="https://wiki.squid-cache.org/SquidFaq/SquidLogs#Squid_result_codes" target="_blank">squid documentation</a> about them.'
},
'squidlog.cache_result_code_transport_tag_requests': {
@@ -3214,7 +5497,7 @@ netdataDashboard.context = {
},
'squidlog.http_method_requests': {
- info: 'The request method to obtain an object. Please refer to section <a href="https://wiki.squid-cache.org/SquidFaq/SquidLogs#Request_methods">request-methods</a> for available methods and their description.'
+ info: 'The request method to obtain an object. Please refer to section <a href="https://wiki.squid-cache.org/SquidFaq/SquidLogs#Request_methods" target="_blank">request-methods</a> for available methods and their description.'
},
'squidlog.hier_code_requests': {
@@ -3409,40 +5692,148 @@ netdataDashboard.context = {
},
// ------------------------------------------------------------------------
- // eBPF
+ // Perf
- 'ebpf.tcp_functions': {
- title : 'TCP calls',
- info: 'Successful or failed calls to functions <code>tcp_sendmsg</code>, <code>tcp_cleanup_rbuf</code> and <code>tcp_close</code>.'
+ 'perf.instructions_per_cycle': {
+ info: 'An IPC < 1.0 likely means memory bound, and an IPC > 1.0 likely means instruction bound. For more details about the metric take a look at this <a href="https://www.brendangregg.com/blog/2017-05-09/cpu-utilization-is-wrong.html" target="_blank">blog post</a>.'
},
- 'ebpf.tcp_bandwidth': {
- title : 'TCP bandwidth',
- info: 'Bytes sent and received for functions <code>tcp_sendmsg</code> and <code>tcp_cleanup_rbuf</code>. We use <code>tcp_cleanup_rbuf</code> instead <code>tcp_recvmsg</code>, because this last misses <code>tcp_read_sock()</code> traffic and we would also need to have more probes to get the socket and package size.'
+ // ------------------------------------------------------------------------
+ // Filesystem
+
+ 'filesystem.vfs_deleted_objects': {
+ title : 'VFS remove',
+ info: 'This chart does not show all events that remove files from the file system, because file systems can create their own functions to remove files; it shows calls to the function <code>vfs_unlink</code>. '
},
- 'ebpf.tcp_retransmit': {
- title : 'TCP retransmit',
- info: 'Number of packets retransmitted for function <code>tcp_retransmit_skb</code>.'
+ 'filesystem.vfs_io': {
+ title : 'VFS IO',
+ info: 'Successful or failed calls to functions <code>vfs_read</code> and <code>vfs_write</code>. This chart may not show all file system events if it uses other functions to store data on disk.'
},
- 'ebpf.tcp_error': {
- title : 'TCP errors',
- info: 'Failed calls that to functions <code>tcp_sendmsg</code>, <code>tcp_cleanup_rbuf</code> and <code>tcp_close</code>.'
+ 'filesystem.vfs_io_bytes': {
+ title : 'VFS bytes written',
+ info: 'Total of bytes read or written with success using the functions <code>vfs_read</code> and <code>vfs_write</code>.'
},
- 'ebpf.udp_functions': {
- title : 'UDP calls',
- info: 'Successful or failed calls to functions <code>udp_sendmsg</code> and <code>udp_recvmsg</code>.'
+ 'filesystem.vfs_io_error': {
+ title : 'VFS IO error',
+ info: 'Failed calls to functions <code>vfs_read</code> and <code>vfs_write</code>.'
},
- 'ebpf.udp_bandwidth': {
- title : 'UDP bandwidth',
- info: 'Bytes sent and received for functions <code>udp_sendmsg</code> and <code>udp_recvmsg</code>.'
+ 'filesystem.vfs_fsync': {
+ info: 'Successful or failed calls to the function <code>vfs_fsync</code>.'
+ },
+
+ 'filesystem.vfs_fsync_error': {
+ info: 'Failed calls to the function <code>vfs_fsync</code>.'
},
- 'ebpf.file_descriptor': {
- title : 'File access',
+ 'filesystem.vfs_open': {
+ info: 'Successful or failed calls to the function <code>vfs_open</code>.'
+ },
+
+ 'filesystem.vfs_open_error': {
+ info: 'Failed calls to the function <code>vfs_open</code>.'
+ },
+
+ 'filesystem.vfs_create': {
+ info: 'Successful or failed calls to the function <code>vfs_create</code>.'
+ },
+
+ 'filesystem.vfs_create_error': {
+ info: 'Failed calls to the function <code>vfs_create</code>.'
+ },
+
+ 'filesystem.ext4_read_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>ext4_file_read_iter</code>.'
+ },
+
+ 'filesystem.ext4_write_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>ext4_file_write_iter</code>.'
+ },
+
+ 'filesystem.ext4_open_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>ext4_file_open</code>.'
+ },
+
+ 'filesystem.ext4_sync_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>ext4_sync_file</code>.'
+ },
+
+ 'filesystem.xfs_read_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>xfs_file_read_iter</code>.'
+ },
+
+ 'filesystem.xfs_write_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>xfs_file_write_iter</code>.'
+ },
+
+ 'filesystem.xfs_open_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>xfs_file_open</code>.'
+ },
+
+ 'filesystem.xfs_sync_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>xfs_file_sync</code>.'
+ },
+
+ 'filesystem.nfs_read_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>nfs_file_read</code>.'
+ },
+
+ 'filesystem.nfs_write_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>nfs_file_write</code>.'
+ },
+
+ 'filesystem.nfs_open_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the functions <code>nfs_file_open</code> and <code>nfs4_file_open</code>.'
+ },
+
+ 'filesystem.nfs_attribute_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>nfs_getattr</code>.'
+ },
+
+ 'filesystem.zfs_read_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>zpl_iter_read</code>.'
+ },
+
+ 'filesystem.zfs_write_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>zpl_iter_write</code>.'
+ },
+
+ 'filesystem.zfs_open_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>zpl_open</code>.'
+ },
+
+ 'filesystem.zfs_sync_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>zpl_fsync</code>.'
+ },
+
+ 'filesystem.btrfs_read_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>btrfs_file_read_iter</code> (kernels newer than 5.9.16) or the function <code>generic_file_read_iter</code> (older kernels).'
+ },
+
+ 'filesystem.btrfs_write_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>btrfs_file_write_iter</code>.'
+ },
+
+ 'filesystem.btrfs_open_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>btrfs_file_open</code>.'
+ },
+
+ 'filesystem.btrfs_sync_latency': {
+ info: 'Netdata is attaching <code>kprobes</code> for the function <code>btrfs_sync_file</code>.'
+ },
+
+ 'mount_points.call': {
+ info: 'Monitor calls to syscalls <code>mount(2)</code> and <code>umount(2)</code> that are responsible for attaching or removing filesystems.'
+ },
+
+ 'mount_points.error': {
+ info: 'Monitor errors in calls to syscalls <code>mount(2)</code> and <code>umount(2)</code>.'
+ },
+
+ 'filesystem.file_descriptor': {
info: 'Calls for internal functions on Linux kernel. The open dimension is attached to the kernel internal function <code>do_sys_open</code> ( For kernels newer than <code>5.5.19</code> we add a kprobe to <code>do_sys_openat2</code>. ), which is the common function called from'+
' <a href="https://www.man7.org/linux/man-pages/man2/open.2.html" target="_blank">open(2)</a> ' +
' and <a href="https://www.man7.org/linux/man-pages/man2/openat.2.html" target="_blank">openat(2)</a>. ' +
@@ -3450,8 +5841,7 @@ netdataDashboard.context = {
' <a href="https://www.man7.org/linux/man-pages/man2/close.2.html" target="_blank">close(2)</a>. '
},
- 'ebpf.file_error': {
- title : 'File access error',
+ 'filesystem.file_error': {
info: 'Failed calls to the kernel internal function <code>do_sys_open</code> ( For kernels newer than <code>5.5.19</code> we add a kprobe to <code>do_sys_openat2</code>. ), which is the common function called from'+
' <a href="https://www.man7.org/linux/man-pages/man2/open.2.html" target="_blank">open(2)</a> ' +
' and <a href="https://www.man7.org/linux/man-pages/man2/openat.2.html" target="_blank">openat(2)</a>. ' +
@@ -3459,44 +5849,32 @@ netdataDashboard.context = {
' <a href="https://www.man7.org/linux/man-pages/man2/close.2.html" target="_blank">close(2)</a>. '
},
- 'ebpf.deleted_objects': {
- title : 'VFS remove',
- info: 'This chart does not show all events that remove files from the file system, because file systems can create their own functions to remove files, it shows calls for the function <a href="https://www.kernel.org/doc/htmldocs/filesystems/API-vfs-unlink.html" target="_blank">vfs_unlink</a>. '
- },
- 'ebpf.io': {
- title : 'VFS IO',
- info: 'Successful or failed calls to functions <a href="https://topic.alibabacloud.com/a/kernel-state-file-operation-__-work-information-kernel_8_8_20287135.html" target="_blank">vfs_read</a> and <a href="https://topic.alibabacloud.com/a/kernel-state-file-operation-__-work-information-kernel_8_8_20287135.html" target="_blank">vfs_write</a>. This chart may not show all file system events if it uses other functions to store data on disk.'
- },
+ // ------------------------------------------------------------------------
+ // eBPF
- 'ebpf.io_bytes': {
- title : 'VFS bytes written',
- info: 'Total of bytes read or written with success using the functions <a href="https://topic.alibabacloud.com/a/kernel-state-file-operation-__-work-information-kernel_8_8_20287135.html" target="_blank">vfs_read</a> and <a href="https://topic.alibabacloud.com/a/kernel-state-file-operation-__-work-information-kernel_8_8_20287135.html" target="_blank">vfs_write</a>.'
+ 'apps.swap_read_call': {
+ info: 'The function <code>swap_readpage</code> is called when the kernel reads a page from swap memory. Netdata also gives a summary for these charts in <a href="#menu_system_submenu_swap">System overview</a>.'
},
- 'ebpf.io_error': {
- title : 'VFS IO error',
- info: 'Failed calls to functions <a href="https://topic.alibabacloud.com/a/kernel-state-file-operation-__-work-information-kernel_8_8_20287135.html" target="_blank">vfs_read</a> and <a href="https://topic.alibabacloud.com/a/kernel-state-file-operation-__-work-information-kernel_8_8_20287135.html" target="_blank">vfs_write</a>.'
+ 'apps.swap_write_call': {
+ info: 'The function <code>swap_writepage</code> is called when the kernel writes a page to swap memory.'
},
- 'ebpf.process_thread': {
- title : 'Task creation',
- info: 'Number of times that either <a href="https://www.ece.uic.edu/~yshi1/linux/lkse/node4.html#SECTION00421000000000000000" target="_blank">do_fork</a>, or <code>kernel_clone</code> if you are running kernel newer than 5.9.16, is called to create a new task, which is the common name used to define process and tasks inside the kernel. Netdata identifies the threads by counting the number of calls for <a href="https://linux.die.net/man/2/clone" target="_blank">sys_clone</a> that has the flag <code>CLONE_THREAD</code> set.'
+ 'apps.shmget_call': {
+ info: 'Number of times the syscall <code>shmget</code> is called. Netdata also gives a summary for these charts in <a href="#menu_system_submenu_ipc_shared_memory">System overview</a>.'
},
- 'ebpf.exit': {
- title : 'Exit monitoring',
- info: 'Calls for the functions responsible for closing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">do_exit</a>) and releasing (<a href="https://www.informit.com/articles/article.aspx?p=370047&seqNum=4" target="_blank">release_task</a>) tasks.'
+ 'apps.shmat_call': {
+ info: 'Number of times the syscall <code>shmat</code> is called.'
},
- 'ebpf.task_error': {
- title : 'Task error',
- info: 'Number of errors to create a new process or thread.'
+ 'apps.shmdt_call': {
+ info: 'Number of times the syscall <code>shmdt</code> is called.'
},
- 'ebpf.process_status': {
- title : 'Task status',
- info: 'Difference between the number of process created and the number of threads created per period(<code>process</code> dimension), it also shows the number of possible zombie process running on system.'
+ 'apps.shmctl_call': {
+ info: 'Number of times the syscall <code>shmctl</code> is called.'
},
// ------------------------------------------------------------------------
@@ -3557,26 +5935,6 @@ netdataDashboard.context = {
}
]
},
- 'vernemq.queue_messages_in_queues': {
- mainheads: [
- function (os, id) {
- void (os);
- return '<div data-netdata="' + id + '"'
- + ' data-dimensions="queue_messages_current"'
- + ' data-chart-library="gauge"'
- + ' data-title="Messages in the Queues"'
- + ' data-units="messages"'
- + ' data-gauge-adjust="width"'
- + ' data-width="16%"'
- + ' data-before="0"'
- + ' data-after="-CHART_DURATION"'
- + ' data-points="CHART_DURATION"'
- + ' data-colors="' + NETDATA.colors[2] + '"'
- + ' data-decimal-digits="2"'
- + ' role="application"></div>';
- }
- ]
- },
'vernemq.queue_messages': {
mainheads: [
function (os, id) {
@@ -3991,4 +6349,28 @@ netdataDashboard.context = {
'See <a href="https://www.freedesktop.org/software/systemd/man/systemd.slice.html#" target="_blank"> systemd.slice(5)</a>.'
},
+ 'anomaly_detection.dimensions': {
+ info: 'Total count of dimensions considered anomalous or normal. '
+ },
+
+ 'anomaly_detection.anomaly_rate': {
+ info: 'Percentage of anomalous dimensions. '
+ },
+
+ 'anomaly_detection.detector_window': {
+ info: 'The length of the active window used by the detector. '
+ },
+
+ 'anomaly_detection.detector_events': {
+ info: 'Flags (0 or 1) to show when an anomaly event has been triggered by the detector. '
+ },
+
+ 'anomaly_detection.prediction_stats': {
+ info: 'Diagnostic metrics relating to prediction time of anomaly detection. '
+ },
+
+ 'anomaly_detection.training_stats': {
+ info: 'Diagnostic metrics relating to training time of anomaly detection. '
+ },
+
};