path: root/collectors/proc.plugin
author     Daniel Baumann <daniel.baumann@progress-linux.org>  2023-08-10 09:18:49 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2023-08-10 09:18:49 +0000
commit     dd814a7c1a8de056a79f7238578b09236edd5506 (patch)
tree       429e7eed5a634a4efe9a6877ce66da8e64aa1782 /collectors/proc.plugin
parent     Adding upstream version 1.41.0. (diff)
Adding upstream version 1.42.0. (upstream/1.42.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors/proc.plugin')
-rw-r--r--  collectors/proc.plugin/README.md                                                     |   31
-rw-r--r--  collectors/proc.plugin/metadata.yaml (renamed from collectors/proc.plugin/multi_metadata.yaml) | 1965
-rw-r--r--  collectors/proc.plugin/metrics.csv                                                   |  271
-rw-r--r--  collectors/proc.plugin/plugin_proc.c                                                 |    8
-rw-r--r--  collectors/proc.plugin/plugin_proc.h                                                 |    2
-rw-r--r--  collectors/proc.plugin/proc_meminfo.c                                                |  520
-rw-r--r--  collectors/proc.plugin/proc_pressure.c                                               |  149
-rw-r--r--  collectors/proc.plugin/proc_pressure.h                                               |    3
-rw-r--r--  collectors/proc.plugin/proc_stat.c                                                   |    2
-rw-r--r--  collectors/proc.plugin/proc_vmstat.c                                                 |   12
-rw-r--r--  collectors/proc.plugin/sys_class_drm.c                                               | 1179
-rw-r--r--  collectors/proc.plugin/sys_devices_pci_aer.c                                         |  335
-rw-r--r--  collectors/proc.plugin/sys_devices_system_edac_mc.c                                  |  334
13 files changed, 3580 insertions, 1231 deletions
diff --git a/collectors/proc.plugin/README.md b/collectors/proc.plugin/README.md
index 6c1335a7..16ae6f41 100644
--- a/collectors/proc.plugin/README.md
+++ b/collectors/proc.plugin/README.md
@@ -31,6 +31,7 @@ In detail, it collects metrics from:
- `/proc/spl/kstat/zfs/pool/state` (state of ZFS pools)
- `/sys/class/power_supply` (power supply properties)
- `/sys/class/infiniband` (infiniband interconnect)
+- `/sys/class/drm` (AMD GPUs)
- `ipc` (IPC semaphores and message queues)
- `ksm` Kernel Same-Page Merging performance (several files under `/sys/kernel/mm/ksm`).
- `netdata` (internal Netdata resources utilization)
@@ -579,6 +580,36 @@ Default configuration will monitor only enabled infiniband ports, and refresh ne
# refresh ports state every seconds = 30
```
+## AMD GPUs
+
+This module monitors every AMD GPU card discovered at agent startup.
+
+### Monitored GPU metrics
+
+The following charts will be provided:
+
+- **GPU utilization**
+- **GPU memory utilization**
+- **GPU clock frequency**
+- **GPU memory clock frequency**
+- **VRAM memory usage percentage**
+- **VRAM memory usage**
+- **visible VRAM memory usage percentage**
+- **visible VRAM memory usage**
+- **GTT memory usage percentage**
+- **GTT memory usage**
+
+### Configuration
+
+The `drm` path can be configured if it differs from the default:
+
+```
+[plugin:proc:/sys/class/drm]
+ # directory to monitor = /sys/class/drm
+```
+
+> [!NOTE]
+> Temperature, fan speed, voltage and power metrics for AMD GPUs can be monitored using the [Sensors](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/sensors/README.md) plugin.
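
The charts listed above correspond to standard `amdgpu` sysfs attributes under each card's `device/` directory. A minimal Python sketch of reading the same values (illustrative only, not the collector's C implementation; it assumes an `amdgpu` card that exposes `gpu_busy_percent` and the `mem_info_*` files):

```python
import re
from pathlib import Path

drm = Path("/sys/class/drm")

for card in sorted(drm.iterdir()):
    if not re.fullmatch(r"card\d+", card.name):
        continue  # skip connector entries such as card0-DP-1
    dev = card / "device"
    busy = dev / "gpu_busy_percent"
    if not busy.exists():
        continue  # not an amdgpu device, or the attribute is not exposed
    vram_used = int((dev / "mem_info_vram_used").read_text())
    vram_total = int((dev / "mem_info_vram_total").read_text())
    print(f"{card.name}: utilization {busy.read_text().strip()}%, "
          f"VRAM {vram_used >> 20} / {vram_total >> 20} MiB "
          f"({100 * vram_used / vram_total:.1f}%)")
```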
## IPC
diff --git a/collectors/proc.plugin/multi_metadata.yaml b/collectors/proc.plugin/metadata.yaml
index e78ec795..81d83f50 100644
--- a/collectors/proc.plugin/multi_metadata.yaml
+++ b/collectors/proc.plugin/metadata.yaml
@@ -1,54 +1,80 @@
-name: proc.plugin
+plugin_name: proc.plugin
modules:
- meta:
plugin_name: proc.plugin
module_name: /proc/stat
monitored_instance:
- name: proc /proc/stat
- link: ''
- categories: []
- icon_filename: ''
+ name: System statistics
+ link: ""
+ categories:
+ - data-collection.linux-systems.system-metrics
+ icon_filename: "linuxserver.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - cpu utilization
+ - process counts
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: |
+ CPU utilization, states and frequencies and key Linux system performance metrics.
+
+ The `/proc/stat` file provides various types of system statistics:
+
+ - The overall system CPU usage statistics
+ - Per CPU core statistics
+ - The total context switching of the system
+ - The total number of processes running
+ - The total CPU interrupts
+ - The total CPU softirqs
+
+ The collector also reads:
+
+ - `/proc/schedstat` for statistics about the process scheduler in the Linux kernel.
+ - `/sys/devices/system/cpu/[X]/thermal_throttle/core_throttle_count` to get the count of thermal throttling events for a specific CPU core on Linux systems.
+ - `/sys/devices/system/cpu/[X]/thermal_throttle/package_throttle_count` to get the count of thermal throttling events for a specific CPU package on a Linux system.
+ - `/sys/devices/system/cpu/[X]/cpufreq/scaling_cur_freq` to get the current operating frequency of a specific CPU core.
+ - `/sys/devices/system/cpu/[X]/cpufreq/stats/time_in_state` to get the amount of time the CPU has spent in each of its available frequency states.
+ - `/sys/devices/system/cpu/[X]/cpuidle/state[X]/name` to get the names of the idle states for each CPU core in a Linux system.
+ - `/sys/devices/system/cpu/[X]/cpuidle/state[X]/time` to get the total time each specific CPU core has spent in each idle state since the system was started.
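
The first `cpu` line of `/proc/stat` holds cumulative jiffy counters, so utilization is derived by sampling the line twice and comparing the deltas. A minimal Python sketch of that calculation (illustrative only, not the plugin's C code):

```python
import time

def cpu_times():
    # first line: "cpu  user nice system idle iowait irq softirq steal guest guest_nice"
    with open("/proc/stat") as f:
        return [int(x) for x in f.readline().split()[1:]]

before = cpu_times()
time.sleep(1)
after = cpu_times()

delta = [b - a for a, b in zip(before, after)]
total = sum(delta) or 1
idle = delta[3] + delta[4]                      # idle + iowait
print(f"overall CPU utilization: {100 * (total - idle) / total:.1f}%")
```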
+ method_description: ""
supported_platforms:
- include: []
+ include: ["linux"]
exclude: []
- multi-instance: true
+ multi_instance: false
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: |
+ The collector auto-detects all metrics. No configuration is needed.
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: |
+ The collector disables cpu frequency and idle state monitoring when there are more than 128 CPU cores available.
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ section_name: "plugin:proc:/proc/stat"
+ name: "netdata.conf"
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -69,11 +95,6 @@ modules:
metric: system.cpu
info: average CPU steal time over the last 20 minutes
os: "linux"
- - name: 10min_cpu_usage
- link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf
- metric: system.cpu
- info: average CPU utilization over the last 10 minutes (excluding nice)
- os: "freebsd"
metrics:
folding:
title: Metrics
@@ -174,51 +195,84 @@ modules:
plugin_name: proc.plugin
module_name: /proc/sys/kernel/random/entropy_avail
monitored_instance:
- name: proc /proc/sys/kernel/random/entropy_avail
- link: ''
- categories: []
- icon_filename: ''
+ name: Entropy
+ link: ""
+ categories:
+ - data-collection.linux-systems.system-metrics
+ icon_filename: "syslog.png"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - entropy
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: |
+ Entropy, a measure of the randomness or unpredictability of data.
+
+ In the context of cryptography, entropy is used to generate random numbers or keys that are essential for
+ secure communication and encryption. Without a good source of entropy, cryptographic protocols can become
+ vulnerable to attacks that exploit the predictability of the generated keys.
+
+ In most operating systems, entropy is generated by collecting random events from various sources, such as
+ hardware interrupts, mouse movements, keyboard presses, and disk activity. These events are fed into a pool
+ of entropy, which is then used to generate random numbers when needed.
+
+ The `/dev/random` device in Linux is one such source of entropy, and it provides an interface for programs
+ to access the pool of entropy. When a program requests random numbers, it reads from the `/dev/random` device,
+ which blocks until enough entropy is available to generate the requested numbers. This ensures that the
+ generated numbers are truly random and not predictable.
+
+ However, if the pool of entropy gets depleted, the `/dev/random` device may block indefinitely, causing
+ programs that rely on random numbers to slow down or even freeze. This is especially problematic for
+ cryptographic protocols that require a continuous stream of random numbers, such as SSL/TLS and SSH.
+
+ To avoid this issue, some systems use a hardware random number generator (RNG) to generate high-quality
+ entropy. A hardware RNG generates random numbers by measuring physical phenomena, such as thermal noise or
+ radioactive decay. These sources of randomness are considered to be more reliable and unpredictable than
+ software-based sources.
+
+ One such hardware RNG is the Trusted Platform Module (TPM), which is a dedicated hardware chip that is used
+ for cryptographic operations and secure boot. The TPM contains a built-in hardware RNG that generates
+ high-quality entropy, which can be used to seed the pool of entropy in the operating system.
+
+ Alternatively, software-based solutions such as `Haveged` can be used to generate additional entropy by
+ exploiting sources of randomness in the system, such as CPU utilization and network traffic. These solutions
+ can help to mitigate the risk of entropy depletion, but they may not be as reliable as hardware-based solutions.
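
The kernel exposes the pool size and the currently available entropy as simple numeric files. A short sketch of reading them (both `entropy_avail` and `poolsize` are standard kernel interfaces):

```python
from pathlib import Path

rng = Path("/proc/sys/kernel/random")
avail = int((rng / "entropy_avail").read_text())
poolsize = int((rng / "poolsize").read_text())
print(f"entropy available: {avail} of {poolsize} bits")
```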
+ method_description: ""
supported_platforms:
- include: []
+ include: ["linux"]
exclude: []
- multi-instance: true
+ multi_instance: false
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -227,8 +281,7 @@ modules:
- name: lowest_entropy
link: https://github.com/netdata/netdata/blob/master/health/health.d/entropy.conf
metric: system.entropy
- info: minimum number of entries in the random numbers pool in the last 5 minutes
- os: "linux"
+ info: minimum number of bits of entropy available for the kernel’s random number generator
metrics:
folding:
title: Metrics
@@ -250,51 +303,64 @@ modules:
plugin_name: proc.plugin
module_name: /proc/uptime
monitored_instance:
- name: proc /proc/uptime
- link: ''
- categories: []
- icon_filename: ''
+ name: System Uptime
+ link: ""
+ categories:
+ - data-collection.linux-systems.system-metrics
+ icon_filename: "linuxserver.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - uptime
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: |
+ The amount of time the system has been up (running).
+
+ Uptime is a critical aspect of overall system performance:
+
+ - **Availability**: Uptime monitoring can show whether a server is consistently available or experiences frequent downtimes.
+ - **Performance Monitoring**: While server uptime alone doesn't provide detailed performance data, analyzing the duration and frequency of downtimes can help identify patterns or trends.
+ - **Proactive problem detection**: If server uptime monitoring reveals unexpected downtimes or a decreasing uptime trend, it can serve as an early warning sign of potential problems.
+ - **Root cause analysis**: When investigating server downtime, the uptime metric alone may not provide enough information to pinpoint the exact cause.
+ - **Load balancing**: Uptime data can indirectly indicate load balancing issues if certain servers have significantly lower uptimes than others.
+ - **Optimize maintenance efforts**: Servers with consistently low uptimes or frequent downtimes may require more attention.
+ - **Compliance requirements**: Server uptime data can be used to demonstrate compliance with regulatory requirements or SLAs that mandate a minimum level of server availability.
+ method_description: ""
supported_platforms:
- include: []
+ include: ["linux"]
exclude: []
- multi-instance: true
+ multi_instance: false
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -321,51 +387,77 @@ modules:
plugin_name: proc.plugin
module_name: /proc/vmstat
monitored_instance:
- name: proc /proc/vmstat
- link: ''
- categories: []
- icon_filename: ''
+ name: Memory Statistics
+ link: ""
+ categories:
+ - data-collection.linux-systems.memory-metrics
+ icon_filename: "linuxserver.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - swap
+ - page faults
+ - oom
+ - numa
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: |
+ Linux Virtual memory subsystem.
+
+ Information about memory management, indicating how effectively the kernel allocates and frees
+ memory resources in response to system demands.
+
+ Monitors page faults, which occur when a process requests a portion of its memory that isn't
+ immediately available. Monitoring these events can help diagnose inefficiencies in memory management and
+ provide insights into application behavior.
+
+ Tracks swapping activity — a vital aspect of memory management where the kernel moves data from RAM to
+ swap space, and vice versa, based on memory demand and usage. It also monitors the utilization of zswap,
+ a compressed cache for swap pages, and provides insights into its usage and performance implications.
+
+ In the context of virtualized environments, it tracks the ballooning mechanism which is used to balance
+ memory resources between host and guest systems.
+
+ For systems using NUMA architecture, it provides insights into the local and remote memory accesses, which
+ can impact the performance based on the memory access times.
+
+ The collector also watches for 'Out of Memory' kills, a drastic measure taken by the system when it runs out
+ of memory resources.
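
All of these counters come from the flat `key value` pairs in `/proc/vmstat`. A Python sketch of reading a few of them (counter names such as `oom_kill` depend on the kernel version, hence the defaults):

```python
vmstat = {}
with open("/proc/vmstat") as f:
    for line in f:
        key, value = line.split()
        vmstat[key] = int(value)

print("major page faults since boot :", vmstat.get("pgmajfault", 0))
print("pages swapped in / out       :", vmstat.get("pswpin", 0), "/", vmstat.get("pswpout", 0))
print("out-of-memory kills          :", vmstat.get("oom_kill", 0))
```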
+ method_description: ""
supported_platforms:
- include: []
+ include: ["linux"]
exclude: []
- multi-instance: true
+ multi_instance: false
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -373,7 +465,7 @@ modules:
alerts:
- name: 30min_ram_swapped_out
link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf
- metric: system.swapio
+ metric: mem.swapio
info: percentage of the system RAM swapped in the last 30 minutes
os: "linux freebsd"
- name: oom_kill
@@ -392,7 +484,7 @@ modules:
description: ""
labels: []
metrics:
- - name: system.swapio
+ - name: mem.swapio
description: Swap I/O
unit: "KiB/s"
chart_type: area
@@ -514,51 +606,83 @@ modules:
plugin_name: proc.plugin
module_name: /proc/interrupts
monitored_instance:
- name: proc /proc/interrupts
- link: ''
- categories: []
- icon_filename: ''
+ name: Interrupts
+ link: ""
+ categories:
+ - data-collection.linux-systems.cpu-metrics
+ icon_filename: "linuxserver.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - interrupts
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: |
+ Monitors `/proc/interrupts`, a file organized by CPU and then by the type of interrupt.
+ The numbers reported are the counts of the interrupts that have occurred of each type.
+
+ An interrupt is a signal to the processor emitted by hardware or software indicating an event that needs
+ immediate attention. The processor then interrupts its current activities and executes the interrupt handler
+ to deal with the event. This is part of the way a computer multitasks and handles concurrent processing.
+
+ The types of interrupts include:
+
+ - **I/O interrupts**: These are caused by I/O devices like the keyboard, mouse, printer, etc. For example, when
+ you type something on the keyboard, an interrupt is triggered so the processor can handle the new input.
+
+ - **Timer interrupts**: These are generated at regular intervals by the system's timer circuit. It's primarily
+ used to switch the CPU among different tasks.
+
+ - **Software interrupts**: These are generated by a program requiring disk I/O operations, or other system resources.
+
+ - **Hardware interrupts**: These are caused by hardware conditions such as power failure, overheating, etc.
+
+ Monitoring `/proc/interrupts` can be used for:
+
+ - **Performance tuning**: If an interrupt is happening very frequently, it could be a sign that a device is not
+ configured correctly, or there is a software bug causing unnecessary interrupts. This could lead to system
+ performance degradation.
+
+ - **System troubleshooting**: If you're seeing a lot of unexpected interrupts, it could be a sign of a hardware problem.
+
+ - **Understanding system behavior**: More generally, keeping an eye on what interrupts are occurring can help you
+ understand what your system is doing. It can provide insights into the system's interaction with hardware,
+ drivers, and other parts of the kernel.
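
`/proc/interrupts` starts with a header row naming the CPUs, followed by one row per interrupt source with a count column per CPU. A rough parsing sketch (illustrative only):

```python
from collections import Counter

totals = Counter()
with open("/proc/interrupts") as f:
    ncpu = len(f.readline().split())            # header row: one column per CPU
    for line in f:
        parts = line.split()
        # some rows (ERR, MIS) have fewer columns than CPUs
        counts = [int(p) for p in parts[1:1 + ncpu] if p.isdigit()]
        totals[parts[0].rstrip(":")] = sum(counts)

for irq, count in totals.most_common(5):
    print(f"{irq:>6}: {count} interrupts since boot")
```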
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -597,51 +721,77 @@ modules:
plugin_name: proc.plugin
module_name: /proc/loadavg
monitored_instance:
- name: proc /proc/loadavg
- link: ''
- categories: []
- icon_filename: ''
+ name: System Load Average
+ link: ""
+ categories:
+ - data-collection.linux-systems.system-metrics
+ icon_filename: "linuxserver.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - load
+ - load average
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: |
+ The `/proc/loadavg` file provides information about the system load average.
+
+ The load average is a measure of the amount of computational work that a system performs. It is a
+ representation of the average system load over a period of time.
+
+ This file contains three numbers representing the system load averages for the last 1, 5, and 15 minutes,
+ respectively. It also includes the currently running processes and the total number of processes.
+
+ Monitoring the load average can be used for:
+
+ - **System performance**: If the load average is too high, it may indicate that your system is overloaded.
+ On a system with a single CPU, if the load average is 1, it means the single CPU is fully utilized. If the
+ load averages are consistently higher than the number of CPUs/cores, it may indicate that your system is
+ overloaded and tasks are waiting for CPU time.
+
+ - **Troubleshooting**: If the load average is unexpectedly high, it can be a sign of a problem. This could be
+ due to a runaway process, a software bug, or a hardware issue.
+
+ - **Capacity planning**: By monitoring the load average over time, you can understand the trends in your
+ system's workload. This can help with capacity planning and scaling decisions.
+
+ Remember that load average not only considers CPU usage, but also includes processes waiting for disk I/O.
+ Therefore, high load averages could be due to I/O contention as well as CPU contention.
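
A small sketch of parsing the five fields of `/proc/loadavg` and comparing the load against the core count (illustrative only):

```python
import os

with open("/proc/loadavg") as f:
    load1, load5, load15, runnable_total, _last_pid = f.read().split()

runnable, total = runnable_total.split("/")
cores = os.cpu_count() or 1
print(f"load averages: {load1} (1m), {load5} (5m), {load15} (15m) on {cores} cores")
print(f"runnable / total scheduling entities: {runnable} / {total}")
if float(load15) > cores:
    print("the 15-minute load exceeds the core count: tasks are queuing for CPU (or I/O)")
```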
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: false
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -700,51 +850,76 @@ modules:
plugin_name: proc.plugin
module_name: /proc/pressure
monitored_instance:
- name: proc /proc/pressure
- link: ''
- categories: []
- icon_filename: ''
+ name: Pressure Stall Information
+ link: ""
+ categories:
+ - data-collection.linux-systems.pressure-metrics
+ icon_filename: "linuxserver.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - pressure
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: |
+ Introduced in Linux kernel 4.20, `/proc/pressure` provides information about system pressure stall information
+ (PSI). PSI is a feature that allows the system to track the amount of time the system is stalled due to
+ resource contention, such as CPU, memory, or I/O.
+
+      The collector monitors separate files for CPU, memory, I/O, and IRQ pressure:
+
+ - **cpu**: Tracks the amount of time tasks are stalled due to CPU contention.
+ - **memory**: Tracks the amount of time tasks are stalled due to memory contention.
+ - **io**: Tracks the amount of time tasks are stalled due to I/O contention.
+ - **irq**: Tracks the amount of time tasks are stalled due to IRQ contention.
+
+      Each of them provides metrics for stall time over the last 10 seconds, 1 minute, and 5 minutes.
+
+ Monitoring the /proc/pressure files can provide important insights into system performance and capacity planning:
+
+ - **Identifying resource contention**: If these metrics are consistently high, it indicates that tasks are
+ frequently being stalled due to lack of resources, which can significantly degrade system performance.
+
+ - **Troubleshooting performance issues**: If a system is experiencing performance issues, these metrics can
+ help identify whether resource contention is the cause.
+
+ - **Capacity planning**: By monitoring these metrics over time, you can understand trends in resource
+ utilization and make informed decisions about when to add more resources to your system.
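
Each `/proc/pressure/<resource>` file contains a `some` line (and, on newer kernels, a `full` line) with `avg10`, `avg60`, `avg300` and a cumulative `total` field. A minimal parsing sketch (illustrative only):

```python
from pathlib import Path

def read_psi(resource):
    """Parse /proc/pressure/<resource> into {'some': {...}, 'full': {...}}."""
    psi = {}
    for line in Path("/proc/pressure", resource).read_text().splitlines():
        kind, *fields = line.split()            # e.g. "some avg10=0.00 avg60=0.00 avg300=0.00 total=0"
        psi[kind] = {k: float(v) for k, v in (f.split("=") for f in fields)}
    return psi

for resource in ("cpu", "memory", "io"):
    some = read_psi(resource)["some"]
    print(f"{resource:>6}: avg10={some['avg10']}% avg60={some['avg60']}% avg300={some['avg300']}%")
```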
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: false
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -849,51 +1024,76 @@ modules:
plugin_name: proc.plugin
module_name: /proc/softirqs
monitored_instance:
- name: proc /proc/softirqs
- link: ''
- categories: []
- icon_filename: ''
+ name: SoftIRQ statistics
+ link: ""
+ categories:
+ - data-collection.linux-systems.cpu-metrics
+ icon_filename: "linuxserver.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - softirqs
+ - interrupts
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: |
+ In the Linux kernel, handling of hardware interrupts is split into two halves: the top half and the bottom half.
+ The top half is the routine that responds immediately to an interrupt, while the bottom half is deferred to be processed later.
+
+ Softirqs are a mechanism in the Linux kernel used to handle the bottom halves of interrupts, which can be
+ deferred and processed later in a context where it's safe to enable interrupts.
+
+ The actual work of handling the interrupt is offloaded to a softirq and executed later when the system
+ decides it's a good time to process them. This helps to keep the system responsive by not blocking the top
+ half for too long, which could lead to missed interrupts.
+
+ Monitoring `/proc/softirqs` is useful for:
+
+ - **Performance tuning**: A high rate of softirqs could indicate a performance issue. For instance, a high
+ rate of network softirqs (`NET_RX` and `NET_TX`) could indicate a network performance issue.
+
+ - **Troubleshooting**: If a system is behaving unexpectedly, checking the softirqs could provide clues about
+ what is going on. For example, a sudden increase in block device softirqs (BLOCK) might indicate a problem
+ with a disk.
+
+ - **Understanding system behavior**: Knowing what types of softirqs are happening can help you understand what
+ your system is doing, particularly in terms of how it's interacting with hardware and how it's handling
+ interrupts.
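
`/proc/softirqs` has the same layout as `/proc/interrupts`: a CPU header row, then one row per softirq type with a count per CPU. A short sketch that totals the types called out above (illustrative only):

```python
totals = {}
with open("/proc/softirqs") as f:
    ncpu = len(f.readline().split())            # header row: one column per CPU
    for line in f:
        parts = line.split()
        totals[parts[0].rstrip(":")] = sum(int(x) for x in parts[1:1 + ncpu])

for name in ("NET_RX", "NET_TX", "TIMER", "SCHED", "BLOCK"):
    print(f"{name:>7}: {totals.get(name, 0)} softirqs since boot")
```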
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -932,51 +1132,75 @@ modules:
plugin_name: proc.plugin
module_name: /proc/net/softnet_stat
monitored_instance:
- name: proc /proc/net/softnet_stat
- link: ''
- categories: []
- icon_filename: ''
+ name: Softnet Statistics
+ link: ""
+ categories:
+ - data-collection.linux-systems.network-metrics
+ icon_filename: "linuxserver.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - softnet
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: |
+ `/proc/net/softnet_stat` provides statistics that relate to the handling of network packets by softirq.
+
+ It provides information about:
+
+ - Total number of processed packets (`processed`).
+ - Times ksoftirq ran out of quota (`dropped`).
+ - Times net_rx_action was rescheduled.
+ - Number of times processed all lists before quota.
+ - Number of times did not process all lists due to quota.
+ - Number of times net_rx_action was rescheduled for GRO (Generic Receive Offload) cells.
+ - Number of times GRO cells were processed.
+
+ Monitoring the /proc/net/softnet_stat file can be useful for:
+
+ - **Network performance monitoring**: By tracking the total number of processed packets and how many packets
+ were dropped, you can gain insights into your system's network performance.
+
+ - **Troubleshooting**: If you're experiencing network-related issues, this collector can provide valuable clues.
+ For instance, a high number of dropped packets may indicate a network problem.
+
+ - **Capacity planning**: If your system is consistently processing near its maximum capacity of network
+ packets, it might be time to consider upgrading your network infrastructure.
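
The file has one line per CPU and all values are hexadecimal; the first three columns are the processed, dropped and budget-squeezed counters. A minimal parsing sketch (illustrative only):

```python
with open("/proc/net/softnet_stat") as f:
    for cpu, line in enumerate(f):
        cols = [int(x, 16) for x in line.split()]       # all values are hexadecimal
        processed, dropped, squeezed = cols[0], cols[1], cols[2]
        print(f"CPU{cpu}: processed={processed} dropped={dropped} budget_squeezed={squeezed}")
```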
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -990,13 +1214,10 @@ modules:
- name: 1min_netdev_budget_ran_outs
link: https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf
metric: system.softnet_stat
- info: average number of times ksoftirq ran out of sysctl net.core.netdev_budget or net.core.netdev_budget_usecs with work remaining over the last minute (this can be a cause for dropped packets)
+ info:
+ average number of times ksoftirq ran out of sysctl net.core.netdev_budget or net.core.netdev_budget_usecs with work remaining over the last
+ minute (this can be a cause for dropped packets)
os: "linux"
- - name: 10min_netisr_backlog_exceeded
- link: https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf
- metric: system.softnet_stat
- info: average number of drops in the last minute due to exceeded sysctl net.route.netisr_maxqlen (this can be a cause for dropped packets)
- os: "freebsd"
metrics:
folding:
title: Metrics
@@ -1036,51 +1257,72 @@ modules:
plugin_name: proc.plugin
module_name: /proc/meminfo
monitored_instance:
- name: proc /proc/meminfo
- link: ''
- categories: []
- icon_filename: ''
+ name: Memory Usage
+ link: ""
+ categories:
+ - data-collection.linux-systems.memory-metrics
+ icon_filename: "linuxserver.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - memory
+ - ram
+ - available
+ - committed
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: |
+ `/proc/meminfo` provides detailed information about the system's current memory usage. It includes information
+ about different types of memory, RAM, Swap, ZSwap, HugePages, Transparent HugePages (THP), Kernel memory,
+ SLAB memory, memory mappings, and more.
+
+ Monitoring /proc/meminfo can be useful for:
+
+ - **Performance Tuning**: Understanding your system's memory usage can help you make decisions about system
+ tuning and optimization. For example, if your system is frequently low on free memory, it might benefit
+ from more RAM.
+
+ - **Troubleshooting**: If your system is experiencing problems, `/proc/meminfo` can provide clues about
+ whether memory usage is a factor. For example, if your system is slow and cached swap is high, it could
+ mean that your system is swapping out a lot of memory to disk, which can degrade performance.
+
+ - **Capacity Planning**: By monitoring memory usage over time, you can understand trends and make informed
+ decisions about future capacity needs.
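
A short sketch of deriving used RAM, available RAM and used swap from `/proc/meminfo` (illustrative only; `MemAvailable` requires kernel 3.14 or later):

```python
meminfo = {}
with open("/proc/meminfo") as f:
    for line in f:
        key, value = line.split(":")
        meminfo[key] = int(value.split()[0])            # values are reported in kB

used = meminfo["MemTotal"] - meminfo["MemAvailable"]
print(f"RAM used      : {used / 1024:.0f} MiB ({100 * used / meminfo['MemTotal']:.1f}%)")
print(f"RAM available : {meminfo['MemAvailable'] / 1024:.0f} MiB")
print(f"swap used     : {(meminfo['SwapTotal'] - meminfo['SwapFree']) / 1024:.0f} MiB")
```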
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: false
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -1091,24 +1333,14 @@ modules:
metric: system.ram
info: system memory utilization
os: "linux"
- - name: ram_in_use
- link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf
- metric: system.ram
- info: system memory utilization
- os: "freebsd"
- name: ram_available
link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf
metric: mem.available
info: percentage of estimated amount of RAM available for userspace processes, without causing swapping
os: "linux"
- - name: ram_available
- link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf
- metric: mem.available
- info: percentage of estimated amount of RAM available for userspace processes, without causing swapping
- os: "freebsd"
- name: used_swap
link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf
- metric: system.swap
+ metric: mem.swap
info: swap memory utilization
os: "linux freebsd"
- name: 1hour_memory_hw_corrupted
@@ -1142,13 +1374,26 @@ modules:
chart_type: area
dimensions:
- name: avail
- - name: system.swap
+ - name: mem.swap
description: System Swap
unit: "MiB"
chart_type: stacked
dimensions:
- name: free
- name: used
+ - name: mem.swap_cached
+ description: Swap Memory Cached in RAM
+ unit: "MiB"
+ chart_type: stacked
+ dimensions:
+ - name: cached
+ - name: mem.zswap
+ description: Zswap Usage
+ unit: "MiB"
+ chart_type: stacked
+ dimensions:
+ - name: in-ram
+ - name: on-disk
- name: mem.hwcorrupt
description: Corrupted Memory detected by ECC
unit: "MiB"
@@ -1188,7 +1433,7 @@ modules:
dimensions:
- name: reclaimable
- name: unreclaimable
- - name: mem.hugepage
+ - name: mem.hugepages
description: Dedicated HugePages Memory
unit: "MiB"
chart_type: stacked
@@ -1197,62 +1442,110 @@ modules:
- name: used
- name: surplus
- name: reserved
- - name: mem.transparent_hugepages
+ - name: mem.thp
description: Transparent HugePages Memory
unit: "MiB"
chart_type: stacked
dimensions:
- name: anonymous
- name: shmem
+ - name: mem.thp_details
+ description: Details of Transparent HugePages Usage
+ unit: "MiB"
+ chart_type: line
+ dimensions:
+ - name: ShmemPmdMapped
+ - name: FileHugePages
+ - name: FilePmdMapped
+ - name: mem.reclaiming
+ description: Memory Reclaiming
+ unit: "MiB"
+ chart_type: line
+ dimensions:
+ - name: Active
+ - name: Inactive
+ - name: Active(anon)
+ - name: Inactive(anon)
+ - name: Active(file)
+ - name: Inactive(file)
+ - name: Unevictable
+ - name: Mlocked
+ - name: mem.high_low
+ description: High and Low Used and Free Memory Areas
+ unit: "MiB"
+ chart_type: stacked
+ dimensions:
+ - name: high_used
+ - name: low_used
+ - name: high_free
+ - name: low_free
+ - name: mem.cma
+ description: Contiguous Memory Allocator (CMA) Memory
+ unit: "MiB"
+ chart_type: stacked
+ dimensions:
+ - name: used
+ - name: free
+ - name: mem.directmaps
+ description: Direct Memory Mappings
+ unit: "MiB"
+ chart_type: stacked
+ dimensions:
+ - name: 4k
+ - name: 2m
+ - name: 4m
+ - name: 1g
- meta:
plugin_name: proc.plugin
module_name: /proc/pagetypeinfo
monitored_instance:
- name: proc /proc/pagetypeinfo
- link: ''
- categories: []
- icon_filename: ''
+ name: Page types
+ link: ""
+ categories:
+ - data-collection.linux-systems.memory-metrics
+ icon_filename: "microchip.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - memory page types
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: "This integration provides metrics about the system's memory page types"
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: false
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -1295,66 +1588,91 @@ modules:
plugin_name: proc.plugin
module_name: /sys/devices/system/edac/mc
monitored_instance:
- name: proc /sys/devices/system/edac/mc
- link: ''
- categories: []
- icon_filename: ''
+ name: Memory modules (DIMMs)
+ link: ""
+ categories:
+ - data-collection.linux-systems.memory-metrics
+ icon_filename: "microchip.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - edac
+ - ecc
+ - dimm
+ - ram
+ - hardware
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: |
+        The Error Detection and Correction (EDAC) subsystem detects and reports errors in the system's memory,
+ primarily ECC (Error-Correcting Code) memory errors.
+
+ The collector provides data for:
+
+ - Per memory controller (MC): correctable and uncorrectable errors. These can be of 2 kinds:
+ - errors related to a DIMM
+ - errors that cannot be associated with a DIMM
+
+ - Per memory DIMM: correctable and uncorrectable errors. There are 2 kinds:
+        - memory controllers that can identify the physical DIMMs and report errors directly for them,
+        - memory controllers that report errors for memory address ranges that can be linked to DIMMs.
+          In this case the DIMMs reported may be more than the physical DIMMs installed.
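
The counters live in sysfs under `/sys/devices/system/edac/mc/mcX` and its `dimmX`/`rankX` subdirectories. A hedged Python sketch of reading them (the attribute names are the standard kernel EDAC interface, not the plugin's C code):

```python
from pathlib import Path

edac = Path("/sys/devices/system/edac/mc")

for mc in sorted(edac.glob("mc[0-9]*")):
    ce = int((mc / "ce_count").read_text())             # correctable errors
    ue = int((mc / "ue_count").read_text())             # uncorrectable errors
    ce_noinfo = int((mc / "ce_noinfo_count").read_text())
    ue_noinfo = int((mc / "ue_noinfo_count").read_text())
    print(f"{mc.name}: CE={ce} UE={ue} CE(no DIMM info)={ce_noinfo} UE(no DIMM info)={ue_noinfo}")

    for dimm in sorted(mc.glob("dimm*")) + sorted(mc.glob("rank*")):
        label = (dimm / "dimm_label").read_text().strip()
        print(f"  {dimm.name} [{label}]: "
              f"CE={int((dimm / 'dimm_ce_count').read_text())} "
              f"UE={int((dimm / 'dimm_ue_count').read_text())}")
```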
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
list: []
alerts:
- - name: 1hour_ecc_memory_correctable
+ - name: ecc_memory_mc_noinfo_correctable
+ metric: mem.edac_mc
+ info: memory controller ${label:controller} ECC correctable errors (unknown DIMM slot) in the last 10 minutes
link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf
- metric: mem.ecc_ce
- info: number of ECC correctable errors in the last 10 minutes
- os: "linux"
- - name: 1hour_ecc_memory_uncorrectable
+ - name: ecc_memory_mc_noinfo_uncorrectable
+ metric: mem.edac_mc
+ info: memory controller ${label:controller} ECC uncorrectable errors (unknown DIMM slot) in the last 10 minutes
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf
+ - name: ecc_memory_dimm_correctable
+ metric: mem.edac_mc_dimm
+ info: DIMM ${label:dimm} controller ${label:controller} (location ${label:dimm_location}) ECC correctable errors in the last 10 minutes
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf
+ - name: ecc_memory_dimm_uncorrectable
+ metric: mem.edac_mc_dimm
+ info: DIMM ${label:dimm} controller ${label:controller} (location ${label:dimm_location}) ECC uncorrectable errors in the last 10 minutes
link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf
- metric: mem.ecc_ue
- info: number of ECC uncorrectable errors in the last 10 minutes
- os: "linux"
metrics:
folding:
title: Metrics
@@ -1362,71 +1680,117 @@ modules:
description: ""
availability: []
scopes:
- - name: global
- description: ""
- labels: []
+ - name: memory controller
+ description: These metrics refer to the memory controller.
+ labels:
+ - name: controller
+ description: "[mcX](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#mcx-directories) directory name of this memory controller."
+ - name: mc_name
+ description: Memory controller type.
+ - name: size_mb
+ description: The amount of memory in megabytes that this memory controller manages.
+ - name: max_location
+ description: Last available memory slot in this memory controller.
metrics:
- - name: mem.ecc_ce
- description: ECC Memory Correctable Errors
- unit: "errors"
+ - name: mem.edac_mc
+ description: Memory Controller (MC) Error Detection And Correction (EDAC) Errors
+ unit: errors/s
chart_type: line
dimensions:
- - name: a dimension per mem controller
- - name: mem.ecc_ue
- description: ECC Memory Uncorrectable Errors
- unit: "errors"
+ - name: correctable
+ - name: uncorrectable
+ - name: correctable_noinfo
+ - name: uncorrectable_noinfo
+ - name: memory module
+ description: These metrics refer to the memory module (or rank, [depends on the memory controller](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#f5)).
+ labels:
+ - name: controller
+ description: "[mcX](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#mcx-directories) directory name of this memory controller."
+ - name: dimm
+ description: "[dimmX or rankX](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#dimmx-or-rankx-directories) directory name of this memory module."
+ - name: dimm_dev_type
+ description: Type of DRAM device used in this memory module. For example, x1, x2, x4, x8.
+ - name: dimm_edac_mode
+ description: Used type of error detection and correction. For example, S4ECD4ED would mean a Chipkill with x4 DRAM.
+ - name: dimm_label
+ description: Label assigned to this memory module.
+ - name: dimm_location
+ description: Location of the memory module.
+ - name: dimm_mem_type
+ description: Type of the memory module.
+ - name: size
+ description: The amount of memory in megabytes that this memory module manages.
+ metrics:
+          - name: mem.edac_mc_dimm
+ description: DIMM Error Detection And Correction (EDAC) Errors
+ unit: errors/s
chart_type: line
dimensions:
- - name: a dimension per mem controller
+ - name: correctable
+ - name: uncorrectable
- meta:
plugin_name: proc.plugin
module_name: /sys/devices/system/node
monitored_instance:
- name: proc /sys/devices/system/node
- link: ''
- categories: []
- icon_filename: ''
+ name: Non-Uniform Memory Access
+ link: ""
+ categories:
+ - data-collection.linux-systems.memory-metrics
+ icon_filename: "linuxserver.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - numa
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: |
+ Information about NUMA (Non-Uniform Memory Access) nodes on the system.
+
+      NUMA is a method of configuring a cluster of microprocessors in a multiprocessing system so that they can
+ share memory locally, improving performance and the ability of the system to be expanded. NUMA is used in a
+ symmetric multiprocessing (SMP) system.
+
+ In a NUMA system, processors, memory, and I/O devices are grouped together into cells, also known as nodes.
+ Each node has its own memory and set of I/O devices, and one or more processors. While a processor can access
+ memory in any of the nodes, it does so faster when accessing memory within its own node.
+
+ The collector provides statistics on memory allocations for processes running on the NUMA nodes, revealing the
+ efficiency of memory allocations in multi-node systems.
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -1460,51 +1824,61 @@ modules:
plugin_name: proc.plugin
module_name: /sys/kernel/mm/ksm
monitored_instance:
- name: proc /sys/kernel/mm/ksm
- link: ''
- categories: []
- icon_filename: ''
+ name: Kernel Same-Page Merging
+ link: ""
+ categories:
+ - data-collection.linux-systems.memory-metrics
+ icon_filename: "microchip.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - ksm
+ - samepage
+ - merging
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: |
+ Kernel Samepage Merging (KSM) is a memory-saving feature in Linux that enables the kernel to examine the
+ memory of different processes and identify identical pages. It then merges these identical pages into a
+ single page that the processes share. This is particularly useful for virtualization, where multiple virtual
+ machines might be running the same operating system or applications and have many identical pages.
+
+ The collector provides information about the operation and effectiveness of KSM on your system.
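
KSM exposes its state as plain counters under `/sys/kernel/mm/ksm`. A rough estimate of the memory saved is `(pages_sharing - pages_shared) * page_size`, as sketched below (illustrative only):

```python
import resource
from pathlib import Path

ksm = Path("/sys/kernel/mm/ksm")
page = resource.getpagesize()

shared = int((ksm / "pages_shared").read_text())        # deduplicated pages kept in memory
sharing = int((ksm / "pages_sharing").read_text())      # page-table entries pointing at them
print("KSM run state :", (ksm / "run").read_text().strip())
print(f"memory saved  : {(sharing - shared) * page / 2**20:.1f} MiB")
```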
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: false
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -1547,51 +1921,57 @@ modules:
plugin_name: proc.plugin
module_name: /sys/block/zram
monitored_instance:
- name: proc /sys/block/zram
- link: ''
- categories: []
- icon_filename: ''
+ name: ZRAM
+ link: ""
+ categories:
+ - data-collection.linux-systems.memory-metrics
+ icon_filename: "microchip.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - zram
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: |
+        zRAM, or compressed RAM, is a block device backed by a portion of your system's RAM.
+ The data written to this block device is compressed and stored in memory.
+
+        The collector provides information about the operation and the effectiveness of zRAM on your system.
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -1640,51 +2020,69 @@ modules:
plugin_name: proc.plugin
module_name: ipc
monitored_instance:
- name: proc ipc
- link: ''
- categories: []
- icon_filename: ''
+ name: Inter Process Communication
+ link: ""
+ categories:
+ - data-collection.linux-systems.ipc-metrics
+ icon_filename: "network-wired.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - ipc
+ - semaphores
+ - shared memory
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: |
+ IPC stands for Inter-Process Communication. It is a mechanism which allows processes to communicate with each
+ other and synchronize their actions.
+
+ This collector exposes information about:
+
+ - Message Queues: This allows messages to be exchanged between processes. It's a more flexible method that
+ allows messages to be placed onto a queue and read at a later time.
+
+ - Shared Memory: This method allows for the fastest form of IPC because processes can exchange data by
+ reading/writing into shared memory segments.
+
+ - Semaphores: They are used to synchronize the operations performed by independent processes. So, if multiple
+ processes are trying to access a single shared resource, semaphores can ensure that only one process
+ accesses the resource at a given time.
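
The System V IPC objects are listed in `/proc/sysvipc/sem`, `/proc/sysvipc/msg` and `/proc/sysvipc/shm`, one line per object after a header row. A minimal counting sketch (illustrative only; the collector itself also uses the IPC syscalls):

```python
from pathlib import Path

def ipc_object_count(kind):
    # each /proc/sysvipc file has one header line, then one line per IPC object
    lines = Path("/proc/sysvipc", kind).read_text().splitlines()
    return max(len(lines) - 1, 0)

print("semaphore arrays      :", ipc_object_count("sem"))
print("message queues        :", ipc_object_count("msg"))
print("shared memory segments:", ipc_object_count("shm"))
```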
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: false
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -1751,51 +2149,61 @@ modules:
plugin_name: proc.plugin
module_name: /proc/diskstats
monitored_instance:
- name: proc /proc/diskstats
- link: ''
- categories: []
- icon_filename: ''
+ name: Disk Statistics
+ link: ""
+ categories:
+ - data-collection.linux-systems.disk-metrics
+ icon_filename: "hard-drive.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - disk
+ - disks
+ - io
+ - bcache
+ - block devices
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: |
+ Detailed statistics for each of your system's disk devices and partitions.
+ The data is reported by the kernel and can be used to monitor disk activity on a Linux system.
+
+ Get valuable insight into how your disks are performing and where potential bottlenecks might be.
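
Each line of `/proc/diskstats` carries the device name followed by cumulative I/O counters; sector counts are always in 512-byte units. A short parsing sketch (illustrative only):

```python
SECTOR_BYTES = 512          # /proc/diskstats sector counts are always 512-byte units

with open("/proc/diskstats") as f:
    for line in f:
        fields = line.split()
        name = fields[2]
        if name.startswith(("loop", "ram")):
            continue
        reads, sectors_read = int(fields[3]), int(fields[5])
        writes, sectors_written = int(fields[7]), int(fields[9])
        print(f"{name:>10}: read {sectors_read * SECTOR_BYTES >> 20} MiB in {reads} ops, "
              f"wrote {sectors_written * SECTOR_BYTES >> 20} MiB in {writes} ops")
```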
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -1818,7 +2226,9 @@ modules:
- name: bcache_cache_errors
link: https://github.com/netdata/netdata/blob/master/health/health.d/bcache.conf
metric: disk.bcache_cache_read_races
- info: number of times data was read from the cache, the bucket was reused and invalidated in the last 10 minutes (when this occurs the data is reread from the backing device)
+ info:
+ number of times data was read from the cache, the bucket was reused and invalidated in the last 10 minutes (when this occurs the data is
+ reread from the backing device)
metrics:
folding:
title: Metrics
@@ -2023,51 +2433,56 @@ modules:
plugin_name: proc.plugin
module_name: /proc/mdstat
monitored_instance:
- name: proc /proc/mdstat
- link: ''
- categories: []
- icon_filename: ''
+ name: MD RAID
+ link: ""
+ categories:
+ - data-collection.linux-systems.disk-metrics
+ icon_filename: "hard-drive.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - raid
+ - mdadm
+ - mdstat
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: "This integration monitors the status of MD RAID devices."
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -2080,7 +2495,8 @@ modules:
- name: mdstat_disks
link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf
metric: md.disks
- info: number of devices in the down state for the ${label:device} ${label:raid_level} array. Any number > 0 indicates that the array is degraded.
+ info:
+ number of devices in the down state for the ${label:device} ${label:raid_level} array. Any number > 0 indicates that the array is degraded.
- name: mdstat_mismatch_cnt
link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf
metric: md.mismatch_cnt
@@ -2158,51 +2574,53 @@ modules:
plugin_name: proc.plugin
module_name: /proc/net/dev
monitored_instance:
- name: proc /proc/net/dev
- link: ''
- categories: []
- icon_filename: ''
+ name: Network interfaces
+ link: ""
+ categories:
+ - data-collection.linux-systems.network-metrics
+ icon_filename: "network-wired.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - network interfaces
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: "Monitor network interface metrics about bandwidth, state, errors and more."
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -2251,18 +2669,10 @@ modules:
- name: 10s_received_packets_storm
link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
metric: net.packets
- info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute
+ info:
+ ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over
+ the last minute
os: "linux freebsd"
- - name: interface_inbound_errors
- link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
- metric: net.errors
- info: number of inbound errors for the network interface ${label:device} in the last 10 minutes
- os: "freebsd"
- - name: interface_outbound_errors
- link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
- metric: net.errors
- info: number of outbound errors for the network interface ${label:device} in the last 10 minutes
- os: "freebsd"
- name: inbound_packets_dropped
link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
metric: net.drops
@@ -2398,51 +2808,53 @@ modules:
plugin_name: proc.plugin
module_name: /proc/net/wireless
monitored_instance:
- name: proc /proc/net/wireless
- link: ''
- categories: []
- icon_filename: ''
+ name: Wireless network interfaces
+ link: ""
+ categories:
+ - data-collection.linux-systems.network-metrics
+ icon_filename: "network-wired.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - wireless devices
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: "Monitor wireless devices with metrics about status, link quality, signal level, noise level and more."
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -2472,13 +2884,16 @@ modules:
dimensions:
- name: link_quality
- name: wireless.signal_level
- description: The signal level is the wireless signal power level received by the wireless client. The closer the value is to 0, the stronger the signal.
+ description:
+ The signal level is the wireless signal power level received by the wireless client. The closer the value is to 0, the stronger the
+ signal.
unit: "dBm"
chart_type: line
dimensions:
- name: signal_level
- name: wireless.noise_level
- description: The noise level indicates the amount of background noise in your environment. The closer the value to 0, the greater the noise level.
+ description:
+ The noise level indicates the amount of background noise in your environment. The closer the value to 0, the greater the noise level.
unit: "dBm"
chart_type: line
dimensions:
@@ -2503,51 +2918,54 @@ modules:
plugin_name: proc.plugin
module_name: /sys/class/infiniband
monitored_instance:
- name: proc /sys/class/infiniband
- link: ''
- categories: []
- icon_filename: ''
+ name: InfiniBand
+ link: ""
+ categories:
+ - data-collection.linux-systems.network-metrics
+ icon_filename: "network-wired.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - infiniband
+ - rdma
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+        metrics_description: "This integration monitors InfiniBand network interface statistics."
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -2643,51 +3061,58 @@ modules:
plugin_name: proc.plugin
module_name: /proc/net/netstat
monitored_instance:
- name: proc /proc/net/netstat
- link: ''
- categories: []
- icon_filename: ''
+ name: Network statistics
+ link: ""
+ categories:
+ - data-collection.linux-systems.network-metrics
+ icon_filename: "network-wired.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - ip
+ - udp
+ - udplite
+ - icmp
+ - netstat
+ - snmp
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: "This integration provides metrics from the `netstat`, `snmp` and `snmp6` modules."
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -2726,7 +3151,9 @@ modules:
- name: 10s_ipv4_tcp_resets_sent
link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
metric: ipv4.tcphandshake
- info: average number of sent TCP RESETS over the last 10 seconds. This can indicate a port scan, or that a service running on this host has crashed. Netdata will not send a clear notification for this alarm.
+ info:
+ average number of sent TCP RESETS over the last 10 seconds. This can indicate a port scan, or that a service running on this host has
+ crashed. Netdata will not send a clear notification for this alarm.
os: "linux"
- name: 1m_ipv4_tcp_resets_received
link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
@@ -2736,7 +3163,9 @@ modules:
- name: 10s_ipv4_tcp_resets_received
link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
metric: ipv4.tcphandshake
- info: average number of received TCP RESETS over the last 10 seconds. This can be an indication that a service this host needs has crashed. Netdata will not send a clear notification for this alarm.
+ info:
+ average number of received TCP RESETS over the last 10 seconds. This can be an indication that a service this host needs has crashed.
+ Netdata will not send a clear notification for this alarm.
os: "linux freebsd"
- name: 1m_ipv4_udp_receive_buffer_errors
link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf
@@ -3232,51 +3661,53 @@ modules:
plugin_name: proc.plugin
module_name: /proc/net/sockstat
monitored_instance:
- name: proc /proc/net/sockstat
- link: ''
- categories: []
- icon_filename: ''
+ name: Socket statistics
+ link: ""
+ categories:
+ - data-collection.linux-systems.network-metrics
+ icon_filename: "network-wired.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - sockets
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: "This integration provides socket statistics."
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -3364,51 +3795,53 @@ modules:
plugin_name: proc.plugin
module_name: /proc/net/sockstat6
monitored_instance:
- name: proc /proc/net/sockstat6
- link: ''
- categories: []
- icon_filename: ''
+ name: IPv6 Socket Statistics
+ link: ""
+ categories:
+ - data-collection.linux-systems.network-metrics
+ icon_filename: "network-wired.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - ipv6 sockets
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: "This integration provides IPv6 socket statistics."
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -3459,51 +3892,53 @@ modules:
plugin_name: proc.plugin
module_name: /proc/net/ip_vs_stats
monitored_instance:
- name: proc /proc/net/ip_vs_stats
- link: ''
- categories: []
- icon_filename: ''
+ name: IP Virtual Server
+ link: ""
+ categories:
+ - data-collection.linux-systems.network-metrics
+ icon_filename: "network-wired.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - ip virtual server
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+        metrics_description: "This integration monitors IP Virtual Server statistics."
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -3544,51 +3979,54 @@ modules:
plugin_name: proc.plugin
module_name: /proc/net/rpc/nfs
monitored_instance:
- name: proc /proc/net/rpc/nfs
- link: ''
- categories: []
- icon_filename: ''
+ name: NFS Client
+ link: ""
+ categories:
+ - data-collection.linux-systems.filesystem-metrics.nfs
+ icon_filename: "nfs.png"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - nfs client
+ - filesystem
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: "This integration provides statistics from the Linux kernel's NFS Client."
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -3642,51 +4080,54 @@ modules:
plugin_name: proc.plugin
module_name: /proc/net/rpc/nfsd
monitored_instance:
- name: proc /proc/net/rpc/nfsd
- link: ''
- categories: []
- icon_filename: ''
+ name: NFS Server
+ link: ""
+ categories:
+ - data-collection.linux-systems.filesystem-metrics.nfs
+ icon_filename: "nfs.png"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - nfs server
+ - filesystem
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: "This integration provides statistics from the Linux kernel's NFS Server."
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -3773,51 +4214,54 @@ modules:
plugin_name: proc.plugin
module_name: /proc/net/sctp/snmp
monitored_instance:
- name: proc /proc/net/sctp/snmp
- link: ''
- categories: []
- icon_filename: ''
+ name: SCTP Statistics
+ link: ""
+ categories:
+ - data-collection.linux-systems.network-metrics
+ icon_filename: "network-wired.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - sctp
+ - stream control transmission protocol
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: "This integration provides statistics about the Stream Control Transmission Protocol (SCTP)."
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -3874,51 +4318,55 @@ modules:
plugin_name: proc.plugin
module_name: /proc/net/stat/nf_conntrack
monitored_instance:
- name: proc /proc/net/stat/nf_conntrack
- link: ''
- categories: []
- icon_filename: ''
+ name: Conntrack
+ link: ""
+ categories:
+ - data-collection.linux-systems.firewall-metrics
+ icon_filename: "firewall.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - connection tracking mechanism
+ - netfilter
+ - conntrack
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+        metrics_description: "This integration monitors the connection tracking mechanism of Netfilter in the Linux kernel."
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -3991,51 +4439,53 @@ modules:
plugin_name: proc.plugin
module_name: /proc/net/stat/synproxy
monitored_instance:
- name: proc /proc/net/stat/synproxy
- link: ''
- categories: []
- icon_filename: ''
+ name: Synproxy
+ link: ""
+ categories:
+ - data-collection.linux-systems.firewall-metrics
+ icon_filename: "firewall.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - synproxy
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: "This integration provides statistics about the Synproxy netfilter module."
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -4076,51 +4526,56 @@ modules:
plugin_name: proc.plugin
module_name: /proc/spl/kstat/zfs
monitored_instance:
- name: proc /proc/spl/kstat/zfs
- link: ''
- categories: []
- icon_filename: ''
+ name: ZFS Pools
+ link: ""
+ categories:
+ - data-collection.linux-systems.filesystem-metrics.zfs
+ icon_filename: "filesystem.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - zfs pools
+ - pools
+ - zfs
+ - filesystem
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: "This integration provides metrics about the state of ZFS pools."
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -4163,51 +4618,56 @@ modules:
plugin_name: proc.plugin
module_name: /proc/spl/kstat/zfs/arcstats
monitored_instance:
- name: proc /proc/spl/kstat/zfs/arcstats
- link: ''
- categories: []
- icon_filename: ''
+ name: ZFS Adaptive Replacement Cache
+ link: ""
+ categories:
+ - data-collection.linux-systems.filesystem-metrics.zfs
+ icon_filename: "filesystem.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - zfs arc
+ - arc
+ - zfs
+ - filesystem
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+        metrics_description: "This integration monitors ZFS Adaptive Replacement Cache (ARC) statistics."
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -4424,51 +4884,54 @@ modules:
plugin_name: proc.plugin
module_name: /sys/fs/btrfs
monitored_instance:
- name: proc /sys/fs/btrfs
- link: ''
- categories: []
- icon_filename: ''
+ name: BTRFS
+ link: ""
+ categories:
+ - data-collection.linux-systems.filesystem-metrics.btrfs
+ icon_filename: "filesystem.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - btrfs
+ - filesystem
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: "This integration provides usage and error statistics from the BTRFS filesystem."
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -4611,51 +5074,54 @@ modules:
plugin_name: proc.plugin
module_name: /sys/class/power_supply
monitored_instance:
- name: proc /sys/class/power_supply
- link: ''
- categories: []
- icon_filename: ''
+ name: Power Supply
+ link: ""
+ categories:
+ - data-collection.linux-systems.power-supply-metrics
+ icon_filename: "powersupply.svg"
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - psu
+ - power supply
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+        metrics_description: "This integration monitors power supply metrics, such as battery status, AC power status and more."
+ method_description: ""
supported_platforms:
include: []
exclude: []
- multi-instance: true
+ multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: ""
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
list: []
configuration:
file:
- name: ''
- description: ''
+ name: ""
+ description: ""
options:
- description: ''
+ description: ""
folding:
- title: ''
+ title: ""
enabled: true
list: []
examples:
folding:
enabled: true
- title: ''
+ title: ""
list: []
troubleshooting:
problems:
@@ -4714,3 +5180,138 @@ modules:
- name: now
- name: max
- name: max_design
+ - meta:
+ plugin_name: proc.plugin
+ module_name: /sys/class/drm
+ monitored_instance:
+ name: AMD GPU
+ link: "https://www.amd.com"
+ categories:
+ - data-collection.hardware-devices-and-sensors
+ icon_filename: amd.svg
+ related_resources:
+ integrations:
+ list: []
+ info_provided_to_referring_integrations:
+ description: ""
+ keywords:
+ - amd
+ - gpu
+ - hardware
+ most_popular: false
+ overview:
+ data_collection:
+ metrics_description: "This integration monitors AMD GPU metrics, such as utilization, clock frequency and memory usage."
+ method_description: "It reads `/sys/class/drm` to collect metrics for every AMD GPU card instance it encounters."
+ supported_platforms:
+ include:
+ - Linux
+ exclude: []
+ multi_instance: true
+ additional_permissions:
+ description: ""
+ default_behavior:
+ auto_detection:
+ description: ""
+ limits:
+ description: ""
+ performance_impact:
+ description: ""
+ setup:
+ prerequisites:
+ list: []
+ configuration:
+ file:
+ name: ""
+ description: ""
+ options:
+ description: ""
+ folding:
+ title: ""
+ enabled: true
+ list: []
+ examples:
+ folding:
+ enabled: true
+ title: ""
+ list: []
+ troubleshooting:
+ problems:
+ list: []
+ alerts: []
+ metrics:
+ folding:
+ title: Metrics
+ enabled: false
+ description: ""
+ availability: []
+ scopes:
+ - name: gpu
+ description: "These metrics refer to the GPU."
+ labels:
+ - name: product_name
+ description: GPU product name (e.g. AMD RX 6600)
+ metrics:
+ - name: amdgpu.gpu_utilization
+ description: GPU utilization
+ unit: "percentage"
+ chart_type: line
+ dimensions:
+ - name: utilization
+ - name: amdgpu.gpu_mem_utilization
+ description: GPU memory utilization
+ unit: "percentage"
+ chart_type: line
+ dimensions:
+ - name: utilization
+ - name: amdgpu.gpu_clk_frequency
+ description: GPU clock frequency
+ unit: "MHz"
+ chart_type: line
+ dimensions:
+ - name: frequency
+ - name: amdgpu.gpu_mem_clk_frequency
+ description: GPU memory clock frequency
+ unit: "MHz"
+ chart_type: line
+ dimensions:
+ - name: frequency
+ - name: amdgpu.gpu_mem_vram_usage_perc
+ description: VRAM memory usage percentage
+ unit: "percentage"
+ chart_type: line
+ dimensions:
+ - name: usage
+ - name: amdgpu.gpu_mem_vram_usage
+ description: VRAM memory usage
+ unit: "bytes"
+ chart_type: area
+ dimensions:
+ - name: free
+ - name: used
+ - name: amdgpu.gpu_mem_vis_vram_usage_perc
+ description: visible VRAM memory usage percentage
+ unit: "percentage"
+ chart_type: line
+ dimensions:
+ - name: usage
+ - name: amdgpu.gpu_mem_vis_vram_usage
+ description: visible VRAM memory usage
+ unit: "bytes"
+ chart_type: area
+ dimensions:
+ - name: free
+ - name: used
+ - name: amdgpu.gpu_mem_gtt_usage_perc
+ description: GTT memory usage percentage
+ unit: "percentage"
+ chart_type: line
+ dimensions:
+ - name: usage
+ - name: amdgpu.gpu_mem_gtt_usage
+ description: GTT memory usage
+ unit: "bytes"
+ chart_type: area
+ dimensions:
+ - name: free
+ - name: used
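
The new `/sys/class/drm` module above states that the collector reads `/sys/class/drm` for every AMD GPU card it finds. As a rough illustration of where such numbers come from, here is a minimal sketch, assuming the amdgpu driver's sysfs attributes (`gpu_busy_percent`, `mem_info_vram_used`, `mem_info_vram_total`) and a `card0` device path; it is not the implementation in `sys_class_drm.c`.

```c
/* Illustrative sketch only: read a few amdgpu sysfs attributes the way a
 * /sys/class/drm collector conceptually would. The card path and attribute
 * names (card0, gpu_busy_percent, mem_info_vram_*) are assumptions based on
 * the amdgpu driver, not netdata's actual sys_class_drm.c code. */
#include <stdio.h>

/* Read a single numeric value from a sysfs file; returns -1 on failure. */
static long long read_sysfs_number(const char *path) {
    FILE *fp = fopen(path, "r");
    if (!fp)
        return -1;

    long long value = -1;
    if (fscanf(fp, "%lld", &value) != 1)
        value = -1;

    fclose(fp);
    return value;
}

int main(void) {
    const char *dev = "/sys/class/drm/card0/device"; /* assumed card path */
    char path[512];

    /* GPU utilization, percent (0-100). */
    snprintf(path, sizeof(path), "%s/gpu_busy_percent", dev);
    long long busy = read_sysfs_number(path);

    /* VRAM usage, bytes. */
    snprintf(path, sizeof(path), "%s/mem_info_vram_used", dev);
    long long vram_used = read_sysfs_number(path);
    snprintf(path, sizeof(path), "%s/mem_info_vram_total", dev);
    long long vram_total = read_sysfs_number(path);

    if (busy >= 0)
        printf("gpu utilization: %lld %%\n", busy);
    if (vram_used >= 0 && vram_total > 0)
        printf("vram: %lld / %lld bytes (%.1f %%)\n",
               vram_used, vram_total,
               100.0 * (double)vram_used / (double)vram_total);

    return 0;
}
```

The sketch covers only a single card's utilization and VRAM usage; per the metadata above, the real collector discovers every card instance and also exposes the GTT and visible-VRAM charts.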
diff --git a/collectors/proc.plugin/metrics.csv b/collectors/proc.plugin/metrics.csv
deleted file mode 100644
index ea0d1b36..00000000
--- a/collectors/proc.plugin/metrics.csv
+++ /dev/null
@@ -1,271 +0,0 @@
-metric,scope,dimensions,unit,description,chart_type,labels,plugin,module
-system.cpu,,"guest_nice, guest, steal, softirq, irq, user, system, nice, iowait, idle",percentage,Total CPU utilization,stacked,,proc.plugin,/proc/stat
-cpu.cpu,cpu core,"guest_nice, guest, steal, softirq, irq, user, system, nice, iowait, idle",percentage,Core utilization,stacked,cpu,proc.plugin,/proc/stat
-system.intr,,interrupts,interrupts/s,CPU Interrupts,line,,proc.plugin,/proc/stat
-system.ctxt,,switches,context switches/s,CPU Context Switches,line,,proc.plugin,/proc/stat
-system.forks,,started,processes/s,Started Processes,line,,proc.plugin,/proc/stat
-system.processes,,"running, blocked",processes,System Processes,line,,proc.plugin,/proc/stat
-cpu.core_throttling,,a dimension per cpu core,events/s,Core Thermal Throttling Events,line,,proc.plugin,/proc/stat
-cpu.package_throttling,,a dimension per package,events/s,Package Thermal Throttling Events,line,,proc.plugin,/proc/stat
-cpu.cpufreq,,a dimension per cpu core,MHz,Current CPU Frequency,line,,proc.plugin,/proc/stat
-cpuidle.cpu_cstate_residency_time,cpu core,a dimension per c-state,percentage,C-state residency time,stacked,cpu,proc.plugin,/proc/stat
-system.entropy,,entropy,entropy,Available Entropy,line,,proc.plugin,/proc/sys/kernel/random/entropy_avail
-system.uptime,,uptime,seconds,System Uptime,line,,proc.plugin,/proc/uptime
-system.swapio,,"in, out",KiB/s,Swap I/O,area,,proc.plugin,/proc/vmstat
-system.pgpgio,,"in, out",KiB/s,Memory Paged from/to disk,area,,proc.plugin,/proc/vmstat
-system.pgfaults,,"minor, major",faults/s,Memory Page Faults,line,,proc.plugin,/proc/vmstat
-system.interrupts,,a dimension per device,interrupts/s,System interrupts,stacked,,proc.plugin,/proc/interrupts
-cpu.interrupts,cpu core,a dimension per device,interrupts/s,CPU interrupts,stacked,cpu,proc.plugin,/proc/interrupts
-system.load,,"load1, load5, load15",load,System Load Average,line,,proc.plugin,/proc/loadavg
-system.active_processes,,active,processes,System Active Processes,line,,proc.plugin,/proc/loadavg
-system.cpu_some_pressure,,"some10, some60, some300",percentage,"CPU some pressure",line,,proc.plugin,/proc/pressure
-system.cpu_some_pressure_stall_time,,time,ms,"CPU some pressure stall time",line,,proc.plugin,/proc/pressure
-system.cpu_full_pressure,,"some10, some60, some300",percentage,"CPU full pressure",line,,proc.plugin,/proc/pressure
-system.cpu_full_pressure_stall_time,,time,ms,"CPU full pressure stall time",line,,proc.plugin,/proc/pressure
-system.memory_some_pressure,,"some10, some60, some300",percentage,"Memory some pressure",line,,proc.plugin,/proc/pressure
-system.memory_some_pressure_stall_time,,time,ms,"Memory some pressure stall time",line,,proc.plugin,/proc/pressure
-system.memory_full_pressure,,"some10, some60, some300",percentage,"Memory full pressure",line,,proc.plugin,/proc/pressure
-system.memory_full_pressure_stall_time,,time,ms,"Memory full pressure stall time",line,,proc.plugin,/proc/pressure
-system.io_some_pressure,,"some10, some60, some300",percentage,"I/O some pressure",line,,proc.plugin,/proc/pressure
-system.io_some_pressure_stall_time,,time,ms,"I/O some pressure stall time",line,,proc.plugin,/proc/pressure
-system.io_full_pressure,,"some10, some60, some300",percentage,"I/O some pressure",line,,proc.plugin,/proc/pressure
-system.io_full_pressure_stall_time,,time,ms,"I/O some pressure stall time",line,,proc.plugin,/proc/pressure
-system.softirqs,,a dimension per softirq,softirqs/s,System softirqs,stacked,,proc.plugin,/proc/softirqs
-cpu.softirqs,cpu core,a dimension per softirq,softirqs/s,CPU softirqs,stacked,cpu,proc.plugin,/proc/softirqs
-system.softnet_stat,,"processed, dropped, squeezed, received_rps, flow_limit_count",events/s,System softnet_stat,line,,proc.plugin,/proc/net/softnet_stat
-cpu.softnet_stat,cpu core,"processed, dropped, squeezed, received_rps, flow_limit_count",events/s,CPU softnet_stat,line,,proc.plugin,/proc/net/softnet_stat
-system.ram,,"free, used, cached, buffers",MiB,System RAM,stacked,,proc.plugin,/proc/meminfo
-mem.available,,avail,MiB,Available RAM for applications,area,,proc.plugin,/proc/meminfo
-system.swap,,"free, used",MiB,System Swap,stacked,,proc.plugin,/proc/meminfo
-mem.hwcorrupt,,HardwareCorrupted,MiB,Corrupted Memory detected by ECC,line,,proc.plugin,/proc/meminfo
-mem.commited,,Commited_AS,MiB,Committed (Allocated) Memory,area,,proc.plugin,/proc/meminfo
-mem.writeback,,"Dirty, Writeback, FuseWriteback, NfsWriteback, Bounce",MiB,Writeback Memory,line,,proc.plugin,/proc/meminfo
-mem.kernel,,"Slab, KernelStack, PageTables, VmallocUsed, Percpu",MiB,Memory Used by Kernel,stacked,,proc.plugin,/proc/meminfo
-mem.slab,,"reclaimable, unreclaimable",MiB,Reclaimable Kernel Memory,stacked,,proc.plugin,/proc/meminfo
-mem.hugepage,,"free, used, surplus, reserved",MiB,Dedicated HugePages Memory,stacked,,proc.plugin,/proc/meminfo
-mem.transparent_hugepages,,"anonymous, shmem",MiB,Transparent HugePages Memory,stacked,,proc.plugin,/proc/meminfo
-mem.balloon,,"inflate, deflate, migrate",KiB/s,Memory Ballooning Operations,line,,proc.plugin,/proc/vmstat
-mem.zswapio,,"in, out",KiB/s,ZSwap I/O,area,,proc.plugin,/proc/vmstat
-mem.ksm_cow,,"swapin, write",KiB/s,KSM Copy On Write Operations,line,,proc.plugin,/proc/vmstat
-mem.thp_faults,,"alloc, fallback, fallback_charge",events/s,Transparent Huge Page Fault Allocations,line,,proc.plugin,/proc/vmstat
-mem.thp_file,,"alloc, fallback, mapped, fallback_charge",events/s,Transparent Huge Page File Allocations,line,,proc.plugin,/proc/vmstat
-mem.thp_zero,,"alloc, failed",events/s,Transparent Huge Zero Page Allocations,line,,proc.plugin,/proc/vmstat
-mem.thp_collapse,,"alloc, failed",events/s,Transparent Huge Pages Collapsed by khugepaged,line,,proc.plugin,/proc/vmstat
-mem.thp_split,,"split, failed, split_pmd, split_deferred",events/s,Transparent Huge Page Splits,line,,proc.plugin,/proc/vmstat
-mem.thp_swapout,,"swapout, fallback",events/s,Transparent Huge Pages Swap Out,line,,proc.plugin,/proc/vmstat
-mem.thp_compact,,"success, fail, stall",events/s,Transparent Huge Pages Compaction,line,,proc.plugin,/proc/vmstat
-mem.pagetype_global,,a dimension per pagesize,B,System orders available,stacked,,proc.plugin,/proc/pagetypeinfo
-mem.pagetype,"node, zone, type",a dimension per pagesize,B,"pagetype_Node{node}_{zone}_{type}",stacked,"node_id, node_zone, node_type",proc.plugin,/proc/pagetypeinfo
-mem.oom_kill,,kills,kills/s,Out of Memory Kills,line,,proc.plugin,/proc/vmstat
-mem.numa,,"local, foreign, interleave, other, pte_updates, huge_pte_updates, hint_faults, hint_faults_local, pages_migrated",events/s,NUMA events,line,,proc.plugin,/proc/vmstat
-mem.ecc_ce,,a dimension per mem controller,errors,ECC Memory Correctable Errors,line,,proc.plugin,/sys/devices/system/edac/mc
-mem.ecc_ue,,a dimension per mem controller,errors,ECC Memory Uncorrectable Errors,line,,proc.plugin,/sys/devices/system/edac/mc
-mem.numa_nodes,numa node,"hit, miss, local, foreign, interleave, other",events/s,NUMA events,line,numa_node,proc.plugin,/sys/devices/system/node
-mem.ksm,,"shared, unshared, sharing, volatile",MiB,Kernel Same Page Merging,stacked,,proc.plugin,/sys/kernel/mm/ksm
-mem.ksm_savings,,"savings, offered",MiB,Kernel Same Page Merging Savings,area,,proc.plugin,/sys/kernel/mm/ksm
-mem.ksm_ratios,,savings,percentage,Kernel Same Page Merging Effectiveness,line,,proc.plugin,/sys/kernel/mm/ksm
-mem.zram_usage,zram device,"compressed, metadata",MiB,ZRAM Memory Usage,area,device,proc.plugin,/sys/block/zram
-mem.zram_savings,zram device,"savings, original",MiB,ZRAM Memory Savings,area,device,proc.plugin,/sys/block/zram
-mem.zram_ratio,zram device,ratio,ratio,ZRAM Compression Ratio (original to compressed),line,device,proc.plugin,/sys/block/zram
-mem.zram_efficiency,zram device,percent,percentage,ZRAM Efficiency,line,device,proc.plugin,/sys/block/zram
-system.ipc_semaphores,,semaphores,semaphores,IPC Semaphores,area,,proc.plugin,ipc
-system.ipc_semaphore_arrays,,arrays,arrays,IPC Semaphore Arrays,area,,proc.plugin,ipc
-system.message_queue_message,,a dimension per queue,messages,IPC Message Queue Number of Messages,stacked,,proc.plugin,ipc
-system.message_queue_bytes,,a dimension per queue,bytes,IPC Message Queue Used Bytes,stacked,,proc.plugin,ipc
-system.shared_memory_segments,,segments,segments,IPC Shared Memory Number of Segments,stacked,,proc.plugin,ipc
-system.shared_memory_bytes,,bytes,bytes,IPC Shared Memory Used Bytes,stacked,,proc.plugin,ipc
-system.io,,"in, out",KiB/s,Disk I/O,area,,proc.plugin,/proc/diskstats
-disk.io,disk,"reads, writes",KiB/s,Disk I/O Bandwidth,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk_ext.io,disk,discards,KiB/s,Amount of Discarded Data,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk.ops,disk,"reads, writes",operations/s,Disk Completed I/O Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk_ext.ops,disk,"discards, flushes",operations/s,Disk Completed Extended I/O Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk.qops,disk,operations,operations,Disk Current I/O Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk.backlog,disk,backlog,milliseconds,Disk Backlog,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk.busy,disk,busy,milliseconds,Disk Busy Time,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk.util,disk,utilization,% of time working,Disk Utilization Time,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk.mops,disk,"reads, writes",merged operations/s,Disk Merged Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk_ext.mops,disk,discards,merged operations/s,Disk Merged Discard Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk.iotime,disk,"reads, writes",milliseconds/s,Disk Total I/O Time,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk_ext.iotime,disk,"discards, flushes",milliseconds/s,Disk Total I/O Time for Extended Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk.await,disk,"reads, writes",milliseconds/operation,Average Completed I/O Operation Time,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk_ext.await,disk,"discards, flushes",milliseconds/operation,Average Completed Extended I/O Operation Time,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk.avgsz,disk,"reads, writes",KiB/operation,Average Completed I/O Operation Bandwidth,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk_ext.avgsz,disk,discards,KiB/operation,Average Amount of Discarded Data,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk.svctm,disk,svctm,milliseconds/operation,Average Service Time,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk.bcache_cache_alloc,disk,"ununsed, dirty, clean, metadata, undefined",percentage,BCache Cache Allocations,stacked,,proc.plugin,/proc/diskstats
-disk.bcache_hit_ratio,disk,"5min, 1hour, 1day, ever",percentage,BCache Cache Hit Ratio,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk.bcache_rates,disk,"congested, writeback",KiB/s,BCache Rates,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk.bcache_size,disk,dirty,MiB,BCache Cache Sizes,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk.bcache_usage,disk,avail,percentage,BCache Cache Usage,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk.bcache_cache_read_races,disk,"races, errors",operations/s,BCache Cache Read Races,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk.bcache,disk,"hits, misses, collisions, readaheads",operations/s,BCache Cache I/O Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-disk.bcache_bypass,disk,"hits, misses",operations/s,BCache Cache Bypass I/O Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
-md.health,,a dimension per md array,failed disks,Faulty Devices In MD,line,,proc.plugin,/proc/mdstat
-md.disks,md array,"inuse, down",disks,Disks Stats,stacked,"device, raid_level",proc.plugin,/proc/mdstat
-md.mismatch_cnt,md array,count,unsynchronized blocks,Mismatch Count,line,"device, raid_level",proc.plugin,/proc/mdstat
-md.status,md array,"check, resync, recovery, reshape",percent,Current Status,line,"device, raid_level",proc.plugin,/proc/mdstat
-md.expected_time_until_operation_finish,md array,finish_in,seconds,Approximate Time Until Finish,line,"device, raid_level",proc.plugin,/proc/mdstat
-md.operation_speed,md array,speed,KiB/s,Operation Speed,line,"device, raid_level",proc.plugin,/proc/mdstat
-md.nonredundant,md array,available,boolean,Nonredundant Array Availability,line,"device, raid_level",proc.plugin,/proc/mdstat
-system.net,,"received, sent",kilobits/s,Physical Network Interfaces Aggregated Bandwidth,area,,proc.plugin,/proc/net/dev
-net.net,network device,"received, sent",kilobits/s,Bandwidth,area,"interface_type, device",proc.plugin,/proc/net/dev
-net.speed,network device,speed,kilobits/s,Interface Speed,line,"interface_type, device",proc.plugin,/proc/net/dev
-net.duplex,network device,"full, half, unknown",state,Interface Duplex State,line,"interface_type, device",proc.plugin,/proc/net/dev
-net.operstate,network device,"up, down, notpresent, lowerlayerdown, testing, dormant, unknown",state,Interface Operational State,line,"interface_type, device",proc.plugin,/proc/net/dev
-net.carrier,network device,"up, down",state,Interface Physical Link State,line,"interface_type, device",proc.plugin,/proc/net/dev
-net.mtu,network device,mtu,octets,Interface MTU,line,"interface_type, device",proc.plugin,/proc/net/dev
-net.packets,network device,"received, sent, multicast",packets/s,Packets,line,"interface_type, device",proc.plugin,/proc/net/dev
-net.errors,network device,"inbound, outbound",errors/s,Interface Errors,line,"interface_type, device",proc.plugin,/proc/net/dev
-net.drops,network device,"inbound, outbound",drops/s,Interface Drops,line,"interface_type, device",proc.plugin,/proc/net/dev
-net.fifo,network device,"receive, transmit",errors,Interface FIFO Buffer Errors,line,"interface_type, device",proc.plugin,/proc/net/dev
-net.compressed,network device,"received, sent",packets/s,Compressed Packets,line,"interface_type, device",proc.plugin,/proc/net/dev
-net.events,network device,"frames, collisions, carrier",events/s,Network Interface Events,line,"interface_type, device",proc.plugin,/proc/net/dev
-wireless.status,wireless device,status,status,Internal status reported by interface.,line,,proc.plugin,/proc/net/wireless
-wireless.link_quality,wireless device,link_quality,value,"Overall quality of the link. This is an aggregate value, and depends on the driver and hardware.",line,,proc.plugin,/proc/net/wireless
-wireless.signal_level,wireless device,signal_level,dBm,"The signal level is the wireless signal power level received by the wireless client. The closer the value is to 0, the stronger the signal.",line,,proc.plugin,/proc/net/wireless
-wireless.noise_level,wireless device,noise_level,dBm,"The noise level indicates the amount of background noise in your environment. The closer the value to 0, the greater the noise level.",line,,proc.plugin,/proc/net/wireless
-wireless.discarded_packets,wireless device,"nwid, crypt, frag, retry, misc",packets/s,"Packet discarded in the wireless adapter due to wireless specific problems.",line,,proc.plugin,/proc/net/wireless
-wireless.missed_beacons,wireless device,missed_beacons,frames/s,Number of missed beacons.,line,,proc.plugin,/proc/net/wireless
-ib.bytes,infiniband port,"Received, Sent",kilobits/s,Bandwidth usage,area,,proc.plugin,/sys/class/infiniband
-ib.packets,infiniband port,"Received, Sent, Mcast_rcvd, Mcast_sent, Ucast_rcvd, Ucast_sent",packets/s,Packets Statistics,area,,proc.plugin,/sys/class/infiniband
-ib.errors,infiniband port,"Pkts_malformated, Pkts_rcvd_discarded, Pkts_sent_discarded, Tick_Wait_to_send, Pkts_missed_resource, Buffer_overrun, Link_Downed, Link_recovered, Link_integrity_err, Link_minor_errors, Pkts_rcvd_with_EBP, Pkts_rcvd_discarded_by_switch, Pkts_sent_discarded_by_switch",errors/s,Error Counters,line,,proc.plugin,/sys/class/infiniband
-ib.hwerrors,infiniband port,"Duplicated_packets, Pkt_Seq_Num_gap, Ack_timer_expired, Drop_missing_buffer, Drop_out_of_sequence, NAK_sequence_rcvd, CQE_err_Req, CQE_err_Resp, CQE_Flushed_err_Req, CQE_Flushed_err_Resp, Remote_access_err_Req, Remote_access_err_Resp, Remote_invalid_req, Local_length_err_Resp, RNR_NAK_Packets, CNP_Pkts_ignored, RoCE_ICRC_Errors",errors/s,Hardware Errors,line,,proc.plugin,/sys/class/infiniband
-ib.hwpackets,infiniband port,"RoCEv2_Congestion_sent, RoCEv2_Congestion_rcvd, IB_Congestion_handled, ATOMIC_req_rcvd, Connection_req_rcvd, Read_req_rcvd, Write_req_rcvd, RoCE_retrans_adaptive, RoCE_retrans_timeout, RoCE_slow_restart, RoCE_slow_restart_congestion, RoCE_slow_restart_count",packets/s,Hardware Packets Statistics,line,,proc.plugin,/sys/class/infiniband
-system.ip,,"received, sent",kilobits/s,IP Bandwidth,area,,proc.plugin,/proc/net/netstat
-ip.inerrors,,"noroutes, truncated, checksum",packets/s,IP Input Errors,line,,proc.plugin,/proc/net/netstat
-ip.mcast,,"received, sent",kilobits/s,IP Multicast Bandwidth,area,,proc.plugin,/proc/net/netstat
-ip.bcast,,"received, sent",kilobits/s,IP Broadcast Bandwidth,area,,proc.plugin,/proc/net/netstat
-ip.mcastpkts,,"received, sent",packets/s,IP Multicast Packets,line,,proc.plugin,/proc/net/netstat
-ip.bcastpkts,,"received, sent",packets/s,IP Broadcast Packets,line,,proc.plugin,/proc/net/netstat
-ip.ecnpkts,,"CEP, NoECTP, ECTP0, ECTP1",packets/s,IP ECN Statistics,line,,proc.plugin,/proc/net/netstat
-ip.tcpmemorypressures,,pressures,events/s,TCP Memory Pressures,line,,proc.plugin,/proc/net/netstat
-ip.tcpconnaborts,,"baddata, userclosed, nomemory, timeout, linger, failed",connections/s,TCP Connection Aborts,line,,proc.plugin,/proc/net/netstat
-ip.tcpreorders,,"timestamp, sack, fack, reno",packets/s,TCP Reordered Packets by Detection Method,line,,proc.plugin,/proc/net/netstat
-ip.tcpofo,,"inqueue, dropped, merged, pruned",packets/s,TCP Out-Of-Order Queue,line,,proc.plugin,/proc/net/netstat
-ip.tcpsyncookies,,"received, sent, failed",packets/s,TCP SYN Cookies,line,,proc.plugin,/proc/net/netstat
-ip.tcp_syn_queue,,"drops, cookies",packets/s,TCP SYN Queue Issues,line,,proc.plugin,/proc/net/netstat
-ip.tcp_accept_queue,,"overflows, drops",packets/s,TCP Accept Queue Issues,line,,proc.plugin,/proc/net/netstat
-ipv4.packets,,"received, sent, forwarded, delivered",packets/s,IPv4 Packets,line,,proc.plugin,/proc/net/netstat
-ipv4.fragsout,,"ok, failed, created",packets/s,IPv4 Fragments Sent,line,,proc.plugin,/proc/net/netstat
-ipv4.fragsin,,"ok, failed, all",packets/s,IPv4 Fragments Reassembly,line,,proc.plugin,/proc/net/netstat
-ipv4.errors,,"InDiscards, OutDiscards, InHdrErrors, OutNoRoutes, InAddrErrors, InUnknownProtos",packets/s,IPv4 Errors,line,,proc.plugin,/proc/net/netstat
-ipv4.icmp,,"received, sent",packets/s,IPv4 ICMP Packets,line,,proc.plugin,/proc/net/netstat
-ipv4.icmp_errors,,"InErrors, OutErrors, InCsumErrors",packets/s,IPv4 ICMP Errors,line,,proc.plugin,/proc/net/netstat
-ipv4.icmpmsg,,"InEchoReps, OutEchoReps, InDestUnreachs, OutDestUnreachs, InRedirects, OutRedirects, InEchos, OutEchos, InRouterAdvert, OutRouterAdvert, InRouterSelect, OutRouterSelect, InTimeExcds, OutTimeExcds, InParmProbs, OutParmProbs, InTimestamps, OutTimestamps, InTimestampReps, OutTimestampReps",packets/s,IPv4 ICMP Messages,line,,proc.plugin,/proc/net/netstat
-ipv4.tcpsock,,connections,active connections,IPv4 TCP Connections,line,,proc.plugin,/proc/net/netstat
-ipv4.tcppackets,,"received, sent",packets/s,IPv4 TCP Packets,line,,proc.plugin,/proc/net/netstat
-ipv4.tcperrors,,"InErrs, InCsumErrors, RetransSegs",packets/s,IPv4 TCP Errors,line,,proc.plugin,/proc/net/netstat
-ipv4.tcpopens,,"active, passive",connections/s,IPv4 TCP Opens,line,,proc.plugin,/proc/net/netstat
-ipv4.tcphandshake,,"EstabResets, OutRsts, AttemptFails, SynRetrans",events/s,IPv4 TCP Handshake Issues,line,,proc.plugin,/proc/net/netstat
-ipv4.udppackets,,"received, sent",packets/s,IPv4 UDP Packets,line,,proc.plugin,/proc/net/netstat
-ipv4.udperrors,,"RcvbufErrors, SndbufErrors, InErrors, NoPorts, InCsumErrors, IgnoredMulti",events/s,IPv4 UDP Errors,line,,proc.plugin,/proc/net/netstat
-ipv4.udplite,,"received, sent",packets/s,IPv4 UDPLite Packets,line,,proc.plugin,/proc/net/netstat
-ipv4.udplite_errors,,"RcvbufErrors, SndbufErrors, InErrors, NoPorts, InCsumErrors, IgnoredMulti",packets/s,IPv4 UDPLite Errors,line,,proc.plugin,/proc/net/netstat
-system.ipv6,,"received, sent",kilobits/s,IPv6 Bandwidth,area,,proc.plugin,/proc/net/netstat
-system.ipv6,,"received, sent, forwarded, delivers",packets/s,IPv6 Packets,line,,proc.plugin,/proc/net/netstat
-ipv6.fragsout,,"ok, failed, all",packets/s,IPv6 Fragments Sent,line,,proc.plugin,/proc/net/netstat
-ipv6.fragsin,,"ok, failed, timeout, all",packets/s,IPv6 Fragments Reassembly,line,,proc.plugin,/proc/net/netstat
-ipv6.errors,,"InDiscards, OutDiscards, InHdrErrors, InAddrErrors, InUnknownProtos, InTooBigErrors, InTruncatedPkts, InNoRoutes, OutNoRoutes",packets/s,IPv6 Errors,line,,proc.plugin,/proc/net/netstat
-ipv6.udppackets,,"received, sent",packets/s,IPv6 UDP Packets,line,,proc.plugin,/proc/net/netstat
-ipv6.udperrors,,"RcvbufErrors, SndbufErrors, InErrors, NoPorts, InCsumErrors, IgnoredMulti",events/s,IPv6 UDP Errors,line,,proc.plugin,/proc/net/netstat
-ipv6.udplitepackets,,"received, sent",packets/s,IPv6 UDPlite Packets,line,,proc.plugin,/proc/net/netstat
-ipv6.udpliteerrors,,"RcvbufErrors, SndbufErrors, InErrors, NoPorts, InCsumErrors",events/s,IPv6 UDP Lite Errors,line,,proc.plugin,/proc/net/netstat
-ipv6.mcast,,"received, sent",kilobits/s,IPv6 Multicast Bandwidth,area,,proc.plugin,/proc/net/netstat
-ipv6.bcast,,"received, sent",kilobits/s,IPv6 Broadcast Bandwidth,area,,proc.plugin,/proc/net/netstat
-ipv6.mcastpkts,,"received, sent",packets/s,IPv6 Multicast Packets,line,,proc.plugin,/proc/net/netstat
-ipv6.icmp,,"received, sent",messages/s,IPv6 ICMP Messages,line,,proc.plugin,/proc/net/netstat
-ipv6.icmpredir,,"received, sent",redirects/s,IPv6 ICMP Redirects,line,,proc.plugin,/proc/net/netstat
-ipv6.icmperrors,,"InErrors, OutErrors, InCsumErrors, InDestUnreachs, InPktTooBigs, InTimeExcds, InParmProblems, OutDestUnreachs, OutPktTooBigs, OutTimeExcds, OutParmProblems",errors/s,IPv6 ICMP Errors,line,,proc.plugin,/proc/net/netstat
-ipv6.icmpechos,,"InEchos, OutEchos, InEchoReplies, OutEchoReplies",messages/s,IPv6 ICMP Echo,line,,proc.plugin,/proc/net/netstat
-ipv6.groupmemb,,"InQueries, OutQueries, InResponses, OutResponses, InReductions, OutReductions",messages/s,IPv6 ICMP Group Membership,line,,proc.plugin,/proc/net/netstat
-ipv6.icmprouter,,"InSolicits, OutSolicits, InAdvertisements, OutAdvertisements",messages/s,IPv6 Router Messages,line,,proc.plugin,/proc/net/netstat
-ipv6.icmpneighbor,,"InSolicits, OutSolicits, InAdvertisements, OutAdvertisements",messages/s,IPv6 Neighbor Messages,line,,proc.plugin,/proc/net/netstat
-ipv6.icmpmldv2,,"received, sent",reports/s,IPv6 ICMP MLDv2 Reports,line,,proc.plugin,/proc/net/netstat
-ipv6.icmptypes,,"InType1, InType128, InType129, InType136, OutType1, OutType128, OutType129, OutType133, OutType135, OutType143",messages/s,IPv6 ICMP Types,line,,proc.plugin,/proc/net/netstat
-ipv6.ect,,"InNoECTPkts, InECT1Pkts, InECT0Pkts, InCEPkts",packets/s,IPv6 ECT Packets,line,,proc.plugin,/proc/net/netstat
-ipv6.ect,,"InNoECTPkts, InECT1Pkts, InECT0Pkts, InCEPkts",packets/s,IPv6 ECT Packets,line,,proc.plugin,/proc/net/netstat
-ipv4.sockstat_sockets,,used,sockets,IPv4 Sockets Used,line,,proc.plugin,/proc/net/sockstat
-ipv4.sockstat_tcp_sockets,,"alloc, orphan, inuse, timewait",sockets,IPv4 TCP Sockets,line,,proc.plugin,/proc/net/sockstat
-ipv4.sockstat_tcp_mem,,mem,KiB,IPv4 TCP Sockets Memory,area,,proc.plugin,/proc/net/sockstat
-ipv4.sockstat_udp_sockets,,inuse,sockets,IPv4 UDP Sockets,line,,proc.plugin,/proc/net/sockstat
-ipv4.sockstat_udp_mem,,mem,sockets,IPv4 UDP Sockets Memory,line,,proc.plugin,/proc/net/sockstat
-ipv4.sockstat_udplite_sockets,,inuse,sockets,IPv4 UDPLITE Sockets,line,,proc.plugin,/proc/net/sockstat
-ipv4.sockstat_raw_sockets,,inuse,sockets,IPv4 RAW Sockets,line,,proc.plugin,/proc/net/sockstat
-ipv4.sockstat_frag_sockets,,inuse,fragments,IPv4 FRAG Sockets,line,,proc.plugin,/proc/net/sockstat
-ipv4.sockstat_frag_mem,,mem,KiB,IPv4 FRAG Sockets Memory,area,,proc.plugin,/proc/net/sockstat
-ipv6.sockstat6_tcp_sockets,,inuse,sockets,IPv6 TCP Sockets,line,,proc.plugin,/proc/net/sockstat6
-ipv6.sockstat6_udp_sockets,,inuse,sockets,IPv6 UDP Sockets,line,,proc.plugin,/proc/net/sockstat6
-ipv6.sockstat6_udplite_sockets,,inuse,sockets,IPv6 UDPLITE Sockets,line,,proc.plugin,/proc/net/sockstat6
-ipv6.sockstat6_raw_sockets,,inuse,sockets,IPv6 RAW Sockets,line,,proc.plugin,/proc/net/sockstat6
-ipv6.sockstat6_frag_sockets,,inuse,fragments,IPv6 FRAG Sockets,line,,proc.plugin,/proc/net/sockstat6
-ipvs.sockets,,connections,connections/s,IPVS New Connections,line,,proc.plugin,/proc/net/ip_vs_stats
-ipvs.packets,,"received, sent",packets/s,IPVS Packets,line,,proc.plugin,/proc/net/ip_vs_stats
-ipvs.net,,"received, sent",kilobits/s,IPVS Bandwidth,area,,proc.plugin,/proc/net/ip_vs_stats
-nfs.net,,"udp, tcp",operations/s,NFS Client Network,stacked,,proc.plugin,/proc/net/rpc/nfs
-nfs.rpc,,"calls, retransmits, auth_refresh",calls/s,NFS Client Remote Procedure Calls Statistics,line,,proc.plugin,/proc/net/rpc/nfs
-nfs.proc2,,a dimension per proc2 call,calls/s,NFS v2 Client Remote Procedure Calls,stacked,,proc.plugin,/proc/net/rpc/nfs
-nfs.proc3,,a dimension per proc3 call,calls/s,NFS v3 Client Remote Procedure Calls,stacked,,proc.plugin,/proc/net/rpc/nfs
-nfs.proc4,,a dimension per proc4 call,calls/s,NFS v4 Client Remote Procedure Calls,stacked,,proc.plugin,/proc/net/rpc/nfs
-nfsd.readcache,,"hits, misses, nocache",reads/s,NFS Server Read Cache,stacked,,proc.plugin,/proc/net/rpc/nfsd
-nfsd.filehandles,,stale,handles/s,NFS Server File Handles,line,,proc.plugin,/proc/net/rpc/nfsd
-nfsd.io,,"read, write",kilobytes/s,NFS Server I/O,area,,proc.plugin,/proc/net/rpc/nfsd
-nfsd.threads,,threads,threads,NFS Server Threads,line,,proc.plugin,/proc/net/rpc/nfsd
-nfsd.net,,"udp, tcp",packets/s,NFS Server Network Statistics,line,,proc.plugin,/proc/net/rpc/nfsd
-nfsd.rpc,,"calls, bad_format, bad_auth",calls/s,NFS Server Remote Procedure Calls Statistics,line,,proc.plugin,/proc/net/rpc/nfsd
-nfsd.proc2,,a dimension per proc2 call,calls/s,NFS v2 Server Remote Procedure Calls,stacked,,proc.plugin,/proc/net/rpc/nfsd
-nfsd.proc3,,a dimension per proc3 call,calls/s,NFS v3 Server Remote Procedure Calls,stacked,,proc.plugin,/proc/net/rpc/nfsd
-nfsd.proc4,,a dimension per proc4 call,calls/s,NFS v4 Server Remote Procedure Calls,stacked,,proc.plugin,/proc/net/rpc/nfsd
-nfsd.proc4ops,,a dimension per proc4 operation,operations/s,NFS v4 Server Operations,stacked,,proc.plugin,/proc/net/rpc/nfsd
-sctp.established,,established,associations,SCTP current total number of established associations,line,,proc.plugin,/proc/net/sctp/snmp
-sctp.transitions,,"active, passive, aborted, shutdown",transitions/s,SCTP Association Transitions,line,,proc.plugin,/proc/net/sctp/snmp
-sctp.packets,,"received, sent",packets/s,SCTP Packets,line,,proc.plugin,/proc/net/sctp/snmp
-sctp.packet_errors,,"invalid, checksum",packets/s,SCTP Packet Errors,line,,proc.plugin,/proc/net/sctp/snmp
-sctp.fragmentation,,"reassembled, fragmented",packets/s,SCTP Fragmentation,line,,proc.plugin,/proc/net/sctp/snmp
-netfilter.conntrack_sockets,,connections,active connections,Connection Tracker Connections,line,,proc.plugin,/proc/net/stat/nf_conntrack
-netfilter.conntrack_new,,"new, ignore, invalid",connections/s,Connection Tracker New Connections,line,,proc.plugin,/proc/net/stat/nf_conntrack
-netfilter.conntrack_changes,,"inserted, deleted, delete_list",changes/s,Connection Tracker Changes,line,,proc.plugin,/proc/net/stat/nf_conntrack
-netfilter.conntrack_expect,,"created, deleted, new",expectations/s,Connection Tracker Expectations,line,,proc.plugin,/proc/net/stat/nf_conntrack
-netfilter.conntrack_search,,"searched, restarted, found",searches/s,Connection Tracker Searches,line,,proc.plugin,/proc/net/stat/nf_conntrack
-netfilter.conntrack_errors,,"icmp_error, error_failed, drop, early_drop",events/s,Connection Tracker Errors,line,,proc.plugin,/proc/net/stat/nf_conntrack
-netfilter.synproxy_syn_received,,received,packets/s,SYNPROXY SYN Packets received,line,,proc.plugin,/proc/net/stat/synproxy
-netfilter.synproxy_conn_reopened,,reopened,connections/s,SYNPROXY Connections Reopened,line,,proc.plugin,/proc/net/stat/synproxy
-netfilter.synproxy_cookies,,"valid, invalid, retransmits",cookies/s,SYNPROXY TCP Cookies,line,,proc.plugin,/proc/net/stat/synproxy
-zfspool.state,zfs pool,"online, degraded, faulted, offline, removed, unavail, suspended",boolean,"ZFS pool state",line,pool,proc.plugin,/proc/spl/kstat/zfs
-zfs.arc_size,,"arcsz, target, min, max",MiB,"ZFS ARC Size",area,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.l2_size,,"actual, size",MiB,"ZFS L2 ARC Size",area,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.reads,,"arc, demand, prefetch, metadata, l2",reads/s,"ZFS Reads",area,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.bytes,,"read, write",KiB/s,"ZFS ARC L2 Read/Write Rate",area,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.hits,,"hits, misses",percentage,"ZFS ARC Hits",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.hits_rate,,"hits, misses",events/s,"ZFS ARC Hits Rate",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.dhits,,"hits, misses",percentage,"ZFS Demand Hits",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.dhits_rate,,"hits, misses",events/s,"ZFS Demand Hits Rate",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.phits,,"hits, misses",percentage,"ZFS Prefetch Hits",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.phits_rate,,"hits, misses",events/s,"ZFS Prefetch Hits Rate",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.mhits,,"hits, misses",percentage,"ZFS Metadata Hits",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.mhits_rate,,"hits, misses",events/s,"ZFS Metadata Hits Rate",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.l2hits,,"hits, misses",percentage,"ZFS L2 Hits",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.l2hits_rate,,"hits, misses",events/s,"ZFS L2 Hits Rate",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.list_hits,,"mfu, mfu_ghost, mru, mru_ghost",hits/s,"ZFS List Hits",area,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.arc_size_breakdown,,"recent, frequent",percentage,"ZFS ARC Size Breakdown",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.memory_ops,,"direct, throttled, indirect",operations/s,"ZFS Memory Operations",line,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.important_ops,,"evict_skip, deleted, mutex_miss, hash_collisions",operations/s,"ZFS Important Operations",line,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.actual_hits,,"hits, misses",percentage,"ZFS Actual Cache Hits",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.actual_hits_rate,,"hits, misses",events/s,"ZFS Actual Cache Hits Rate",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.demand_data_hits,,"hits, misses",percentage,"ZFS Data Demand Efficiency",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.demand_data_hits_rate,,"hits, misses",events/s,"ZFS Data Demand Efficiency Rate",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.prefetch_data_hits,,"hits, misses",percentage,"ZFS Data Prefetch Efficiency",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.prefetch_data_hits_rate,,"hits, misses",events/s,"ZFS Data Prefetch Efficiency Rate",stacked,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.hash_elements,,"current, max",elements,"ZFS ARC Hash Elements",line,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-zfs.hash_chains,,"current, max",chains,"ZFS ARC Hash Chains",line,,proc.plugin,/proc/spl/kstat/zfs/arcstats
-btrfs.disk,btrfs filesystem,"unallocated, data_free, data_used, meta_free, meta_used, sys_free, sys_used",MiB,"BTRFS Physical Disk Allocation",stacked,"filesystem_uuid, filesystem_label",proc.plugin,/sys/fs/btrfs
-btrfs.data,btrfs filesystem,"free, used",MiB,"BTRFS Data Allocation",stacked,"filesystem_uuid, filesystem_label",proc.plugin,/sys/fs/btrfs
-btrfs.metadata,btrfs filesystem,"free, used, reserved",MiB,"BTRFS Metadata Allocation",stacked,"filesystem_uuid, filesystem_label",proc.plugin,/sys/fs/btrfs
-btrfs.system,btrfs filesystem,"free, used",MiB,"BTRFS System Allocation",stacked,"filesystem_uuid, filesystem_label",proc.plugin,/sys/fs/btrfs
-btrfs.commits,btrfs filesystem,commits,commits,"BTRFS Commits",line,"filesystem_uuid, filesystem_label",proc.plugin,/sys/fs/btrfs
-btrfs.commits_perc_time,btrfs filesystem,commits,percentage,"BTRFS Commits Time Share",line,"filesystem_uuid, filesystem_label",proc.plugin,/sys/fs/btrfs
-btrfs.commit_timings,btrfs filesystem,"last, max",ms,"BTRFS Commit Timings",line,"filesystem_uuid, filesystem_label",proc.plugin,/sys/fs/btrfs
-btrfs.device_errors,btrfs device,"write_errs, read_errs, flush_errs, corruption_errs, generation_errs",errors,"BTRFS Device Errors",line,"device_id, filesystem_uuid, filesystem_label",proc.plugin,/sys/fs/btrfs
-powersupply.capacity,power device,capacity,percentage,Battery capacity,line,device,proc.plugin,/sys/class/power_supply
-powersupply.charge,power device,"empty_design, empty, now, full, full_design",Ah,Battery charge,line,device,proc.plugin,/sys/class/power_supply
-powersupply.energy,power device,"empty_design, empty, now, full, full_design",Wh,Battery energy,line,device,proc.plugin,/sys/class/power_supply
-powersupply.voltage,power device,"min_design, min, now, max, max_design",V,Power supply voltage,line,device,proc.plugin,/sys/class/power_supply
\ No newline at end of file
diff --git a/collectors/proc.plugin/plugin_proc.c b/collectors/proc.plugin/plugin_proc.c
index c1a3293f..fbcaa614 100644
--- a/collectors/proc.plugin/plugin_proc.c
+++ b/collectors/proc.plugin/plugin_proc.c
@@ -33,7 +33,8 @@ static struct proc_module {
{.name = "/proc/meminfo", .dim = "meminfo", .func = do_proc_meminfo},
{.name = "/sys/kernel/mm/ksm", .dim = "ksm", .func = do_sys_kernel_mm_ksm},
{.name = "/sys/block/zram", .dim = "zram", .func = do_sys_block_zram},
- {.name = "/sys/devices/system/edac/mc", .dim = "ecc", .func = do_proc_sys_devices_system_edac_mc},
+ {.name = "/sys/devices/system/edac/mc", .dim = "edac", .func = do_proc_sys_devices_system_edac_mc},
+ {.name = "/sys/devices/pci/aer", .dim = "pci_aer", .func = do_proc_sys_devices_pci_aer},
{.name = "/sys/devices/system/node", .dim = "numa", .func = do_proc_sys_devices_system_node},
{.name = "/proc/pagetypeinfo", .dim = "pagetypeinfo", .func = do_proc_pagetypeinfo},
@@ -69,8 +70,11 @@ static struct proc_module {
// IPC metrics
{.name = "ipc", .dim = "ipc", .func = do_ipc},
- {.name = "/sys/class/power_supply", .dim = "power_supply", .func = do_sys_class_power_supply},
// linux power supply metrics
+ {.name = "/sys/class/power_supply", .dim = "power_supply", .func = do_sys_class_power_supply},
+
+ // GPU metrics
+ {.name = "/sys/class/drm", .dim = "drm", .func = do_sys_class_drm},
// the terminator of this array
{.name = NULL, .dim = NULL, .func = NULL}
diff --git a/collectors/proc.plugin/plugin_proc.h b/collectors/proc.plugin/plugin_proc.h
index 2b2cabca..a90f4838 100644
--- a/collectors/proc.plugin/plugin_proc.h
+++ b/collectors/proc.plugin/plugin_proc.h
@@ -34,6 +34,7 @@ int do_proc_net_stat_synproxy(int update_every, usec_t dt);
int do_proc_net_softnet_stat(int update_every, usec_t dt);
int do_proc_uptime(int update_every, usec_t dt);
int do_proc_sys_devices_system_edac_mc(int update_every, usec_t dt);
+int do_proc_sys_devices_pci_aer(int update_every, usec_t dt);
int do_proc_sys_devices_system_node(int update_every, usec_t dt);
int do_proc_spl_kstat_zfs_arcstats(int update_every, usec_t dt);
int do_proc_spl_kstat_zfs_pool_state(int update_every, usec_t dt);
@@ -45,6 +46,7 @@ int do_ipc(int update_every, usec_t dt);
int do_sys_class_power_supply(int update_every, usec_t dt);
int do_proc_pagetypeinfo(int update_every, usec_t dt);
int do_sys_class_infiniband(int update_every, usec_t dt);
+int do_sys_class_drm(int update_every, usec_t dt);
int get_numa_node_count(void);
// metrics that need to be shared among data collectors
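The two hunks above register the new collectors in the plugin's static module table and declare their entry points (`do_proc_sys_devices_pci_aer()`, `do_sys_class_drm()`). As a minimal standalone sketch of that dispatch pattern — hypothetical names, not netdata code — each entry pairs the path a collector reads with the function that collects it, and a NULL name terminates the list:

```
/* Minimal sketch of the module-table dispatch pattern (hypothetical names). */
#include <stdio.h>

typedef int (*collector_fn)(int update_every);

struct collector {
    const char *name;   /* path the collector reads, e.g. "/sys/class/drm" */
    const char *dim;    /* short id for the collector, e.g. "drm" */
    collector_fn func;  /* by convention in this sketch, non-zero return disables it */
};

static int do_demo(int update_every) { (void)update_every; return 0; }

static struct collector table[] = {
    {.name = "/sys/class/drm",       .dim = "drm",     .func = do_demo},
    {.name = "/sys/devices/pci/aer", .dim = "pci_aer", .func = do_demo},
    {.name = NULL, .dim = NULL, .func = NULL},   /* terminator, as in the hunk above */
};

int main(void) {
    for (struct collector *c = table; c->name; c++)
        if (c->func(1) != 0)
            printf("collector %s disabled\n", c->name);
    return 0;
}
```

The sketch only keeps the table/terminator shape visible in the hunk; the real plugin loop also handles per-module configuration and timing.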
diff --git a/collectors/proc.plugin/proc_meminfo.c b/collectors/proc.plugin/proc_meminfo.c
index 6988c70e..cd1ba872 100644
--- a/collectors/proc.plugin/proc_meminfo.c
+++ b/collectors/proc.plugin/proc_meminfo.c
@@ -9,58 +9,92 @@ int do_proc_meminfo(int update_every, usec_t dt) {
(void)dt;
static procfile *ff = NULL;
- static int do_ram = -1, do_swap = -1, do_hwcorrupt = -1, do_committed = -1, do_writeback = -1, do_kernel = -1, do_slab = -1, do_hugepages = -1, do_transparent_hugepages = -1;
- static int do_percpu = 0;
+ static int do_ram = -1
+ , do_swap = -1
+ , do_hwcorrupt = -1
+ , do_committed = -1
+ , do_writeback = -1
+ , do_kernel = -1
+ , do_slab = -1
+ , do_hugepages = -1
+ , do_transparent_hugepages = -1
+ , do_reclaiming = -1
+ , do_high_low = -1
+ , do_cma = -1
+ , do_directmap = -1;
static ARL_BASE *arl_base = NULL;
- static ARL_ENTRY *arl_hwcorrupted = NULL, *arl_memavailable = NULL;
+ static ARL_ENTRY *arl_hwcorrupted = NULL, *arl_memavailable = NULL, *arl_hugepages_total = NULL,
+ *arl_zswapped = NULL, *arl_high_low = NULL, *arl_cma_total = NULL,
+ *arl_directmap4k = NULL, *arl_directmap2m = NULL, *arl_directmap4m = NULL, *arl_directmap1g = NULL;
static unsigned long long
- MemTotal = 0,
- MemFree = 0,
- MemAvailable = 0,
- Buffers = 0,
- Cached = 0,
- //SwapCached = 0,
- //Active = 0,
- //Inactive = 0,
- //ActiveAnon = 0,
- //InactiveAnon = 0,
- //ActiveFile = 0,
- //InactiveFile = 0,
- //Unevictable = 0,
- //Mlocked = 0,
- SwapTotal = 0,
- SwapFree = 0,
- Dirty = 0,
- Writeback = 0,
- //AnonPages = 0,
- //Mapped = 0,
- Shmem = 0,
- Slab = 0,
- SReclaimable = 0,
- SUnreclaim = 0,
- KernelStack = 0,
- PageTables = 0,
- NFS_Unstable = 0,
- Bounce = 0,
- WritebackTmp = 0,
- //CommitLimit = 0,
- Committed_AS = 0,
- //VmallocTotal = 0,
- VmallocUsed = 0,
- //VmallocChunk = 0,
- Percpu = 0,
- AnonHugePages = 0,
- ShmemHugePages = 0,
- HugePages_Total = 0,
- HugePages_Free = 0,
- HugePages_Rsvd = 0,
- HugePages_Surp = 0,
- Hugepagesize = 0,
- //DirectMap4k = 0,
- //DirectMap2M = 0,
- HardwareCorrupted = 0;
+ MemTotal = 0
+ , MemFree = 0
+ , MemAvailable = 0
+ , Buffers = 0
+ , Cached = 0
+ , SwapCached = 0
+ , Active = 0
+ , Inactive = 0
+ , ActiveAnon = 0
+ , InactiveAnon = 0
+ , ActiveFile = 0
+ , InactiveFile = 0
+ , Unevictable = 0
+ , Mlocked = 0
+ , HighTotal = 0
+ , HighFree = 0
+ , LowTotal = 0
+ , LowFree = 0
+ , MmapCopy = 0
+ , SwapTotal = 0
+ , SwapFree = 0
+ , Zswap = 0
+ , Zswapped = 0
+ , Dirty = 0
+ , Writeback = 0
+ , AnonPages = 0
+ , Mapped = 0
+ , Shmem = 0
+ , KReclaimable = 0
+ , Slab = 0
+ , SReclaimable = 0
+ , SUnreclaim = 0
+ , KernelStack = 0
+ , ShadowCallStack = 0
+ , PageTables = 0
+ , SecPageTables = 0
+ , NFS_Unstable = 0
+ , Bounce = 0
+ , WritebackTmp = 0
+ , CommitLimit = 0
+ , Committed_AS = 0
+ , VmallocTotal = 0
+ , VmallocUsed = 0
+ , VmallocChunk = 0
+ , Percpu = 0
+ //, EarlyMemtestBad = 0
+ , HardwareCorrupted = 0
+ , AnonHugePages = 0
+ , ShmemHugePages = 0
+ , ShmemPmdMapped = 0
+ , FileHugePages = 0
+ , FilePmdMapped = 0
+ , CmaTotal = 0
+ , CmaFree = 0
+ //, Unaccepted = 0
+ , HugePages_Total = 0
+ , HugePages_Free = 0
+ , HugePages_Rsvd = 0
+ , HugePages_Surp = 0
+ , Hugepagesize = 0
+ //, Hugetlb = 0
+ , DirectMap4k = 0
+ , DirectMap2M = 0
+ , DirectMap4M = 0
+ , DirectMap1G = 0
+ ;
if(unlikely(!arl_base)) {
do_ram = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "system ram", 1);
@@ -72,6 +106,12 @@ int do_proc_meminfo(int update_every, usec_t dt) {
do_slab = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "slab memory", 1);
do_hugepages = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "hugepages", CONFIG_BOOLEAN_AUTO);
do_transparent_hugepages = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "transparent hugepages", CONFIG_BOOLEAN_AUTO);
+ do_reclaiming = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "memory reclaiming", CONFIG_BOOLEAN_AUTO);
+ do_high_low = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "high low memory", CONFIG_BOOLEAN_AUTO);
+ do_cma = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "cma memory", CONFIG_BOOLEAN_AUTO);
+ do_directmap = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "direct maps", CONFIG_BOOLEAN_AUTO);
+
+ // https://github.com/torvalds/linux/blob/master/fs/proc/meminfo.c
arl_base = arl_create("meminfo", NULL, 60);
arl_expect(arl_base, "MemTotal", &MemTotal);
@@ -79,46 +119,90 @@ int do_proc_meminfo(int update_every, usec_t dt) {
arl_memavailable = arl_expect(arl_base, "MemAvailable", &MemAvailable);
arl_expect(arl_base, "Buffers", &Buffers);
arl_expect(arl_base, "Cached", &Cached);
- //arl_expect(arl_base, "SwapCached", &SwapCached);
- //arl_expect(arl_base, "Active", &Active);
- //arl_expect(arl_base, "Inactive", &Inactive);
- //arl_expect(arl_base, "ActiveAnon", &ActiveAnon);
- //arl_expect(arl_base, "InactiveAnon", &InactiveAnon);
- //arl_expect(arl_base, "ActiveFile", &ActiveFile);
- //arl_expect(arl_base, "InactiveFile", &InactiveFile);
- //arl_expect(arl_base, "Unevictable", &Unevictable);
- //arl_expect(arl_base, "Mlocked", &Mlocked);
+ arl_expect(arl_base, "SwapCached", &SwapCached);
+ arl_expect(arl_base, "Active", &Active);
+ arl_expect(arl_base, "Inactive", &Inactive);
+ arl_expect(arl_base, "Active(anon)", &ActiveAnon);
+ arl_expect(arl_base, "Inactive(anon)", &InactiveAnon);
+ arl_expect(arl_base, "Active(file)", &ActiveFile);
+ arl_expect(arl_base, "Inactive(file)", &InactiveFile);
+ arl_expect(arl_base, "Unevictable", &Unevictable);
+ arl_expect(arl_base, "Mlocked", &Mlocked);
+
+ // CONFIG_HIGHMEM
+ arl_high_low = arl_expect(arl_base, "HighTotal", &HighTotal);
+ arl_expect(arl_base, "HighFree", &HighFree);
+ arl_expect(arl_base, "LowTotal", &LowTotal);
+ arl_expect(arl_base, "LowFree", &LowFree);
+
+ // CONFIG_MMU
+ arl_expect(arl_base, "MmapCopy", &MmapCopy);
+
arl_expect(arl_base, "SwapTotal", &SwapTotal);
arl_expect(arl_base, "SwapFree", &SwapFree);
+
+ // CONFIG_ZSWAP
+ arl_zswapped = arl_expect(arl_base, "Zswap", &Zswap);
+ arl_expect(arl_base, "Zswapped", &Zswapped);
+
arl_expect(arl_base, "Dirty", &Dirty);
arl_expect(arl_base, "Writeback", &Writeback);
- //arl_expect(arl_base, "AnonPages", &AnonPages);
- //arl_expect(arl_base, "Mapped", &Mapped);
+ arl_expect(arl_base, "AnonPages", &AnonPages);
+ arl_expect(arl_base, "Mapped", &Mapped);
arl_expect(arl_base, "Shmem", &Shmem);
+ arl_expect(arl_base, "KReclaimable", &KReclaimable);
arl_expect(arl_base, "Slab", &Slab);
arl_expect(arl_base, "SReclaimable", &SReclaimable);
arl_expect(arl_base, "SUnreclaim", &SUnreclaim);
arl_expect(arl_base, "KernelStack", &KernelStack);
+
+ // CONFIG_SHADOW_CALL_STACK
+ arl_expect(arl_base, "ShadowCallStack", &ShadowCallStack);
+
arl_expect(arl_base, "PageTables", &PageTables);
+ arl_expect(arl_base, "SecPageTables", &SecPageTables);
arl_expect(arl_base, "NFS_Unstable", &NFS_Unstable);
arl_expect(arl_base, "Bounce", &Bounce);
arl_expect(arl_base, "WritebackTmp", &WritebackTmp);
- //arl_expect(arl_base, "CommitLimit", &CommitLimit);
+ arl_expect(arl_base, "CommitLimit", &CommitLimit);
arl_expect(arl_base, "Committed_AS", &Committed_AS);
- //arl_expect(arl_base, "VmallocTotal", &VmallocTotal);
+ arl_expect(arl_base, "VmallocTotal", &VmallocTotal);
arl_expect(arl_base, "VmallocUsed", &VmallocUsed);
- //arl_expect(arl_base, "VmallocChunk", &VmallocChunk);
+ arl_expect(arl_base, "VmallocChunk", &VmallocChunk);
arl_expect(arl_base, "Percpu", &Percpu);
+
+ // CONFIG_MEMTEST
+ //arl_expect(arl_base, "EarlyMemtestBad", &EarlyMemtestBad);
+
+ // CONFIG_MEMORY_FAILURE
arl_hwcorrupted = arl_expect(arl_base, "HardwareCorrupted", &HardwareCorrupted);
+
+ // CONFIG_TRANSPARENT_HUGEPAGE
arl_expect(arl_base, "AnonHugePages", &AnonHugePages);
arl_expect(arl_base, "ShmemHugePages", &ShmemHugePages);
- arl_expect(arl_base, "HugePages_Total", &HugePages_Total);
+ arl_expect(arl_base, "ShmemPmdMapped", &ShmemPmdMapped);
+ arl_expect(arl_base, "FileHugePages", &FileHugePages);
+ arl_expect(arl_base, "FilePmdMapped", &FilePmdMapped);
+
+ // CONFIG_CMA
+ arl_cma_total = arl_expect(arl_base, "CmaTotal", &CmaTotal);
+ arl_expect(arl_base, "CmaFree", &CmaFree);
+
+ // CONFIG_UNACCEPTED_MEMORY
+ //arl_expect(arl_base, "Unaccepted", &Unaccepted);
+
+ // these appear only when hugepages are supported
+ arl_hugepages_total = arl_expect(arl_base, "HugePages_Total", &HugePages_Total);
arl_expect(arl_base, "HugePages_Free", &HugePages_Free);
arl_expect(arl_base, "HugePages_Rsvd", &HugePages_Rsvd);
arl_expect(arl_base, "HugePages_Surp", &HugePages_Surp);
arl_expect(arl_base, "Hugepagesize", &Hugepagesize);
- //arl_expect(arl_base, "DirectMap4k", &DirectMap4k);
- //arl_expect(arl_base, "DirectMap2M", &DirectMap2M);
+ //arl_expect(arl_base, "Hugetlb", &Hugetlb);
+
+ arl_directmap4k = arl_expect(arl_base, "DirectMap4k", &DirectMap4k);
+ arl_directmap2m = arl_expect(arl_base, "DirectMap2M", &DirectMap2M);
+ arl_directmap4m = arl_expect(arl_base, "DirectMap4M", &DirectMap4M);
+ arl_directmap1g = arl_expect(arl_base, "DirectMap1G", &DirectMap1G);
}
if(unlikely(!ff)) {
@@ -136,26 +220,17 @@ int do_proc_meminfo(int update_every, usec_t dt) {
size_t lines = procfile_lines(ff), l;
arl_begin(arl_base);
-
- static int first_ff_read = 1;
-
for(l = 0; l < lines ;l++) {
size_t words = procfile_linewords(ff, l);
if(unlikely(words < 2)) continue;
- if (first_ff_read && !strcmp(procfile_lineword(ff, l, 0), "Percpu"))
- do_percpu = 1;
-
if(unlikely(arl_check(arl_base,
procfile_lineword(ff, l, 0),
procfile_lineword(ff, l, 1)))) break;
}
- if (first_ff_read)
- first_ff_read = 0;
-
// http://calimeroteknik.free.fr/blag/?article20/really-used-memory-on-gnu-linux
- unsigned long long MemCached = Cached + SReclaimable - Shmem;
+ unsigned long long MemCached = Cached + SReclaimable + KReclaimable - Shmem;
unsigned long long MemUsed = MemTotal - MemFree - MemCached - Buffers;
// The Linux kernel doesn't report ZFS ARC usage as cache memory (the ARC is included in the total used system memory)
if (!inside_lxc_container) {
@@ -207,7 +282,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
"mem"
, "available"
, NULL
- , "system"
+ , "overview"
, NULL
, "Available RAM for applications"
, "MiB"
@@ -238,7 +313,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
if(unlikely(!st_system_swap)) {
st_system_swap = rrdset_create_localhost(
- "system"
+ "mem"
, "swap"
, NULL
, "swap"
@@ -247,7 +322,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
, "MiB"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_MEMINFO_NAME
- , NETDATA_CHART_PRIO_SYSTEM_SWAP
+ , NETDATA_CHART_PRIO_MEM_SWAP
, update_every
, RRDSET_TYPE_STACKED
);
@@ -261,6 +336,62 @@ int do_proc_meminfo(int update_every, usec_t dt) {
rrddim_set_by_pointer(st_system_swap, rd_used, SwapUsed);
rrddim_set_by_pointer(st_system_swap, rd_free, SwapFree);
rrdset_done(st_system_swap);
+
+ {
+ static RRDSET *st_mem_swap_cached = NULL;
+ static RRDDIM *rd_cached = NULL;
+
+ if (unlikely(!st_mem_swap_cached)) {
+ st_mem_swap_cached = rrdset_create_localhost(
+ "mem"
+ , "swap_cached"
+ , NULL
+ , "swap"
+ , NULL
+ , "Swap Memory Cached in RAM"
+ , "MiB"
+ , PLUGIN_PROC_NAME
+ , PLUGIN_PROC_MODULE_MEMINFO_NAME
+ , NETDATA_CHART_PRIO_MEM_SWAP + 1
+ , update_every
+ , RRDSET_TYPE_AREA
+ );
+
+ rd_cached = rrddim_add(st_mem_swap_cached, "cached", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ rrddim_set_by_pointer(st_mem_swap_cached, rd_cached, SwapCached);
+ rrdset_done(st_mem_swap_cached);
+ }
+
+ if(arl_zswapped->flags & ARL_ENTRY_FLAG_FOUND) {
+ static RRDSET *st_mem_zswap = NULL;
+ static RRDDIM *rd_zswap = NULL, *rd_zswapped = NULL;
+
+ if (unlikely(!st_mem_zswap)) {
+ st_mem_zswap = rrdset_create_localhost(
+ "mem"
+ , "zswap"
+ , NULL
+ , "zswap"
+ , NULL
+ , "Zswap Usage"
+ , "MiB"
+ , PLUGIN_PROC_NAME
+ , PLUGIN_PROC_MODULE_MEMINFO_NAME
+ , NETDATA_CHART_PRIO_MEM_ZSWAP
+ , update_every
+ , RRDSET_TYPE_STACKED
+ );
+
+ rd_zswap = rrddim_add(st_mem_zswap, "zswap", "in-ram", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ rd_zswapped = rrddim_add(st_mem_zswap, "zswapped", "on-disk", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ rrddim_set_by_pointer(st_mem_zswap, rd_zswap, Zswap);
+ rrddim_set_by_pointer(st_mem_zswap, rd_zswapped, Zswapped);
+ rrdset_done(st_mem_zswap);
+ }
}
if(arl_hwcorrupted->flags & ARL_ENTRY_FLAG_FOUND &&
@@ -306,7 +437,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
"mem"
, "committed"
, NULL
- , "system"
+ , "overview"
, NULL
, "Committed (Allocated) Memory"
, "MiB"
@@ -335,7 +466,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
"mem"
, "writeback"
, NULL
- , "kernel"
+ , "writeback"
, NULL
, "Writeback Memory"
, "MiB"
@@ -367,7 +498,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
if(do_kernel) {
static RRDSET *st_mem_kernel = NULL;
static RRDDIM *rd_slab = NULL, *rd_kernelstack = NULL, *rd_pagetables = NULL, *rd_vmallocused = NULL,
- *rd_percpu = NULL;
+ *rd_percpu = NULL, *rd_kreclaimable = NULL;
if(unlikely(!st_mem_kernel)) {
st_mem_kernel = rrdset_create_localhost(
@@ -391,16 +522,16 @@ int do_proc_meminfo(int update_every, usec_t dt) {
rd_kernelstack = rrddim_add(st_mem_kernel, "KernelStack", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_pagetables = rrddim_add(st_mem_kernel, "PageTables", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_vmallocused = rrddim_add(st_mem_kernel, "VmallocUsed", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
- if (do_percpu)
- rd_percpu = rrddim_add(st_mem_kernel, "Percpu", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ rd_percpu = rrddim_add(st_mem_kernel, "Percpu", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ rd_kreclaimable = rrddim_add(st_mem_kernel, "KReclaimable", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
}
- rrddim_set_by_pointer(st_mem_kernel, rd_slab, Slab);
- rrddim_set_by_pointer(st_mem_kernel, rd_kernelstack, KernelStack);
- rrddim_set_by_pointer(st_mem_kernel, rd_pagetables, PageTables);
- rrddim_set_by_pointer(st_mem_kernel, rd_vmallocused, VmallocUsed);
- if (do_percpu)
- rrddim_set_by_pointer(st_mem_kernel, rd_percpu, Percpu);
+ rrddim_set_by_pointer(st_mem_kernel, rd_slab, Slab);
+ rrddim_set_by_pointer(st_mem_kernel, rd_kernelstack, KernelStack);
+ rrddim_set_by_pointer(st_mem_kernel, rd_pagetables, PageTables);
+ rrddim_set_by_pointer(st_mem_kernel, rd_vmallocused, VmallocUsed);
+ rrddim_set_by_pointer(st_mem_kernel, rd_percpu, Percpu);
+ rrddim_set_by_pointer(st_mem_kernel, rd_kreclaimable, KReclaimable);
rrdset_done(st_mem_kernel);
}
@@ -436,9 +567,10 @@ int do_proc_meminfo(int update_every, usec_t dt) {
rrdset_done(st_mem_slab);
}
- if(do_hugepages == CONFIG_BOOLEAN_YES || (do_hugepages == CONFIG_BOOLEAN_AUTO &&
+ if(arl_hugepages_total->flags & ARL_ENTRY_FLAG_FOUND &&
+ (do_hugepages == CONFIG_BOOLEAN_YES || (do_hugepages == CONFIG_BOOLEAN_AUTO &&
((Hugepagesize && HugePages_Total) ||
- netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) {
+ netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)))) {
do_hugepages = CONFIG_BOOLEAN_YES;
static RRDSET *st_mem_hugepages = NULL;
@@ -455,7 +587,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
, "MiB"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_MEMINFO_NAME
- , NETDATA_CHART_PRIO_MEM_HUGEPAGES + 1
+ , NETDATA_CHART_PRIO_MEM_HUGEPAGES
, update_every
, RRDSET_TYPE_STACKED
);
@@ -487,7 +619,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
if(unlikely(!st_mem_transparent_hugepages)) {
st_mem_transparent_hugepages = rrdset_create_localhost(
"mem"
- , "transparent_hugepages"
+ , "thp"
, NULL
, "hugepages"
, NULL
@@ -495,7 +627,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
, "MiB"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_MEMINFO_NAME
- , NETDATA_CHART_PRIO_MEM_HUGEPAGES
+ , NETDATA_CHART_PRIO_MEM_HUGEPAGES + 1
, update_every
, RRDSET_TYPE_STACKED
);
@@ -509,6 +641,206 @@ int do_proc_meminfo(int update_every, usec_t dt) {
rrddim_set_by_pointer(st_mem_transparent_hugepages, rd_anonymous, AnonHugePages);
rrddim_set_by_pointer(st_mem_transparent_hugepages, rd_shared, ShmemHugePages);
rrdset_done(st_mem_transparent_hugepages);
+
+ {
+ static RRDSET *st_mem_thp_details = NULL;
+ static RRDDIM *rd_shmem_pmd_mapped = NULL, *rd_file_huge_pages = NULL, *rd_file_pmd_mapped = NULL;
+
+ if(unlikely(!st_mem_thp_details)) {
+ st_mem_thp_details = rrdset_create_localhost(
+ "mem"
+ , "thp_details"
+ , NULL
+ , "hugepages"
+ , NULL
+ , "Details of Transparent HugePages Usage"
+ , "MiB"
+ , PLUGIN_PROC_NAME
+ , PLUGIN_PROC_MODULE_MEMINFO_NAME
+ , NETDATA_CHART_PRIO_MEM_HUGEPAGES_DETAILS
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rrdset_flag_set(st_mem_thp_details, RRDSET_FLAG_DETAIL);
+
+ rd_shmem_pmd_mapped = rrddim_add(st_mem_thp_details, "shmem_pmd", "ShmemPmdMapped", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ rd_file_huge_pages = rrddim_add(st_mem_thp_details, "file", "FileHugePages", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ rd_file_pmd_mapped = rrddim_add(st_mem_thp_details, "file_pmd", "FilePmdMapped", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ rrddim_set_by_pointer(st_mem_thp_details, rd_shmem_pmd_mapped, ShmemPmdMapped);
+ rrddim_set_by_pointer(st_mem_thp_details, rd_file_huge_pages, FileHugePages);
+ rrddim_set_by_pointer(st_mem_thp_details, rd_file_pmd_mapped, FilePmdMapped);
+ rrdset_done(st_mem_thp_details);
+ }
+ }
+
+ if(do_reclaiming != CONFIG_BOOLEAN_NO) {
+ static RRDSET *st_mem_reclaiming = NULL;
+ static RRDDIM *rd_active = NULL, *rd_inactive = NULL,
+ *rd_active_anon = NULL, *rd_inactive_anon = NULL,
+ *rd_active_file = NULL, *rd_inactive_file = NULL,
+ *rd_unevictable = NULL, *rd_mlocked = NULL;
+
+ if(unlikely(!st_mem_reclaiming)) {
+ st_mem_reclaiming = rrdset_create_localhost(
+ "mem"
+ , "reclaiming"
+ , NULL
+ , "reclaiming"
+ , NULL
+ , "Memory Reclaiming"
+ , "MiB"
+ , PLUGIN_PROC_NAME
+ , PLUGIN_PROC_MODULE_MEMINFO_NAME
+ , NETDATA_CHART_PRIO_MEM_RECLAIMING
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rrdset_flag_set(st_mem_reclaiming, RRDSET_FLAG_DETAIL);
+
+ rd_active = rrddim_add(st_mem_reclaiming, "active", "Active", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ rd_inactive = rrddim_add(st_mem_reclaiming, "inactive", "Inactive", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ rd_active_anon = rrddim_add(st_mem_reclaiming, "active_anon", "Active(anon)", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ rd_inactive_anon = rrddim_add(st_mem_reclaiming, "inactive_anon", "Inactive(anon)", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ rd_active_file = rrddim_add(st_mem_reclaiming, "active_file", "Active(file)", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ rd_inactive_file = rrddim_add(st_mem_reclaiming, "inactive_file", "Inactive(file)", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ rd_unevictable = rrddim_add(st_mem_reclaiming, "unevictable", "Unevictable", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ rd_mlocked = rrddim_add(st_mem_reclaiming, "mlocked", "Mlocked", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ rrddim_set_by_pointer(st_mem_reclaiming, rd_active, Active);
+ rrddim_set_by_pointer(st_mem_reclaiming, rd_inactive, Inactive);
+ rrddim_set_by_pointer(st_mem_reclaiming, rd_active_anon, ActiveAnon);
+ rrddim_set_by_pointer(st_mem_reclaiming, rd_inactive_anon, InactiveAnon);
+ rrddim_set_by_pointer(st_mem_reclaiming, rd_active_file, ActiveFile);
+ rrddim_set_by_pointer(st_mem_reclaiming, rd_inactive_file, InactiveFile);
+ rrddim_set_by_pointer(st_mem_reclaiming, rd_unevictable, Unevictable);
+ rrddim_set_by_pointer(st_mem_reclaiming, rd_mlocked, Mlocked);
+
+ rrdset_done(st_mem_reclaiming);
+ }
+
+ if(do_high_low != CONFIG_BOOLEAN_NO && (arl_high_low->flags & ARL_ENTRY_FLAG_FOUND)) {
+ static RRDSET *st_mem_high_low = NULL;
+ static RRDDIM *rd_high_used = NULL, *rd_low_used = NULL;
+ static RRDDIM *rd_high_free = NULL, *rd_low_free = NULL;
+
+ if(unlikely(!st_mem_high_low)) {
+ st_mem_high_low = rrdset_create_localhost(
+ "mem"
+ , "high_low"
+ , NULL
+ , "high_low"
+ , NULL
+ , "High and Low Used and Free Memory Areas"
+ , "MiB"
+ , PLUGIN_PROC_NAME
+ , PLUGIN_PROC_MODULE_MEMINFO_NAME
+ , NETDATA_CHART_PRIO_MEM_HIGH_LOW
+ , update_every
+ , RRDSET_TYPE_STACKED
+ );
+
+ rrdset_flag_set(st_mem_high_low, RRDSET_FLAG_DETAIL);
+
+ rd_high_used = rrddim_add(st_mem_high_low, "high_used", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ rd_low_used = rrddim_add(st_mem_high_low, "low_used", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ rd_high_free = rrddim_add(st_mem_high_low, "high_free", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ rd_low_free = rrddim_add(st_mem_high_low, "low_free", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ rrddim_set_by_pointer(st_mem_high_low, rd_high_used, HighTotal - HighFree);
+ rrddim_set_by_pointer(st_mem_high_low, rd_low_used, LowTotal - LowFree);
+ rrddim_set_by_pointer(st_mem_high_low, rd_high_free, HighFree);
+ rrddim_set_by_pointer(st_mem_high_low, rd_low_free, LowFree);
+ rrdset_done(st_mem_high_low);
+ }
+
+ if(do_cma == CONFIG_BOOLEAN_YES || (do_cma == CONFIG_BOOLEAN_AUTO && (arl_cma_total->flags & ARL_ENTRY_FLAG_FOUND) && CmaTotal)) {
+ do_cma = CONFIG_BOOLEAN_YES;
+
+ static RRDSET *st_mem_cma = NULL;
+ static RRDDIM *rd_used = NULL, *rd_free = NULL;
+
+ if(unlikely(!st_mem_cma)) {
+ st_mem_cma = rrdset_create_localhost(
+ "mem"
+ , "cma"
+ , NULL
+ , "cma"
+ , NULL
+ , "Contiguous Memory Allocator (CMA) Memory"
+ , "MiB"
+ , PLUGIN_PROC_NAME
+ , PLUGIN_PROC_MODULE_MEMINFO_NAME
+ , NETDATA_CHART_PRIO_MEM_CMA
+ , update_every
+ , RRDSET_TYPE_STACKED
+ );
+
+ rd_used = rrddim_add(st_mem_cma, "used", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ rd_free = rrddim_add(st_mem_cma, "free", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ rrddim_set_by_pointer(st_mem_cma, rd_used, CmaTotal - CmaFree);
+ rrddim_set_by_pointer(st_mem_cma, rd_free, CmaFree);
+ rrdset_done(st_mem_cma);
+ }
+
+ if(do_directmap != CONFIG_BOOLEAN_NO &&
+ ((arl_directmap4k->flags & ARL_ENTRY_FLAG_FOUND) ||
+ (arl_directmap2m->flags & ARL_ENTRY_FLAG_FOUND) ||
+ (arl_directmap4m->flags & ARL_ENTRY_FLAG_FOUND) ||
+ (arl_directmap1g->flags & ARL_ENTRY_FLAG_FOUND)))
+ {
+ static RRDSET *st_mem_directmap = NULL;
+ static RRDDIM *rd_4k = NULL, *rd_2m = NULL, *rd_1g = NULL, *rd_4m = NULL;
+
+ if(unlikely(!st_mem_directmap)) {
+ st_mem_directmap = rrdset_create_localhost(
+ "mem"
+ , "directmaps"
+ , NULL
+ , "overview"
+ , NULL
+ , "Direct Memory Mappings"
+ , "MiB"
+ , PLUGIN_PROC_NAME
+ , PLUGIN_PROC_MODULE_MEMINFO_NAME
+ , NETDATA_CHART_PRIO_MEM_DIRECTMAP
+ , update_every
+ , RRDSET_TYPE_STACKED
+ );
+
+ if(arl_directmap4k->flags & ARL_ENTRY_FLAG_FOUND)
+ rd_4k = rrddim_add(st_mem_directmap, "4k", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+
+ if(arl_directmap2m->flags & ARL_ENTRY_FLAG_FOUND)
+ rd_2m = rrddim_add(st_mem_directmap, "2m", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+
+ if(arl_directmap4m->flags & ARL_ENTRY_FLAG_FOUND)
+ rd_4m = rrddim_add(st_mem_directmap, "4m", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+
+ if(arl_directmap1g->flags & ARL_ENTRY_FLAG_FOUND)
+ rd_1g = rrddim_add(st_mem_directmap, "1g", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ if(rd_4k)
+ rrddim_set_by_pointer(st_mem_directmap, rd_4k, DirectMap4k);
+
+ if(rd_2m)
+ rrddim_set_by_pointer(st_mem_directmap, rd_2m, DirectMap2M);
+
+ if(rd_4m)
+ rrddim_set_by_pointer(st_mem_directmap, rd_4m, DirectMap4M);
+
+ if(rd_1g)
+ rrddim_set_by_pointer(st_mem_directmap, rd_1g, DirectMap1G);
+
+ rrdset_done(st_mem_directmap);
}
return 0;
diff --git a/collectors/proc.plugin/proc_pressure.c b/collectors/proc.plugin/proc_pressure.c
index 28e4c592..4037e60a 100644
--- a/collectors/proc.plugin/proc_pressure.c
+++ b/collectors/proc.plugin/proc_pressure.c
@@ -12,28 +12,55 @@ static int pressure_update_every = 0;
static struct pressure resources[PRESSURE_NUM_RESOURCES] = {
{
- .some =
- {.share_time = {.id = "cpu_some_pressure", .title = "CPU some pressure"},
- .total_time = {.id = "cpu_some_pressure_stall_time", .title = "CPU some pressure stall time"}},
- .full =
- {.share_time = {.id = "cpu_full_pressure", .title = "CPU full pressure"},
- .total_time = {.id = "cpu_full_pressure_stall_time", .title = "CPU full pressure stall time"}},
+ .some = {
+ .available = true,
+ .share_time = {.id = "cpu_some_pressure", .title = "CPU some pressure"},
+ .total_time = {.id = "cpu_some_pressure_stall_time", .title = "CPU some pressure stall time"}
+ },
+ .full = {
+ // Disable CPU full pressure.
+ // See https://github.com/torvalds/linux/commit/890d550d7dbac7a31ecaa78732aa22be282bb6b8
+ .available = false,
+ .share_time = {.id = "cpu_full_pressure", .title = "CPU full pressure"},
+ .total_time = {.id = "cpu_full_pressure_stall_time", .title = "CPU full pressure stall time"}
+ },
},
{
- .some =
- {.share_time = {.id = "memory_some_pressure", .title = "Memory some pressure"},
- .total_time = {.id = "memory_some_pressure_stall_time", .title = "Memory some pressure stall time"}},
- .full =
- {.share_time = {.id = "memory_full_pressure", .title = "Memory full pressure"},
- .total_time = {.id = "memory_full_pressure_stall_time", .title = "Memory full pressure stall time"}},
+ .some = {
+ .available = true,
+ .share_time = {.id = "memory_some_pressure", .title = "Memory some pressure"},
+ .total_time = {.id = "memory_some_pressure_stall_time", .title = "Memory some pressure stall time"}
+ },
+ .full = {
+ .available = true,
+ .share_time = {.id = "memory_full_pressure", .title = "Memory full pressure"},
+ .total_time = {.id = "memory_full_pressure_stall_time", .title = "Memory full pressure stall time"}
+ },
},
{
- .some =
- {.share_time = {.id = "io_some_pressure", .title = "I/O some pressure"},
- .total_time = {.id = "io_some_pressure_stall_time", .title = "I/O some pressure stall time"}},
- .full =
- {.share_time = {.id = "io_full_pressure", .title = "I/O full pressure"},
- .total_time = {.id = "io_full_pressure_stall_time", .title = "I/O full pressure stall time"}},
+ .some = {
+ .available = true,
+ .share_time = {.id = "io_some_pressure", .title = "I/O some pressure"},
+ .total_time = {.id = "io_some_pressure_stall_time", .title = "I/O some pressure stall time"}
+ },
+ .full = {
+ .available = true,
+ .share_time = {.id = "io_full_pressure", .title = "I/O full pressure"},
+ .total_time = {.id = "io_full_pressure_stall_time", .title = "I/O full pressure stall time"}
+ },
+ },
+ {
+ .some = {
+ // this is not available
+ .available = false,
+ .share_time = {.id = "irq_some_pressure", .title = "IRQ some pressure"},
+ .total_time = {.id = "irq_some_pressure_stall_time", .title = "IRQ some pressure stall time"}
+ },
+ .full = {
+ .available = true,
+ .share_time = {.id = "irq_full_pressure", .title = "IRQ full pressure"},
+ .total_time = {.id = "irq_full_pressure_stall_time", .title = "IRQ full pressure stall time"}
+ },
},
};
@@ -46,6 +73,7 @@ static struct resource_info {
{ .name = "cpu", .family = "cpu", .section_priority = NETDATA_CHART_PRIO_SYSTEM_CPU },
{ .name = "memory", .family = "ram", .section_priority = NETDATA_CHART_PRIO_SYSTEM_RAM },
{ .name = "io", .family = "disk", .section_priority = NETDATA_CHART_PRIO_SYSTEM_IO },
+ { .name = "irq", .family = "interrupts", .section_priority = NETDATA_CHART_PRIO_SYSTEM_INTERRUPTS },
};
void update_pressure_charts(struct pressure_charts *pcs) {
@@ -65,7 +93,7 @@ void update_pressure_charts(struct pressure_charts *pcs) {
}
}
-static void proc_pressure_do_resource(procfile *ff, int res_idx, int some) {
+static void proc_pressure_do_resource(procfile *ff, int res_idx, size_t line, bool some) {
struct pressure_charts *pcs;
struct resource_info ri;
pcs = some ? &resources[res_idx].some : &resources[res_idx].full;
@@ -93,9 +121,9 @@ static void proc_pressure_do_resource(procfile *ff, int res_idx, int some) {
rrddim_add(pcs->share_time.st, some ? "some 300" : "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
}
- pcs->share_time.value10 = strtod(procfile_lineword(ff, some ? 0 : 1, 2), NULL);
- pcs->share_time.value60 = strtod(procfile_lineword(ff, some ? 0 : 1, 4), NULL);
- pcs->share_time.value300 = strtod(procfile_lineword(ff, some ? 0 : 1, 6), NULL);
+ pcs->share_time.value10 = strtod(procfile_lineword(ff, line, 2), NULL);
+ pcs->share_time.value60 = strtod(procfile_lineword(ff, line, 4), NULL);
+ pcs->share_time.value300 = strtod(procfile_lineword(ff, line, 6), NULL);
if (unlikely(!pcs->total_time.st)) {
pcs->total_time.st = rrdset_create_localhost(
@@ -114,19 +142,19 @@ static void proc_pressure_do_resource(procfile *ff, int res_idx, int some) {
pcs->total_time.rdtotal = rrddim_add(pcs->total_time.st, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
}
- pcs->total_time.value_total = str2ull(procfile_lineword(ff, some ? 0 : 1, 8), NULL) / 1000;
+ pcs->total_time.value_total = str2ull(procfile_lineword(ff, line, 8), NULL) / 1000;
}
-static void proc_pressure_do_resource_some(procfile *ff, int res_idx) {
- proc_pressure_do_resource(ff, res_idx, 1);
+static void proc_pressure_do_resource_some(procfile *ff, int res_idx, size_t line) {
+ proc_pressure_do_resource(ff, res_idx, line, true);
}
-static void proc_pressure_do_resource_full(procfile *ff, int res_idx) {
- proc_pressure_do_resource(ff, res_idx, 0);
+static void proc_pressure_do_resource_full(procfile *ff, int res_idx, size_t line) {
+ proc_pressure_do_resource(ff, res_idx, line, false);
}
int do_proc_pressure(int update_every, usec_t dt) {
- int fail_count = 0;
+ int ok_count = 0;
int i;
static usec_t next_pressure_dt = 0;
@@ -150,6 +178,9 @@ int do_proc_pressure(int update_every, usec_t dt) {
procfile *ff = resource_info[i].pf;
int do_some = resources[i].some.enabled, do_full = resources[i].full.enabled;
+ if (!resources[i].some.available && !resources[i].full.available)
+ continue;
+
if (unlikely(!ff)) {
char filename[FILENAME_MAX + 1];
char config_key[CONFIG_MAX_NAME + 1];
@@ -161,56 +192,66 @@ int do_proc_pressure(int update_every, usec_t dt) {
, base_path
, resource_info[i].name);
+ do_some = resources[i].some.available ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO;
+ do_full = resources[i].full.available ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO;
+
snprintfz(config_key, CONFIG_MAX_NAME, "enable %s some pressure", resource_info[i].name);
- do_some = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES);
+ do_some = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, do_some);
resources[i].some.enabled = do_some;
- // Disable CPU full pressure.
- // See https://github.com/torvalds/linux/commit/890d550d7dbac7a31ecaa78732aa22be282bb6b8
- if (i == 0) {
- do_full = CONFIG_BOOLEAN_NO;
- resources[i].full.enabled = do_full;
- } else {
- snprintfz(config_key, CONFIG_MAX_NAME, "enable %s full pressure", resource_info[i].name);
- do_full = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES);
- resources[i].full.enabled = do_full;
+ snprintfz(config_key, CONFIG_MAX_NAME, "enable %s full pressure", resource_info[i].name);
+ do_full = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, do_full);
+ resources[i].full.enabled = do_full;
+
+ if (!do_full && !do_some) {
+ resources[i].some.available = false;
+ resources[i].full.available = false;
+ continue;
}
- ff = procfile_open(filename, " =", PROCFILE_FLAG_DEFAULT);
+ ff = procfile_open(filename, " =", PROCFILE_FLAG_NO_ERROR_ON_FILE_IO);
if (unlikely(!ff)) {
- collector_error("Cannot read pressure information from %s.", filename);
- fail_count++;
+ // PSI IRQ was added recently (https://github.com/torvalds/linux/commit/52b1364ba0b105122d6de0e719b36db705011ac1)
+ if (strcmp(resource_info[i].name, "irq") != 0)
+ collector_error("Cannot read pressure information from %s.", filename);
+ resources[i].some.available = false;
+ resources[i].full.available = false;
continue;
}
}
ff = procfile_readall(ff);
resource_info[i].pf = ff;
- if (unlikely(!ff)) {
- fail_count++;
+ if (unlikely(!ff))
continue;
- }
size_t lines = procfile_lines(ff);
if (unlikely(lines < 1)) {
collector_error("%s has no lines.", procfile_filename(ff));
- fail_count++;
continue;
}
- if (do_some) {
- proc_pressure_do_resource_some(ff, i);
- update_pressure_charts(&resources[i].some);
- }
- if (do_full && lines > 2) {
- proc_pressure_do_resource_full(ff, i);
- update_pressure_charts(&resources[i].full);
+ for(size_t l = 0; l < lines ;l++) {
+ const char *key = procfile_lineword(ff, l, 0);
+ if(strcmp(key, "some") == 0) {
+ if(do_some) {
+ proc_pressure_do_resource_some(ff, i, l);
+ update_pressure_charts(&resources[i].some);
+ ok_count++;
+ }
+ }
+ else if(strcmp(key, "full") == 0) {
+ if(do_full) {
+ proc_pressure_do_resource_full(ff, i, l);
+ update_pressure_charts(&resources[i].full);
+ ok_count++;
+ }
+ }
}
}
- if (PRESSURE_NUM_RESOURCES == fail_count) {
+ if(!ok_count)
return 1;
- }
return 0;
}
diff --git a/collectors/proc.plugin/proc_pressure.h b/collectors/proc.plugin/proc_pressure.h
index 0cb23315..2e5cab2c 100644
--- a/collectors/proc.plugin/proc_pressure.h
+++ b/collectors/proc.plugin/proc_pressure.h
@@ -3,13 +3,14 @@
#ifndef NETDATA_PROC_PRESSURE_H
#define NETDATA_PROC_PRESSURE_H
-#define PRESSURE_NUM_RESOURCES 3
+#define PRESSURE_NUM_RESOURCES 4
struct pressure {
int updated;
char *filename;
struct pressure_charts {
+ bool available;
int enabled;
struct pressure_share_time_chart {
diff --git a/collectors/proc.plugin/proc_stat.c b/collectors/proc.plugin/proc_stat.c
index f0f31935..a4f76796 100644
--- a/collectors/proc.plugin/proc_stat.c
+++ b/collectors/proc.plugin/proc_stat.c
@@ -494,7 +494,7 @@ int do_proc_stat(int update_every, usec_t dt) {
do_processes = config_get_boolean("plugin:proc:/proc/stat", "processes running", CONFIG_BOOLEAN_YES);
// give sane defaults based on the number of processors
- if(unlikely(get_system_cpus() > 50)) {
+ if(unlikely(get_system_cpus() > 128)) {
// the system has too many processors
keep_per_core_fds_open = CONFIG_BOOLEAN_NO;
do_core_throttle_count = CONFIG_BOOLEAN_NO;
diff --git a/collectors/proc.plugin/proc_vmstat.c b/collectors/proc.plugin/proc_vmstat.c
index ca56e900..b44733b6 100644
--- a/collectors/proc.plugin/proc_vmstat.c
+++ b/collectors/proc.plugin/proc_vmstat.c
@@ -271,7 +271,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
if(unlikely(!st_swapio)) {
st_swapio = rrdset_create_localhost(
- "system"
+ "mem"
, "swapio"
, NULL
, "swap"
@@ -280,7 +280,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
, "KiB/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_VMSTAT_NAME
- , NETDATA_CHART_PRIO_SYSTEM_SWAPIO
+ , NETDATA_CHART_PRIO_MEM_SWAPIO
, update_every
, RRDSET_TYPE_AREA
);
@@ -336,7 +336,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
"mem"
, "pgfaults"
, NULL
- , "system"
+ , "page faults"
, NULL
, "Memory Page Faults"
, "faults/s"
@@ -372,7 +372,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
"mem"
, "oom_kill"
, NULL
- , "system"
+ , "OOM kills"
, NULL
, "Out of Memory Kills"
, "kills/s"
@@ -505,7 +505,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
if(unlikely(!st_zswapio)) {
st_zswapio = rrdset_create_localhost(
- "system"
+ "mem"
, "zswapio"
, NULL
, "zswap"
@@ -514,7 +514,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
, "KiB/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_VMSTAT_NAME
- , NETDATA_CHART_PRIO_SYSTEM_ZSWAPIO
+ , NETDATA_CHART_PRIO_MEM_ZSWAPIO
, update_every
, RRDSET_TYPE_AREA
);
diff --git a/collectors/proc.plugin/sys_class_drm.c b/collectors/proc.plugin/sys_class_drm.c
new file mode 100644
index 00000000..284662cf
--- /dev/null
+++ b/collectors/proc.plugin/sys_class_drm.c
@@ -0,0 +1,1179 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "plugin_proc.h"
+
+#define PLUGIN_PROC_MODULE_DRM_NAME "/sys/class/drm"
+#define CONFIG_SECTION_PLUGIN_PROC_DRM "plugin:proc:/sys/class/drm"
+#define AMDGPU_CHART_TYPE "amdgpu"
+
+struct amdgpu_id_struct {
+ unsigned long long asic_id;
+ unsigned long long pci_rev_id;
+ const char *marketing_name;
+};
+
+/*
+ * About amdgpu_ids list:
+ * ------------------------------------------------------------------------
+ * Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * The list is copied from:
+ * https://raw.githubusercontent.com/Syllo/nvtop/master/src/amdgpu_ids.h
+ *
+ * which is modified from libdrm (MIT License):
+ *
+ * URL: https://gitlab.freedesktop.org/mesa/drm/-/blob/main/data/amdgpu.ids
+ * ------------------------------------------------------------------------
+ * **IMPORTANT**: The amdgpu_ids has to be modified after new GPU releases.
+ * ------------------------------------------------------------------------*/
+
+static const struct amdgpu_id_struct amdgpu_ids[] = {
+ {0x1309, 0x00, "AMD Radeon R7 Graphics"},
+ {0x130A, 0x00, "AMD Radeon R6 Graphics"},
+ {0x130B, 0x00, "AMD Radeon R4 Graphics"},
+ {0x130C, 0x00, "AMD Radeon R7 Graphics"},
+ {0x130D, 0x00, "AMD Radeon R6 Graphics"},
+ {0x130E, 0x00, "AMD Radeon R5 Graphics"},
+ {0x130F, 0x00, "AMD Radeon R7 Graphics"},
+ {0x130F, 0xD4, "AMD Radeon R7 Graphics"},
+ {0x130F, 0xD5, "AMD Radeon R7 Graphics"},
+ {0x130F, 0xD6, "AMD Radeon R7 Graphics"},
+ {0x130F, 0xD7, "AMD Radeon R7 Graphics"},
+ {0x1313, 0x00, "AMD Radeon R7 Graphics"},
+ {0x1313, 0xD4, "AMD Radeon R7 Graphics"},
+ {0x1313, 0xD5, "AMD Radeon R7 Graphics"},
+ {0x1313, 0xD6, "AMD Radeon R7 Graphics"},
+ {0x1315, 0x00, "AMD Radeon R5 Graphics"},
+ {0x1315, 0xD4, "AMD Radeon R5 Graphics"},
+ {0x1315, 0xD5, "AMD Radeon R5 Graphics"},
+ {0x1315, 0xD6, "AMD Radeon R5 Graphics"},
+ {0x1315, 0xD7, "AMD Radeon R5 Graphics"},
+ {0x1316, 0x00, "AMD Radeon R5 Graphics"},
+ {0x1318, 0x00, "AMD Radeon R5 Graphics"},
+ {0x131B, 0x00, "AMD Radeon R4 Graphics"},
+ {0x131C, 0x00, "AMD Radeon R7 Graphics"},
+ {0x131D, 0x00, "AMD Radeon R6 Graphics"},
+ {0x15D8, 0x00, "AMD Radeon RX Vega 8 Graphics WS"},
+ {0x15D8, 0x91, "AMD Radeon Vega 3 Graphics"},
+ {0x15D8, 0x91, "AMD Ryzen Embedded R1606G with Radeon Vega Gfx"},
+ {0x15D8, 0x92, "AMD Radeon Vega 3 Graphics"},
+ {0x15D8, 0x92, "AMD Ryzen Embedded R1505G with Radeon Vega Gfx"},
+ {0x15D8, 0x93, "AMD Radeon Vega 1 Graphics"},
+ {0x15D8, 0xA1, "AMD Radeon Vega 10 Graphics"},
+ {0x15D8, 0xA2, "AMD Radeon Vega 8 Graphics"},
+ {0x15D8, 0xA3, "AMD Radeon Vega 6 Graphics"},
+ {0x15D8, 0xA4, "AMD Radeon Vega 3 Graphics"},
+ {0x15D8, 0xB1, "AMD Radeon Vega 10 Graphics"},
+ {0x15D8, 0xB2, "AMD Radeon Vega 8 Graphics"},
+ {0x15D8, 0xB3, "AMD Radeon Vega 6 Graphics"},
+ {0x15D8, 0xB4, "AMD Radeon Vega 3 Graphics"},
+ {0x15D8, 0xC1, "AMD Radeon Vega 10 Graphics"},
+ {0x15D8, 0xC2, "AMD Radeon Vega 8 Graphics"},
+ {0x15D8, 0xC3, "AMD Radeon Vega 6 Graphics"},
+ {0x15D8, 0xC4, "AMD Radeon Vega 3 Graphics"},
+ {0x15D8, 0xC5, "AMD Radeon Vega 3 Graphics"},
+ {0x15D8, 0xC8, "AMD Radeon Vega 11 Graphics"},
+ {0x15D8, 0xC9, "AMD Radeon Vega 8 Graphics"},
+ {0x15D8, 0xCA, "AMD Radeon Vega 11 Graphics"},
+ {0x15D8, 0xCB, "AMD Radeon Vega 8 Graphics"},
+ {0x15D8, 0xCC, "AMD Radeon Vega 3 Graphics"},
+ {0x15D8, 0xCE, "AMD Radeon Vega 3 Graphics"},
+ {0x15D8, 0xCF, "AMD Ryzen Embedded R1305G with Radeon Vega Gfx"},
+ {0x15D8, 0xD1, "AMD Radeon Vega 10 Graphics"},
+ {0x15D8, 0xD2, "AMD Radeon Vega 8 Graphics"},
+ {0x15D8, 0xD3, "AMD Radeon Vega 6 Graphics"},
+ {0x15D8, 0xD4, "AMD Radeon Vega 3 Graphics"},
+ {0x15D8, 0xD8, "AMD Radeon Vega 11 Graphics"},
+ {0x15D8, 0xD9, "AMD Radeon Vega 8 Graphics"},
+ {0x15D8, 0xDA, "AMD Radeon Vega 11 Graphics"},
+ {0x15D8, 0xDB, "AMD Radeon Vega 3 Graphics"},
+ {0x15D8, 0xDB, "AMD Radeon Vega 8 Graphics"},
+ {0x15D8, 0xDC, "AMD Radeon Vega 3 Graphics"},
+ {0x15D8, 0xDD, "AMD Radeon Vega 3 Graphics"},
+ {0x15D8, 0xDE, "AMD Radeon Vega 3 Graphics"},
+ {0x15D8, 0xDF, "AMD Radeon Vega 3 Graphics"},
+ {0x15D8, 0xE3, "AMD Radeon Vega 3 Graphics"},
+ {0x15D8, 0xE4, "AMD Ryzen Embedded R1102G with Radeon Vega Gfx"},
+ {0x15DD, 0x81, "AMD Ryzen Embedded V1807B with Radeon Vega Gfx"},
+ {0x15DD, 0x82, "AMD Ryzen Embedded V1756B with Radeon Vega Gfx"},
+ {0x15DD, 0x83, "AMD Ryzen Embedded V1605B with Radeon Vega Gfx"},
+ {0x15DD, 0x84, "AMD Radeon Vega 6 Graphics"},
+ {0x15DD, 0x85, "AMD Ryzen Embedded V1202B with Radeon Vega Gfx"},
+ {0x15DD, 0x86, "AMD Radeon Vega 11 Graphics"},
+ {0x15DD, 0x88, "AMD Radeon Vega 8 Graphics"},
+ {0x15DD, 0xC1, "AMD Radeon Vega 11 Graphics"},
+ {0x15DD, 0xC2, "AMD Radeon Vega 8 Graphics"},
+ {0x15DD, 0xC3, "AMD Radeon Vega 3 / 10 Graphics"},
+ {0x15DD, 0xC4, "AMD Radeon Vega 8 Graphics"},
+ {0x15DD, 0xC5, "AMD Radeon Vega 3 Graphics"},
+ {0x15DD, 0xC6, "AMD Radeon Vega 11 Graphics"},
+ {0x15DD, 0xC8, "AMD Radeon Vega 8 Graphics"},
+ {0x15DD, 0xC9, "AMD Radeon Vega 11 Graphics"},
+ {0x15DD, 0xCA, "AMD Radeon Vega 8 Graphics"},
+ {0x15DD, 0xCB, "AMD Radeon Vega 3 Graphics"},
+ {0x15DD, 0xCC, "AMD Radeon Vega 6 Graphics"},
+ {0x15DD, 0xCE, "AMD Radeon Vega 3 Graphics"},
+ {0x15DD, 0xCF, "AMD Radeon Vega 3 Graphics"},
+ {0x15DD, 0xD0, "AMD Radeon Vega 10 Graphics"},
+ {0x15DD, 0xD1, "AMD Radeon Vega 8 Graphics"},
+ {0x15DD, 0xD3, "AMD Radeon Vega 11 Graphics"},
+ {0x15DD, 0xD5, "AMD Radeon Vega 8 Graphics"},
+ {0x15DD, 0xD6, "AMD Radeon Vega 11 Graphics"},
+ {0x15DD, 0xD7, "AMD Radeon Vega 8 Graphics"},
+ {0x15DD, 0xD8, "AMD Radeon Vega 3 Graphics"},
+ {0x15DD, 0xD9, "AMD Radeon Vega 6 Graphics"},
+ {0x15DD, 0xE1, "AMD Radeon Vega 3 Graphics"},
+ {0x15DD, 0xE2, "AMD Radeon Vega 3 Graphics"},
+ {0x163F, 0xAE, "AMD Custom GPU 0405"},
+ {0x6600, 0x00, "AMD Radeon HD 8600 / 8700M"},
+ {0x6600, 0x81, "AMD Radeon R7 M370"},
+ {0x6601, 0x00, "AMD Radeon HD 8500M / 8700M"},
+ {0x6604, 0x00, "AMD Radeon R7 M265 Series"},
+ {0x6604, 0x81, "AMD Radeon R7 M350"},
+ {0x6605, 0x00, "AMD Radeon R7 M260 Series"},
+ {0x6605, 0x81, "AMD Radeon R7 M340"},
+ {0x6606, 0x00, "AMD Radeon HD 8790M"},
+ {0x6607, 0x00, "AMD Radeon R5 M240"},
+ {0x6608, 0x00, "AMD FirePro W2100"},
+ {0x6610, 0x00, "AMD Radeon R7 200 Series"},
+ {0x6610, 0x81, "AMD Radeon R7 350"},
+ {0x6610, 0x83, "AMD Radeon R5 340"},
+ {0x6610, 0x87, "AMD Radeon R7 200 Series"},
+ {0x6611, 0x00, "AMD Radeon R7 200 Series"},
+ {0x6611, 0x87, "AMD Radeon R7 200 Series"},
+ {0x6613, 0x00, "AMD Radeon R7 200 Series"},
+ {0x6617, 0x00, "AMD Radeon R7 240 Series"},
+ {0x6617, 0x87, "AMD Radeon R7 200 Series"},
+ {0x6617, 0xC7, "AMD Radeon R7 240 Series"},
+ {0x6640, 0x00, "AMD Radeon HD 8950"},
+ {0x6640, 0x80, "AMD Radeon R9 M380"},
+ {0x6646, 0x00, "AMD Radeon R9 M280X"},
+ {0x6646, 0x80, "AMD Radeon R9 M385"},
+ {0x6646, 0x80, "AMD Radeon R9 M470X"},
+ {0x6647, 0x00, "AMD Radeon R9 M200X Series"},
+ {0x6647, 0x80, "AMD Radeon R9 M380"},
+ {0x6649, 0x00, "AMD FirePro W5100"},
+ {0x6658, 0x00, "AMD Radeon R7 200 Series"},
+ {0x665C, 0x00, "AMD Radeon HD 7700 Series"},
+ {0x665D, 0x00, "AMD Radeon R7 200 Series"},
+ {0x665F, 0x81, "AMD Radeon R7 360 Series"},
+ {0x6660, 0x00, "AMD Radeon HD 8600M Series"},
+ {0x6660, 0x81, "AMD Radeon R5 M335"},
+ {0x6660, 0x83, "AMD Radeon R5 M330"},
+ {0x6663, 0x00, "AMD Radeon HD 8500M Series"},
+ {0x6663, 0x83, "AMD Radeon R5 M320"},
+ {0x6664, 0x00, "AMD Radeon R5 M200 Series"},
+ {0x6665, 0x00, "AMD Radeon R5 M230 Series"},
+ {0x6665, 0x83, "AMD Radeon R5 M320"},
+ {0x6665, 0xC3, "AMD Radeon R5 M435"},
+ {0x6666, 0x00, "AMD Radeon R5 M200 Series"},
+ {0x6667, 0x00, "AMD Radeon R5 M200 Series"},
+ {0x666F, 0x00, "AMD Radeon HD 8500M"},
+ {0x66A1, 0x02, "AMD Instinct MI60 / MI50"},
+ {0x66A1, 0x06, "AMD Radeon Pro VII"},
+ {0x66AF, 0xC1, "AMD Radeon VII"},
+ {0x6780, 0x00, "AMD FirePro W9000"},
+ {0x6784, 0x00, "ATI FirePro V (FireGL V) Graphics Adapter"},
+ {0x6788, 0x00, "ATI FirePro V (FireGL V) Graphics Adapter"},
+ {0x678A, 0x00, "AMD FirePro W8000"},
+ {0x6798, 0x00, "AMD Radeon R9 200 / HD 7900 Series"},
+ {0x6799, 0x00, "AMD Radeon HD 7900 Series"},
+ {0x679A, 0x00, "AMD Radeon HD 7900 Series"},
+ {0x679B, 0x00, "AMD Radeon HD 7900 Series"},
+ {0x679E, 0x00, "AMD Radeon HD 7800 Series"},
+ {0x67A0, 0x00, "AMD Radeon FirePro W9100"},
+ {0x67A1, 0x00, "AMD Radeon FirePro W8100"},
+ {0x67B0, 0x00, "AMD Radeon R9 200 Series"},
+ {0x67B0, 0x80, "AMD Radeon R9 390 Series"},
+ {0x67B1, 0x00, "AMD Radeon R9 200 Series"},
+ {0x67B1, 0x80, "AMD Radeon R9 390 Series"},
+ {0x67B9, 0x00, "AMD Radeon R9 200 Series"},
+ {0x67C0, 0x00, "AMD Radeon Pro WX 7100 Graphics"},
+ {0x67C0, 0x80, "AMD Radeon E9550"},
+ {0x67C2, 0x01, "AMD Radeon Pro V7350x2"},
+ {0x67C2, 0x02, "AMD Radeon Pro V7300X"},
+ {0x67C4, 0x00, "AMD Radeon Pro WX 7100 Graphics"},
+ {0x67C4, 0x80, "AMD Radeon E9560 / E9565 Graphics"},
+ {0x67C7, 0x00, "AMD Radeon Pro WX 5100 Graphics"},
+ {0x67C7, 0x80, "AMD Radeon E9390 Graphics"},
+ {0x67D0, 0x01, "AMD Radeon Pro V7350x2"},
+ {0x67D0, 0x02, "AMD Radeon Pro V7300X"},
+ {0x67DF, 0xC0, "AMD Radeon Pro 580X"},
+ {0x67DF, 0xC1, "AMD Radeon RX 580 Series"},
+ {0x67DF, 0xC2, "AMD Radeon RX 570 Series"},
+ {0x67DF, 0xC3, "AMD Radeon RX 580 Series"},
+ {0x67DF, 0xC4, "AMD Radeon RX 480 Graphics"},
+ {0x67DF, 0xC5, "AMD Radeon RX 470 Graphics"},
+ {0x67DF, 0xC6, "AMD Radeon RX 570 Series"},
+ {0x67DF, 0xC7, "AMD Radeon RX 480 Graphics"},
+ {0x67DF, 0xCF, "AMD Radeon RX 470 Graphics"},
+ {0x67DF, 0xD7, "AMD Radeon RX 470 Graphics"},
+ {0x67DF, 0xE0, "AMD Radeon RX 470 Series"},
+ {0x67DF, 0xE1, "AMD Radeon RX 590 Series"},
+ {0x67DF, 0xE3, "AMD Radeon RX Series"},
+ {0x67DF, 0xE7, "AMD Radeon RX 580 Series"},
+ {0x67DF, 0xEB, "AMD Radeon Pro 580X"},
+ {0x67DF, 0xEF, "AMD Radeon RX 570 Series"},
+ {0x67DF, 0xF7, "AMD Radeon RX P30PH"},
+ {0x67DF, 0xFF, "AMD Radeon RX 470 Series"},
+ {0x67E0, 0x00, "AMD Radeon Pro WX Series"},
+ {0x67E3, 0x00, "AMD Radeon Pro WX 4100"},
+ {0x67E8, 0x00, "AMD Radeon Pro WX Series"},
+ {0x67E8, 0x01, "AMD Radeon Pro WX Series"},
+ {0x67E8, 0x80, "AMD Radeon E9260 Graphics"},
+ {0x67EB, 0x00, "AMD Radeon Pro V5300X"},
+ {0x67EF, 0xC0, "AMD Radeon RX Graphics"},
+ {0x67EF, 0xC1, "AMD Radeon RX 460 Graphics"},
+ {0x67EF, 0xC2, "AMD Radeon Pro Series"},
+ {0x67EF, 0xC3, "AMD Radeon RX Series"},
+ {0x67EF, 0xC5, "AMD Radeon RX 460 Graphics"},
+ {0x67EF, 0xC7, "AMD Radeon RX Graphics"},
+ {0x67EF, 0xCF, "AMD Radeon RX 460 Graphics"},
+ {0x67EF, 0xE0, "AMD Radeon RX 560 Series"},
+ {0x67EF, 0xE1, "AMD Radeon RX Series"},
+ {0x67EF, 0xE2, "AMD Radeon RX 560X"},
+ {0x67EF, 0xE3, "AMD Radeon RX Series"},
+ {0x67EF, 0xE5, "AMD Radeon RX 560 Series"},
+ {0x67EF, 0xE7, "AMD Radeon RX 560 Series"},
+ {0x67EF, 0xEF, "AMD Radeon 550 Series"},
+ {0x67EF, 0xFF, "AMD Radeon RX 460 Graphics"},
+ {0x67FF, 0xC0, "AMD Radeon Pro 465"},
+ {0x67FF, 0xC1, "AMD Radeon RX 560 Series"},
+ {0x67FF, 0xCF, "AMD Radeon RX 560 Series"},
+ {0x67FF, 0xEF, "AMD Radeon RX 560 Series"},
+ {0x67FF, 0xFF, "AMD Radeon RX 550 Series"},
+ {0x6800, 0x00, "AMD Radeon HD 7970M"},
+ {0x6801, 0x00, "AMD Radeon HD 8970M"},
+ {0x6806, 0x00, "AMD Radeon R9 M290X"},
+ {0x6808, 0x00, "AMD FirePro W7000"},
+ {0x6808, 0x00, "ATI FirePro V (FireGL V) Graphics Adapter"},
+ {0x6809, 0x00, "ATI FirePro W5000"},
+ {0x6810, 0x00, "AMD Radeon R9 200 Series"},
+ {0x6810, 0x81, "AMD Radeon R9 370 Series"},
+ {0x6811, 0x00, "AMD Radeon R9 200 Series"},
+ {0x6811, 0x81, "AMD Radeon R7 370 Series"},
+ {0x6818, 0x00, "AMD Radeon HD 7800 Series"},
+ {0x6819, 0x00, "AMD Radeon HD 7800 Series"},
+ {0x6820, 0x00, "AMD Radeon R9 M275X"},
+ {0x6820, 0x81, "AMD Radeon R9 M375"},
+ {0x6820, 0x83, "AMD Radeon R9 M375X"},
+ {0x6821, 0x00, "AMD Radeon R9 M200X Series"},
+ {0x6821, 0x83, "AMD Radeon R9 M370X"},
+ {0x6821, 0x87, "AMD Radeon R7 M380"},
+ {0x6822, 0x00, "AMD Radeon E8860"},
+ {0x6823, 0x00, "AMD Radeon R9 M200X Series"},
+ {0x6825, 0x00, "AMD Radeon HD 7800M Series"},
+ {0x6826, 0x00, "AMD Radeon HD 7700M Series"},
+ {0x6827, 0x00, "AMD Radeon HD 7800M Series"},
+ {0x6828, 0x00, "AMD FirePro W600"},
+ {0x682B, 0x00, "AMD Radeon HD 8800M Series"},
+ {0x682B, 0x87, "AMD Radeon R9 M360"},
+ {0x682C, 0x00, "AMD FirePro W4100"},
+ {0x682D, 0x00, "AMD Radeon HD 7700M Series"},
+ {0x682F, 0x00, "AMD Radeon HD 7700M Series"},
+ {0x6830, 0x00, "AMD Radeon 7800M Series"},
+ {0x6831, 0x00, "AMD Radeon 7700M Series"},
+ {0x6835, 0x00, "AMD Radeon R7 Series / HD 9000 Series"},
+ {0x6837, 0x00, "AMD Radeon HD 7700 Series"},
+ {0x683D, 0x00, "AMD Radeon HD 7700 Series"},
+ {0x683F, 0x00, "AMD Radeon HD 7700 Series"},
+ {0x684C, 0x00, "ATI FirePro V (FireGL V) Graphics Adapter"},
+ {0x6860, 0x00, "AMD Radeon Instinct MI25"},
+ {0x6860, 0x01, "AMD Radeon Instinct MI25"},
+ {0x6860, 0x02, "AMD Radeon Instinct MI25"},
+ {0x6860, 0x03, "AMD Radeon Pro V340"},
+ {0x6860, 0x04, "AMD Radeon Instinct MI25x2"},
+ {0x6860, 0x07, "AMD Radeon Pro V320"},
+ {0x6861, 0x00, "AMD Radeon Pro WX 9100"},
+ {0x6862, 0x00, "AMD Radeon Pro SSG"},
+ {0x6863, 0x00, "AMD Radeon Vega Frontier Edition"},
+ {0x6864, 0x03, "AMD Radeon Pro V340"},
+ {0x6864, 0x04, "AMD Radeon Instinct MI25x2"},
+ {0x6864, 0x05, "AMD Radeon Pro V340"},
+ {0x6868, 0x00, "AMD Radeon Pro WX 8200"},
+ {0x686C, 0x00, "AMD Radeon Instinct MI25 MxGPU"},
+ {0x686C, 0x01, "AMD Radeon Instinct MI25 MxGPU"},
+ {0x686C, 0x02, "AMD Radeon Instinct MI25 MxGPU"},
+ {0x686C, 0x03, "AMD Radeon Pro V340 MxGPU"},
+ {0x686C, 0x04, "AMD Radeon Instinct MI25x2 MxGPU"},
+ {0x686C, 0x05, "AMD Radeon Pro V340L MxGPU"},
+ {0x686C, 0x06, "AMD Radeon Instinct MI25 MxGPU"},
+ {0x687F, 0x01, "AMD Radeon RX Vega"},
+ {0x687F, 0xC0, "AMD Radeon RX Vega"},
+ {0x687F, 0xC1, "AMD Radeon RX Vega"},
+ {0x687F, 0xC3, "AMD Radeon RX Vega"},
+ {0x687F, 0xC7, "AMD Radeon RX Vega"},
+ {0x6900, 0x00, "AMD Radeon R7 M260"},
+ {0x6900, 0x81, "AMD Radeon R7 M360"},
+ {0x6900, 0x83, "AMD Radeon R7 M340"},
+ {0x6900, 0xC1, "AMD Radeon R5 M465 Series"},
+ {0x6900, 0xC3, "AMD Radeon R5 M445 Series"},
+ {0x6900, 0xD1, "AMD Radeon 530 Series"},
+ {0x6900, 0xD3, "AMD Radeon 530 Series"},
+ {0x6901, 0x00, "AMD Radeon R5 M255"},
+ {0x6902, 0x00, "AMD Radeon Series"},
+ {0x6907, 0x00, "AMD Radeon R5 M255"},
+ {0x6907, 0x87, "AMD Radeon R5 M315"},
+ {0x6920, 0x00, "AMD Radeon R9 M395X"},
+ {0x6920, 0x01, "AMD Radeon R9 M390X"},
+ {0x6921, 0x00, "AMD Radeon R9 M390X"},
+ {0x6929, 0x00, "AMD FirePro S7150"},
+ {0x6929, 0x01, "AMD FirePro S7100X"},
+ {0x692B, 0x00, "AMD FirePro W7100"},
+ {0x6938, 0x00, "AMD Radeon R9 200 Series"},
+ {0x6938, 0xF0, "AMD Radeon R9 200 Series"},
+ {0x6938, 0xF1, "AMD Radeon R9 380 Series"},
+ {0x6939, 0x00, "AMD Radeon R9 200 Series"},
+ {0x6939, 0xF0, "AMD Radeon R9 200 Series"},
+ {0x6939, 0xF1, "AMD Radeon R9 380 Series"},
+ {0x694C, 0xC0, "AMD Radeon RX Vega M GH Graphics"},
+ {0x694E, 0xC0, "AMD Radeon RX Vega M GL Graphics"},
+ {0x6980, 0x00, "AMD Radeon Pro WX 3100"},
+ {0x6981, 0x00, "AMD Radeon Pro WX 3200 Series"},
+ {0x6981, 0x01, "AMD Radeon Pro WX 3200 Series"},
+ {0x6981, 0x10, "AMD Radeon Pro WX 3200 Series"},
+ {0x6985, 0x00, "AMD Radeon Pro WX 3100"},
+ {0x6986, 0x00, "AMD Radeon Pro WX 2100"},
+ {0x6987, 0x80, "AMD Embedded Radeon E9171"},
+ {0x6987, 0xC0, "AMD Radeon 550X Series"},
+ {0x6987, 0xC1, "AMD Radeon RX 640"},
+ {0x6987, 0xC3, "AMD Radeon 540X Series"},
+ {0x6987, 0xC7, "AMD Radeon 540"},
+ {0x6995, 0x00, "AMD Radeon Pro WX 2100"},
+ {0x6997, 0x00, "AMD Radeon Pro WX 2100"},
+ {0x699F, 0x81, "AMD Embedded Radeon E9170 Series"},
+ {0x699F, 0xC0, "AMD Radeon 500 Series"},
+ {0x699F, 0xC1, "AMD Radeon 540 Series"},
+ {0x699F, 0xC3, "AMD Radeon 500 Series"},
+ {0x699F, 0xC7, "AMD Radeon RX 550 / 550 Series"},
+ {0x699F, 0xC9, "AMD Radeon 540"},
+ {0x6FDF, 0xE7, "AMD Radeon RX 590 GME"},
+ {0x6FDF, 0xEF, "AMD Radeon RX 580 2048SP"},
+ {0x7300, 0xC1, "AMD FirePro S9300 x2"},
+ {0x7300, 0xC8, "AMD Radeon R9 Fury Series"},
+ {0x7300, 0xC9, "AMD Radeon Pro Duo"},
+ {0x7300, 0xCA, "AMD Radeon R9 Fury Series"},
+ {0x7300, 0xCB, "AMD Radeon R9 Fury Series"},
+ {0x7312, 0x00, "AMD Radeon Pro W5700"},
+ {0x731E, 0xC6, "AMD Radeon RX 5700XTB"},
+ {0x731E, 0xC7, "AMD Radeon RX 5700B"},
+ {0x731F, 0xC0, "AMD Radeon RX 5700 XT 50th Anniversary"},
+ {0x731F, 0xC1, "AMD Radeon RX 5700 XT"},
+ {0x731F, 0xC2, "AMD Radeon RX 5600M"},
+ {0x731F, 0xC3, "AMD Radeon RX 5700M"},
+ {0x731F, 0xC4, "AMD Radeon RX 5700"},
+ {0x731F, 0xC5, "AMD Radeon RX 5700 XT"},
+ {0x731F, 0xCA, "AMD Radeon RX 5600 XT"},
+ {0x731F, 0xCB, "AMD Radeon RX 5600 OEM"},
+ {0x7340, 0xC1, "AMD Radeon RX 5500M"},
+ {0x7340, 0xC3, "AMD Radeon RX 5300M"},
+ {0x7340, 0xC5, "AMD Radeon RX 5500 XT"},
+ {0x7340, 0xC7, "AMD Radeon RX 5500"},
+ {0x7340, 0xC9, "AMD Radeon RX 5500XTB"},
+ {0x7340, 0xCF, "AMD Radeon RX 5300"},
+ {0x7341, 0x00, "AMD Radeon Pro W5500"},
+ {0x7347, 0x00, "AMD Radeon Pro W5500M"},
+ {0x7360, 0x41, "AMD Radeon Pro 5600M"},
+ {0x7360, 0xC3, "AMD Radeon Pro V520"},
+ {0x738C, 0x01, "AMD Instinct MI100"},
+ {0x73A3, 0x00, "AMD Radeon Pro W6800"},
+ {0x73A5, 0xC0, "AMD Radeon RX 6950 XT"},
+ {0x73AF, 0xC0, "AMD Radeon RX 6900 XT"},
+ {0x73BF, 0xC0, "AMD Radeon RX 6900 XT"},
+ {0x73BF, 0xC1, "AMD Radeon RX 6800 XT"},
+ {0x73BF, 0xC3, "AMD Radeon RX 6800"},
+ {0x73DF, 0xC0, "AMD Radeon RX 6750 XT"},
+ {0x73DF, 0xC1, "AMD Radeon RX 6700 XT"},
+ {0x73DF, 0xC2, "AMD Radeon RX 6800M"},
+ {0x73DF, 0xC3, "AMD Radeon RX 6800M"},
+ {0x73DF, 0xC5, "AMD Radeon RX 6700 XT"},
+ {0x73DF, 0xCF, "AMD Radeon RX 6700M"},
+ {0x73DF, 0xD7, "AMD TDC-235"},
+ {0x73E1, 0x00, "AMD Radeon Pro W6600M"},
+ {0x73E3, 0x00, "AMD Radeon Pro W6600"},
+ {0x73EF, 0xC0, "AMD Radeon RX 6800S"},
+ {0x73EF, 0xC1, "AMD Radeon RX 6650 XT"},
+ {0x73EF, 0xC2, "AMD Radeon RX 6700S"},
+ {0x73EF, 0xC3, "AMD Radeon RX 6650M"},
+ {0x73EF, 0xC4, "AMD Radeon RX 6650M XT"},
+ {0x73FF, 0xC1, "AMD Radeon RX 6600 XT"},
+ {0x73FF, 0xC3, "AMD Radeon RX 6600M"},
+ {0x73FF, 0xC7, "AMD Radeon RX 6600"},
+ {0x73FF, 0xCB, "AMD Radeon RX 6600S"},
+ {0x7408, 0x00, "AMD Instinct MI250X"},
+ {0x740C, 0x01, "AMD Instinct MI250X / MI250"},
+ {0x740F, 0x02, "AMD Instinct MI210"},
+ {0x7421, 0x00, "AMD Radeon Pro W6500M"},
+ {0x7422, 0x00, "AMD Radeon Pro W6400"},
+ {0x7423, 0x00, "AMD Radeon Pro W6300M"},
+ {0x7423, 0x01, "AMD Radeon Pro W6300"},
+ {0x7424, 0x00, "AMD Radeon RX 6300"},
+ {0x743F, 0xC1, "AMD Radeon RX 6500 XT"},
+ {0x743F, 0xC3, "AMD Radeon RX 6500"},
+ {0x743F, 0xC3, "AMD Radeon RX 6500M"},
+ {0x743F, 0xC7, "AMD Radeon RX 6400"},
+ {0x743F, 0xCF, "AMD Radeon RX 6300M"},
+ {0x744C, 0xC8, "AMD Radeon RX 7900 XTX"},
+ {0x744C, 0xCC, "AMD Radeon RX 7900 XT"},
+ {0x7480, 0xC1, "AMD Radeon RX 7700S"},
+ {0x7480, 0xC3, "AMD Radeon RX 7600S"},
+ {0x7480, 0xC7, "AMD Radeon RX 7600M XT"},
+ {0x7483, 0xCF, "AMD Radeon RX 7600M"},
+ {0x9830, 0x00, "AMD Radeon HD 8400 / R3 Series"},
+ {0x9831, 0x00, "AMD Radeon HD 8400E"},
+ {0x9832, 0x00, "AMD Radeon HD 8330"},
+ {0x9833, 0x00, "AMD Radeon HD 8330E"},
+ {0x9834, 0x00, "AMD Radeon HD 8210"},
+ {0x9835, 0x00, "AMD Radeon HD 8210E"},
+ {0x9836, 0x00, "AMD Radeon HD 8200 / R3 Series"},
+ {0x9837, 0x00, "AMD Radeon HD 8280E"},
+ {0x9838, 0x00, "AMD Radeon HD 8200 / R3 series"},
+ {0x9839, 0x00, "AMD Radeon HD 8180"},
+ {0x983D, 0x00, "AMD Radeon HD 8250"},
+ {0x9850, 0x00, "AMD Radeon R3 Graphics"},
+ {0x9850, 0x03, "AMD Radeon R3 Graphics"},
+ {0x9850, 0x40, "AMD Radeon R2 Graphics"},
+ {0x9850, 0x45, "AMD Radeon R3 Graphics"},
+ {0x9851, 0x00, "AMD Radeon R4 Graphics"},
+ {0x9851, 0x01, "AMD Radeon R5E Graphics"},
+ {0x9851, 0x05, "AMD Radeon R5 Graphics"},
+ {0x9851, 0x06, "AMD Radeon R5E Graphics"},
+ {0x9851, 0x40, "AMD Radeon R4 Graphics"},
+ {0x9851, 0x45, "AMD Radeon R5 Graphics"},
+ {0x9852, 0x00, "AMD Radeon R2 Graphics"},
+ {0x9852, 0x40, "AMD Radeon E1 Graphics"},
+ {0x9853, 0x00, "AMD Radeon R2 Graphics"},
+ {0x9853, 0x01, "AMD Radeon R4E Graphics"},
+ {0x9853, 0x03, "AMD Radeon R2 Graphics"},
+ {0x9853, 0x05, "AMD Radeon R1E Graphics"},
+ {0x9853, 0x06, "AMD Radeon R1E Graphics"},
+ {0x9853, 0x07, "AMD Radeon R1E Graphics"},
+ {0x9853, 0x08, "AMD Radeon R1E Graphics"},
+ {0x9853, 0x40, "AMD Radeon R2 Graphics"},
+ {0x9854, 0x00, "AMD Radeon R3 Graphics"},
+ {0x9854, 0x01, "AMD Radeon R3E Graphics"},
+ {0x9854, 0x02, "AMD Radeon R3 Graphics"},
+ {0x9854, 0x05, "AMD Radeon R2 Graphics"},
+ {0x9854, 0x06, "AMD Radeon R4 Graphics"},
+ {0x9854, 0x07, "AMD Radeon R3 Graphics"},
+ {0x9855, 0x02, "AMD Radeon R6 Graphics"},
+ {0x9855, 0x05, "AMD Radeon R4 Graphics"},
+ {0x9856, 0x00, "AMD Radeon R2 Graphics"},
+ {0x9856, 0x01, "AMD Radeon R2E Graphics"},
+ {0x9856, 0x02, "AMD Radeon R2 Graphics"},
+ {0x9856, 0x05, "AMD Radeon R1E Graphics"},
+ {0x9856, 0x06, "AMD Radeon R2 Graphics"},
+ {0x9856, 0x07, "AMD Radeon R1E Graphics"},
+ {0x9856, 0x08, "AMD Radeon R1E Graphics"},
+ {0x9856, 0x13, "AMD Radeon R1E Graphics"},
+ {0x9874, 0x81, "AMD Radeon R6 Graphics"},
+ {0x9874, 0x84, "AMD Radeon R7 Graphics"},
+ {0x9874, 0x85, "AMD Radeon R6 Graphics"},
+ {0x9874, 0x87, "AMD Radeon R5 Graphics"},
+ {0x9874, 0x88, "AMD Radeon R7E Graphics"},
+ {0x9874, 0x89, "AMD Radeon R6E Graphics"},
+ {0x9874, 0xC4, "AMD Radeon R7 Graphics"},
+ {0x9874, 0xC5, "AMD Radeon R6 Graphics"},
+ {0x9874, 0xC6, "AMD Radeon R6 Graphics"},
+ {0x9874, 0xC7, "AMD Radeon R5 Graphics"},
+ {0x9874, 0xC8, "AMD Radeon R7 Graphics"},
+ {0x9874, 0xC9, "AMD Radeon R7 Graphics"},
+ {0x9874, 0xCA, "AMD Radeon R5 Graphics"},
+ {0x9874, 0xCB, "AMD Radeon R5 Graphics"},
+ {0x9874, 0xCC, "AMD Radeon R7 Graphics"},
+ {0x9874, 0xCD, "AMD Radeon R7 Graphics"},
+ {0x9874, 0xCE, "AMD Radeon R5 Graphics"},
+ {0x9874, 0xE1, "AMD Radeon R7 Graphics"},
+ {0x9874, 0xE2, "AMD Radeon R7 Graphics"},
+ {0x9874, 0xE3, "AMD Radeon R7 Graphics"},
+ {0x9874, 0xE4, "AMD Radeon R7 Graphics"},
+ {0x9874, 0xE5, "AMD Radeon R5 Graphics"},
+ {0x9874, 0xE6, "AMD Radeon R5 Graphics"},
+ {0x98E4, 0x80, "AMD Radeon R5E Graphics"},
+ {0x98E4, 0x81, "AMD Radeon R4E Graphics"},
+ {0x98E4, 0x83, "AMD Radeon R2E Graphics"},
+ {0x98E4, 0x84, "AMD Radeon R2E Graphics"},
+ {0x98E4, 0x86, "AMD Radeon R1E Graphics"},
+ {0x98E4, 0xC0, "AMD Radeon R4 Graphics"},
+ {0x98E4, 0xC1, "AMD Radeon R5 Graphics"},
+ {0x98E4, 0xC2, "AMD Radeon R4 Graphics"},
+ {0x98E4, 0xC4, "AMD Radeon R5 Graphics"},
+ {0x98E4, 0xC6, "AMD Radeon R5 Graphics"},
+ {0x98E4, 0xC8, "AMD Radeon R4 Graphics"},
+ {0x98E4, 0xC9, "AMD Radeon R4 Graphics"},
+ {0x98E4, 0xCA, "AMD Radeon R5 Graphics"},
+ {0x98E4, 0xD0, "AMD Radeon R2 Graphics"},
+ {0x98E4, 0xD1, "AMD Radeon R2 Graphics"},
+ {0x98E4, 0xD2, "AMD Radeon R2 Graphics"},
+ {0x98E4, 0xD4, "AMD Radeon R2 Graphics"},
+ {0x98E4, 0xD9, "AMD Radeon R5 Graphics"},
+ {0x98E4, 0xDA, "AMD Radeon R5 Graphics"},
+ {0x98E4, 0xDB, "AMD Radeon R3 Graphics"},
+ {0x98E4, 0xE1, "AMD Radeon R3 Graphics"},
+ {0x98E4, 0xE2, "AMD Radeon R3 Graphics"},
+ {0x98E4, 0xE9, "AMD Radeon R4 Graphics"},
+ {0x98E4, 0xEA, "AMD Radeon R4 Graphics"},
+ {0x98E4, 0xEB, "AMD Radeon R3 Graphics"},
+ {0x98E4, 0xEC, "AMD Radeon R4 Graphics"},
+ {0x0000, 0x00, "unknown AMD GPU"} // this must always be the last item
+};
+
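+/* Per-card state: the card's sysfs path and identification plus, for every metric
+ * family, the source file path, the chart/dimension handles and the last collected
+ * value. Cards are kept in the singly-linked list rooted at card_root. */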
+struct card {
+ const char *pathname;
+ struct amdgpu_id_struct id;
+
+ /* GPU and VRAM utilizations */
+
+ const char *pathname_util_gpu;
+ RRDSET *st_util_gpu;
+ RRDDIM *rd_util_gpu;
+ collected_number util_gpu;
+
+ const char *pathname_util_mem;
+ RRDSET *st_util_mem;
+ RRDDIM *rd_util_mem;
+ collected_number util_mem;
+
+
+ /* GPU and VRAM clock frequencies */
+
+ const char *pathname_clk_gpu;
+ procfile *ff_clk_gpu;
+ RRDSET *st_clk_gpu;
+ RRDDIM *rd_clk_gpu;
+ collected_number clk_gpu;
+
+ const char *pathname_clk_mem;
+ procfile *ff_clk_mem;
+ RRDSET *st_clk_mem;
+ RRDDIM *rd_clk_mem;
+ collected_number clk_mem;
+
+
+ /* GPU memory usage */
+
+ const char *pathname_mem_used_vram;
+ const char *pathname_mem_total_vram;
+
+ RRDSET *st_mem_usage_perc_vram;
+ RRDDIM *rd_mem_used_perc_vram;
+
+ RRDSET *st_mem_usage_vram;
+ RRDDIM *rd_mem_used_vram;
+ RRDDIM *rd_mem_free_vram;
+
+ collected_number used_vram;
+ collected_number total_vram;
+
+
+ const char *pathname_mem_used_vis_vram;
+ const char *pathname_mem_total_vis_vram;
+
+ RRDSET *st_mem_usage_perc_vis_vram;
+ RRDDIM *rd_mem_used_perc_vis_vram;
+
+ RRDSET *st_mem_usage_vis_vram;
+ RRDDIM *rd_mem_used_vis_vram;
+ RRDDIM *rd_mem_free_vis_vram;
+
+ collected_number used_vis_vram;
+ collected_number total_vis_vram;
+
+
+ const char *pathname_mem_used_gtt;
+ const char *pathname_mem_total_gtt;
+
+ RRDSET *st_mem_usage_perc_gtt;
+ RRDDIM *rd_mem_used_perc_gtt;
+
+ RRDSET *st_mem_usage_gtt;
+ RRDDIM *rd_mem_used_gtt;
+ RRDDIM *rd_mem_free_gtt;
+
+ collected_number used_gtt;
+ collected_number total_gtt;
+
+ struct do_rrd_x *do_rrd_x_root;
+
+ struct card *next;
+};
+static struct card *card_root = NULL;
+
+static void card_free(struct card *c){
+ if(c->pathname) freez((void *) c->pathname);
+ if(c->id.marketing_name) freez((void *) c->id.marketing_name);
+
+ /* remove card from linked list */
+ if(c == card_root) card_root = c->next;
+ else {
+ struct card *last;
+ for(last = card_root; last && last->next != c; last = last->next);
+ if(last) last->next = c->next;
+ }
+
+ freez(c);
+}
+
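+/* Returns 0 when the card's device/uevent file reports DRIVER=amdgpu,
+ * a negative value otherwise (file missing, unreadable, or another driver). */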
+static int check_card_is_amdgpu(const char *const pathname){
+ int rc = -1;
+
+ procfile *ff = procfile_open(pathname, " ", PROCFILE_FLAG_NO_ERROR_ON_FILE_IO);
+ if(unlikely(!ff)){
+ rc = -1;
+ goto cleanup;
+ }
+
+ ff = procfile_readall(ff);
+ if(unlikely(!ff || procfile_lines(ff) < 1 || procfile_linewords(ff, 0) < 1)){
+ rc = -2;
+ goto cleanup;
+ }
+
+ for(size_t l = 0; l < procfile_lines(ff); l++) {
+ if(!strcmp(procfile_lineword(ff, l, 0), "DRIVER=amdgpu")){
+ rc = 0;
+ goto cleanup;
+ }
+ }
+
+ rc = -3; // no match
+
+cleanup:
+ procfile_close(ff);
+ return rc;
+}
+
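+/* Parses a pp_dpm_sclk / pp_dpm_mclk style file, where each line looks like
+ * "1: 1200Mhz" and the currently selected state carries a trailing '*'.
+ * Stores that frequency (in MHz) in *num and returns 0, or negative on failure. */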
+static int read_clk_freq_file(procfile **p_ff, const char *const pathname, collected_number *num){
+ if(unlikely(!*p_ff)){
+ *p_ff = procfile_open(pathname, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO);
+ if(unlikely(!*p_ff)) return -2;
+ }
+
+ if(unlikely(NULL == (*p_ff = procfile_readall(*p_ff)))) return -3;
+
+ for(size_t l = 0; l < procfile_lines(*p_ff) ; l++) {
+
+ if((*p_ff)->lines->lines[l].words >= 3 && !strcmp(procfile_lineword((*p_ff), l, 2), "*")){
+            char *str_with_units = procfile_lineword((*p_ff), l, 1);
+            char *delim = strchr(str_with_units, 'M');
+            char str_without_units[10];
+            if(unlikely(!delim || (size_t)(delim - str_with_units) >= sizeof(str_without_units))) break;
+            memcpy(str_without_units, str_with_units, delim - str_with_units);
+            str_without_units[delim - str_with_units] = '\0';
+            *num = str2ll(str_without_units, NULL);
+ return 0;
+ }
+ }
+
+    procfile_close((*p_ff));
+    *p_ff = NULL;
+ return -4;
+}
+
+static char *set_id(const char *const suf_1, const char *const suf_2, const char *const suf_3){
+ static char id[RRD_ID_LENGTH_MAX + 1];
+ snprintfz(id, RRD_ID_LENGTH_MAX, "%s_%s_%s", suf_1, suf_2, suf_3);
+ return id;
+}
+
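+/* Every card keeps a list of collection callbacks, one per metric family that was
+ * successfully detected at startup. A callback that fails at runtime marks its
+ * charts obsolete and is removed from the list; a card left with no callbacks is freed. */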
+typedef int (*do_rrd_x_func)(struct card *const c);
+
+struct do_rrd_x {
+ do_rrd_x_func func;
+ struct do_rrd_x *next;
+};
+
+static void add_do_rrd_x(struct card *const c, const do_rrd_x_func func){
+ struct do_rrd_x *const drrd = callocz(1, sizeof(struct do_rrd_x));
+ drrd->func = func;
+ drrd->next = c->do_rrd_x_root;
+ c->do_rrd_x_root = drrd;
+}
+
+static void rm_do_rrd_x(struct card *const c, struct do_rrd_x *const drrd){
+ if(drrd == c->do_rrd_x_root) c->do_rrd_x_root = drrd->next;
+ else {
+ struct do_rrd_x *last;
+ for(last = c->do_rrd_x_root; last && last->next != drrd; last = last->next);
+ if(last) last->next = drrd->next;
+ }
+
+ freez(drrd);
+}
+
+static int do_rrd_util_gpu(struct card *const c){
+ if(likely(!read_single_number_file(c->pathname_util_gpu, (unsigned long long *) &c->util_gpu))){
+ rrddim_set_by_pointer(c->st_util_gpu, c->rd_util_gpu, c->util_gpu);
+ rrdset_done(c->st_util_gpu);
+ return 0;
+ }
+ else {
+ collector_error("Cannot read util_gpu for %s: [%s]", c->pathname, c->id.marketing_name);
+ freez((void *) c->pathname_util_gpu);
+ rrdset_is_obsolete(c->st_util_gpu);
+ return 1;
+ }
+}
+
+static int do_rrd_util_mem(struct card *const c){
+ if(likely(!read_single_number_file(c->pathname_util_mem, (unsigned long long *) &c->util_mem))){
+ rrddim_set_by_pointer(c->st_util_mem, c->rd_util_mem, c->util_mem);
+ rrdset_done(c->st_util_mem);
+ return 0;
+ }
+ else {
+ collector_error("Cannot read util_mem for %s: [%s]", c->pathname, c->id.marketing_name);
+ freez((void *) c->pathname_util_mem);
+ rrdset_is_obsolete(c->st_util_mem);
+ return 1;
+ }
+}
+
+static int do_rrd_clk_gpu(struct card *const c){
+ if(likely(!read_clk_freq_file(&c->ff_clk_gpu, (char *) c->pathname_clk_gpu, &c->clk_gpu))){
+ rrddim_set_by_pointer(c->st_clk_gpu, c->rd_clk_gpu, c->clk_gpu);
+ rrdset_done(c->st_clk_gpu);
+ return 0;
+ }
+ else {
+ collector_error("Cannot read clk_gpu for %s: [%s]", c->pathname, c->id.marketing_name);
+ freez((void *) c->pathname_clk_gpu);
+ rrdset_is_obsolete(c->st_clk_gpu);
+ return 1;
+ }
+}
+
+static int do_rrd_clk_mem(struct card *const c){
+ if(likely(!read_clk_freq_file(&c->ff_clk_mem, (char *) c->pathname_clk_mem, &c->clk_mem))){
+ rrddim_set_by_pointer(c->st_clk_mem, c->rd_clk_mem, c->clk_mem);
+ rrdset_done(c->st_clk_mem);
+ return 0;
+ }
+ else {
+ collector_error("Cannot read clk_mem for %s: [%s]", c->pathname, c->id.marketing_name);
+ freez((void *) c->pathname_clk_mem);
+ rrdset_is_obsolete(c->st_clk_mem);
+ return 1;
+ }
+}
+
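+/* VRAM, visible VRAM and GTT usage: the percentage dimension is collected as
+ * used * 10000 / total and uses a divisor of 100, so it renders as a percentage
+ * with two decimal places. */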
+static int do_rrd_vram(struct card *const c){
+ if(likely(!read_single_number_file(c->pathname_mem_used_vram, (unsigned long long *) &c->used_vram) &&
+ c->total_vram)){
+ rrddim_set_by_pointer( c->st_mem_usage_perc_vram,
+ c->rd_mem_used_perc_vram,
+ c->used_vram * 10000 / c->total_vram);
+ rrdset_done(c->st_mem_usage_perc_vram);
+
+ rrddim_set_by_pointer(c->st_mem_usage_vram, c->rd_mem_used_vram, c->used_vram);
+ rrddim_set_by_pointer(c->st_mem_usage_vram, c->rd_mem_free_vram, c->total_vram - c->used_vram);
+ rrdset_done(c->st_mem_usage_vram);
+ return 0;
+ }
+ else {
+ collector_error("Cannot read used_vram for %s: [%s]", c->pathname, c->id.marketing_name);
+ freez((void *) c->pathname_mem_used_vram);
+ freez((void *) c->pathname_mem_total_vram);
+ rrdset_is_obsolete(c->st_mem_usage_perc_vram);
+ rrdset_is_obsolete(c->st_mem_usage_vram);
+ return 1;
+ }
+}
+
+static int do_rrd_vis_vram(struct card *const c){
+ if(likely(!read_single_number_file(c->pathname_mem_used_vis_vram, (unsigned long long *) &c->used_vis_vram) &&
+ c->total_vis_vram)){
+ rrddim_set_by_pointer( c->st_mem_usage_perc_vis_vram,
+ c->rd_mem_used_perc_vis_vram,
+ c->used_vis_vram * 10000 / c->total_vis_vram);
+ rrdset_done(c->st_mem_usage_perc_vis_vram);
+
+ rrddim_set_by_pointer(c->st_mem_usage_vis_vram, c->rd_mem_used_vis_vram, c->used_vis_vram);
+ rrddim_set_by_pointer(c->st_mem_usage_vis_vram, c->rd_mem_free_vis_vram, c->total_vis_vram - c->used_vis_vram);
+ rrdset_done(c->st_mem_usage_vis_vram);
+ return 0;
+ }
+ else {
+ collector_error("Cannot read used_vis_vram for %s: [%s]", c->pathname, c->id.marketing_name);
+ freez((void *) c->pathname_mem_used_vis_vram);
+ freez((void *) c->pathname_mem_total_vis_vram);
+ rrdset_is_obsolete(c->st_mem_usage_perc_vis_vram);
+ rrdset_is_obsolete(c->st_mem_usage_vis_vram);
+ return 1;
+ }
+}
+
+static int do_rrd_gtt(struct card *const c){
+ if(likely(!read_single_number_file(c->pathname_mem_used_gtt, (unsigned long long *) &c->used_gtt) &&
+ c->total_gtt)){
+ rrddim_set_by_pointer( c->st_mem_usage_perc_gtt,
+ c->rd_mem_used_perc_gtt,
+ c->used_gtt * 10000 / c->total_gtt);
+ rrdset_done(c->st_mem_usage_perc_gtt);
+
+ rrddim_set_by_pointer(c->st_mem_usage_gtt, c->rd_mem_used_gtt, c->used_gtt);
+ rrddim_set_by_pointer(c->st_mem_usage_gtt, c->rd_mem_free_gtt, c->total_gtt - c->used_gtt);
+ rrdset_done(c->st_mem_usage_gtt);
+ return 0;
+ }
+ else {
+ collector_error("Cannot read used_gtt for %s: [%s]", c->pathname, c->id.marketing_name);
+ freez((void *) c->pathname_mem_used_gtt);
+ freez((void *) c->pathname_mem_total_gtt);
+ rrdset_is_obsolete(c->st_mem_usage_perc_gtt);
+ rrdset_is_obsolete(c->st_mem_usage_gtt);
+ return 1;
+ }
+}
+
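+/* Entry point: on the first call it scans the drm directory for cardN entries driven
+ * by amdgpu, reads their static properties and creates their charts; on every call it
+ * runs the per-card collection callbacks that are still active. */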
+int do_sys_class_drm(int update_every, usec_t dt) {
+ (void)dt;
+
+ static DIR *drm_dir = NULL;
+
+ int chart_prio = NETDATA_CHART_PRIO_DRM_AMDGPU;
+
+ if(unlikely(!drm_dir)) {
+ char filename[FILENAME_MAX + 1];
+ snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/class/drm");
+ char *drm_dir_name = config_get(CONFIG_SECTION_PLUGIN_PROC_DRM, "directory to monitor", filename);
+ if(unlikely(NULL == (drm_dir = opendir(drm_dir_name)))){
+ collector_error("Cannot read directory '%s'", drm_dir_name);
+ return 1;
+ }
+
+ struct dirent *de = NULL;
+ while(likely(de = readdir(drm_dir))) {
+ if( de->d_type == DT_DIR && ((de->d_name[0] == '.' && de->d_name[1] == '\0') ||
+ (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0'))) continue;
+
+ if(de->d_type == DT_LNK && !strncmp(de->d_name, "card", 4) && !strchr(de->d_name, '-')) {
+ char filename[FILENAME_MAX + 1];
+
+ snprintfz(filename, FILENAME_MAX, "%s/%s/%s", drm_dir_name, de->d_name, "device/uevent");
+ if(check_card_is_amdgpu(filename)) continue;
+
+ /* Get static info */
+
+ struct card *const c = callocz(1, sizeof(struct card));
+ snprintfz(filename, FILENAME_MAX, "%s/%s", drm_dir_name, de->d_name);
+ c->pathname = strdupz(filename);
+
+ snprintfz(filename, FILENAME_MAX, "%s/%s", c->pathname, "device/device");
+ if(read_single_base64_or_hex_number_file(filename, &c->id.asic_id)){
+ collector_error("Cannot read asic_id from '%s'", filename);
+ card_free(c);
+ continue;
+ }
+
+ snprintfz(filename, FILENAME_MAX, "%s/%s", c->pathname, "device/revision");
+ if(read_single_base64_or_hex_number_file(filename, &c->id.pci_rev_id)){
+ collector_error("Cannot read pci_rev_id from '%s'", filename);
+ card_free(c);
+ continue;
+ }
+
+ for(int i = 0; amdgpu_ids[i].asic_id; i++){
+ if(c->id.asic_id == amdgpu_ids[i].asic_id && c->id.pci_rev_id == amdgpu_ids[i].pci_rev_id){
+ c->id.marketing_name = strdupz(amdgpu_ids[i].marketing_name);
+ break;
+ }
+ }
+ if(!c->id.marketing_name)
+ c->id.marketing_name = strdupz(amdgpu_ids[sizeof(amdgpu_ids)/sizeof(amdgpu_ids[0]) - 1].marketing_name);
+
+
+ collected_number tmp_val;
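+                /* Probe a sysfs attribute: remember its path only if it is readable right
+                 * now (as a clock-state file when p_ff is given, as a single number
+                 * otherwise); for the *_total files the value read is kept in tmp_val. */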
+ #define set_prop_pathname(prop_filename, prop_pathname, p_ff){ \
+ snprintfz(filename, FILENAME_MAX, "%s/%s", c->pathname, prop_filename); \
+ if((p_ff && !read_clk_freq_file(p_ff, filename, &tmp_val)) || \
+ !read_single_number_file(filename, (unsigned long long *) &tmp_val)) \
+ prop_pathname = strdupz(filename); \
+ else \
+ collector_info("Cannot read file '%s'", filename); \
+ }
+
+ /* Initialize GPU and VRAM utilization metrics */
+
+ set_prop_pathname("device/gpu_busy_percent", c->pathname_util_gpu, NULL);
+
+ if(c->pathname_util_gpu){
+ c->st_util_gpu = rrdset_create_localhost(
+ AMDGPU_CHART_TYPE
+ , set_id("gpu_utilization", c->id.marketing_name, de->d_name)
+ , NULL
+ , "utilization"
+ , AMDGPU_CHART_TYPE ".gpu_utilization"
+ , "GPU utilization"
+ , "percentage"
+ , PLUGIN_PROC_NAME
+ , PLUGIN_PROC_MODULE_DRM_NAME
+ , chart_prio++
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rrdlabels_add(c->st_util_gpu->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO);
+
+ c->rd_util_gpu = rrddim_add(c->st_util_gpu, "utilization", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+
+ add_do_rrd_x(c, do_rrd_util_gpu);
+ }
+
+ set_prop_pathname("device/mem_busy_percent", c->pathname_util_mem, NULL);
+
+ if(c->pathname_util_mem){
+ c->st_util_mem = rrdset_create_localhost(
+ AMDGPU_CHART_TYPE
+ , set_id("gpu_mem_utilization", c->id.marketing_name, de->d_name)
+ , NULL
+ , "utilization"
+ , AMDGPU_CHART_TYPE ".gpu_mem_utilization"
+ , "GPU memory utilization"
+ , "percentage"
+ , PLUGIN_PROC_NAME
+ , PLUGIN_PROC_MODULE_DRM_NAME
+ , chart_prio++
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rrdlabels_add(c->st_util_mem->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO);
+
+ c->rd_util_mem = rrddim_add(c->st_util_mem, "utilization", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+
+ add_do_rrd_x(c, do_rrd_util_mem);
+ }
+
+
+ /* Initialize GPU and VRAM clock frequency metrics */
+
+ set_prop_pathname("device/pp_dpm_sclk", c->pathname_clk_gpu, &c->ff_clk_gpu);
+
+ if(c->pathname_clk_gpu){
+ c->st_clk_gpu = rrdset_create_localhost(
+ AMDGPU_CHART_TYPE
+ , set_id("gpu_clk_frequency", c->id.marketing_name, de->d_name)
+ , NULL
+ , "frequency"
+ , AMDGPU_CHART_TYPE ".gpu_clk_frequency"
+ , "GPU clock frequency"
+ , "MHz"
+ , PLUGIN_PROC_NAME
+ , PLUGIN_PROC_MODULE_DRM_NAME
+ , chart_prio++
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rrdlabels_add(c->st_clk_gpu->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO);
+
+ c->rd_clk_gpu = rrddim_add(c->st_clk_gpu, "frequency", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+
+ add_do_rrd_x(c, do_rrd_clk_gpu);
+
+ }
+
+ set_prop_pathname("device/pp_dpm_mclk", c->pathname_clk_mem, &c->ff_clk_mem);
+
+ if(c->pathname_clk_mem){
+ c->st_clk_mem = rrdset_create_localhost(
+ AMDGPU_CHART_TYPE
+ , set_id("gpu_mem_clk_frequency", c->id.marketing_name, de->d_name)
+ , NULL
+ , "frequency"
+ , AMDGPU_CHART_TYPE ".gpu_mem_clk_frequency"
+ , "GPU memory clock frequency"
+ , "MHz"
+ , PLUGIN_PROC_NAME
+ , PLUGIN_PROC_MODULE_DRM_NAME
+ , chart_prio++
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rrdlabels_add(c->st_clk_mem->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO);
+
+ c->rd_clk_mem = rrddim_add(c->st_clk_mem, "frequency", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+
+ add_do_rrd_x(c, do_rrd_clk_mem);
+ }
+
+
+ /* Initialize GPU memory usage metrics */
+
+ set_prop_pathname("device/mem_info_vram_used", c->pathname_mem_used_vram, NULL);
+ set_prop_pathname("device/mem_info_vram_total", c->pathname_mem_total_vram, NULL);
+ if(c->pathname_mem_total_vram) c->total_vram = tmp_val;
+
+ if(c->pathname_mem_used_vram && c->pathname_mem_total_vram){
+ c->st_mem_usage_perc_vram = rrdset_create_localhost(
+ AMDGPU_CHART_TYPE
+ , set_id("gpu_mem_vram_usage_perc", c->id.marketing_name, de->d_name)
+ , NULL
+ , "memory_usage"
+ , AMDGPU_CHART_TYPE ".gpu_mem_vram_usage_perc"
+ , "VRAM memory usage percentage"
+ , "percentage"
+ , PLUGIN_PROC_NAME
+ , PLUGIN_PROC_MODULE_DRM_NAME
+ , chart_prio++
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rrdlabels_add(c->st_mem_usage_perc_vram->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO);
+
+ c->rd_mem_used_perc_vram = rrddim_add(c->st_mem_usage_perc_vram, "usage", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+
+
+ c->st_mem_usage_vram = rrdset_create_localhost(
+ AMDGPU_CHART_TYPE
+ , set_id("gpu_mem_vram_usage", c->id.marketing_name, de->d_name)
+ , NULL
+ , "memory_usage"
+ , AMDGPU_CHART_TYPE ".gpu_mem_vram_usage"
+ , "VRAM memory usage"
+ , "bytes"
+ , PLUGIN_PROC_NAME
+ , PLUGIN_PROC_MODULE_DRM_NAME
+ , chart_prio++
+ , update_every
+ , RRDSET_TYPE_STACKED
+ );
+
+ rrdlabels_add(c->st_mem_usage_vram->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO);
+
+ c->rd_mem_free_vram = rrddim_add(c->st_mem_usage_vram, "free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ c->rd_mem_used_vram = rrddim_add(c->st_mem_usage_vram, "used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+
+
+ add_do_rrd_x(c, do_rrd_vram);
+ }
+
+ set_prop_pathname("device/mem_info_vis_vram_used", c->pathname_mem_used_vis_vram, NULL);
+ set_prop_pathname("device/mem_info_vis_vram_total", c->pathname_mem_total_vis_vram, NULL);
+ if(c->pathname_mem_total_vis_vram) c->total_vis_vram = tmp_val;
+
+ if(c->pathname_mem_used_vis_vram && c->pathname_mem_total_vis_vram){
+ c->st_mem_usage_perc_vis_vram = rrdset_create_localhost(
+ AMDGPU_CHART_TYPE
+ , set_id("gpu_mem_vis_vram_usage_perc", c->id.marketing_name, de->d_name)
+ , NULL
+ , "memory_usage"
+ , AMDGPU_CHART_TYPE ".gpu_mem_vis_vram_usage_perc"
+ , "visible VRAM memory usage percentage"
+ , "percentage"
+ , PLUGIN_PROC_NAME
+ , PLUGIN_PROC_MODULE_DRM_NAME
+ , chart_prio++
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rrdlabels_add(c->st_mem_usage_perc_vis_vram->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO);
+
+ c->rd_mem_used_perc_vis_vram = rrddim_add(c->st_mem_usage_perc_vis_vram, "usage", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+
+
+ c->st_mem_usage_vis_vram = rrdset_create_localhost(
+ AMDGPU_CHART_TYPE
+ , set_id("gpu_mem_vis_vram_usage", c->id.marketing_name, de->d_name)
+ , NULL
+ , "memory_usage"
+ , AMDGPU_CHART_TYPE ".gpu_mem_vis_vram_usage"
+ , "visible VRAM memory usage"
+ , "bytes"
+ , PLUGIN_PROC_NAME
+ , PLUGIN_PROC_MODULE_DRM_NAME
+ , chart_prio++
+ , update_every
+ , RRDSET_TYPE_STACKED
+ );
+
+ rrdlabels_add(c->st_mem_usage_vis_vram->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO);
+
+ c->rd_mem_free_vis_vram = rrddim_add(c->st_mem_usage_vis_vram, "free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ c->rd_mem_used_vis_vram = rrddim_add(c->st_mem_usage_vis_vram, "used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+
+
+ add_do_rrd_x(c, do_rrd_vis_vram);
+ }
+
+ set_prop_pathname("device/mem_info_gtt_used", c->pathname_mem_used_gtt, NULL);
+ set_prop_pathname("device/mem_info_gtt_total", c->pathname_mem_total_gtt, NULL);
+ if(c->pathname_mem_total_gtt) c->total_gtt = tmp_val;
+
+ if(c->pathname_mem_used_gtt && c->pathname_mem_total_gtt){
+ c->st_mem_usage_perc_gtt = rrdset_create_localhost(
+ AMDGPU_CHART_TYPE
+ , set_id("gpu_mem_gtt_usage_perc", c->id.marketing_name, de->d_name)
+ , NULL
+ , "memory_usage"
+ , AMDGPU_CHART_TYPE ".gpu_mem_gtt_usage_perc"
+ , "GTT memory usage percentage"
+ , "percentage"
+ , PLUGIN_PROC_NAME
+ , PLUGIN_PROC_MODULE_DRM_NAME
+ , chart_prio++
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rrdlabels_add(c->st_mem_usage_perc_gtt->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO);
+
+ c->rd_mem_used_perc_gtt = rrddim_add(c->st_mem_usage_perc_gtt, "usage", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+
+ c->st_mem_usage_gtt = rrdset_create_localhost(
+ AMDGPU_CHART_TYPE
+ , set_id("gpu_mem_gtt_usage", c->id.marketing_name, de->d_name)
+ , NULL
+ , "memory_usage"
+ , AMDGPU_CHART_TYPE ".gpu_mem_gtt_usage"
+ , "GTT memory usage"
+ , "bytes"
+ , PLUGIN_PROC_NAME
+ , PLUGIN_PROC_MODULE_DRM_NAME
+ , chart_prio++
+ , update_every
+ , RRDSET_TYPE_STACKED
+ );
+
+ rrdlabels_add(c->st_mem_usage_gtt->rrdlabels, "product_name", c->id.marketing_name, RRDLABEL_SRC_AUTO);
+
+ c->rd_mem_free_gtt = rrddim_add(c->st_mem_usage_gtt, "free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ c->rd_mem_used_gtt = rrddim_add(c->st_mem_usage_gtt, "used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+
+
+ add_do_rrd_x(c, do_rrd_gtt);
+ }
+
+ c->next = card_root;
+ card_root = c;
+ }
+ }
+ }
+
+
+ struct card *card_cur = card_root,
+ *card_next;
+ while(card_cur){
+
+ struct do_rrd_x *do_rrd_x_cur = card_cur->do_rrd_x_root,
+ *do_rrd_x_next;
+ while(do_rrd_x_cur){
+ if(unlikely(do_rrd_x_cur->func(card_cur))) {
+ do_rrd_x_next = do_rrd_x_cur->next;
+ rm_do_rrd_x(card_cur, do_rrd_x_cur);
+ do_rrd_x_cur = do_rrd_x_next;
+ }
+ else do_rrd_x_cur = do_rrd_x_cur->next;
+ }
+
+ if(unlikely(!card_cur->do_rrd_x_root)){
+ card_next = card_cur->next;
+ card_free(card_cur);
+ card_cur = card_next;
+ }
+ else card_cur = card_cur->next;
+ }
+
+ return card_root ? 0 : 1;
+}
diff --git a/collectors/proc.plugin/sys_devices_pci_aer.c b/collectors/proc.plugin/sys_devices_pci_aer.c
new file mode 100644
index 00000000..13442623
--- /dev/null
+++ b/collectors/proc.plugin/sys_devices_pci_aer.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "plugin_proc.h"
+
+static char *pci_aer_dirname = NULL;
+
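+// Bit flags selecting which AER sysfs counter files are monitored.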
+typedef enum __attribute__((packed)) {
+ AER_DEV_NONFATAL = (1 << 0),
+ AER_DEV_CORRECTABLE = (1 << 1),
+ AER_DEV_FATAL = (1 << 2),
+ AER_ROOTPORT_TOTAL_ERR_COR = (1 << 3),
+ AER_ROOTPORT_TOTAL_ERR_FATAL = (1 << 4),
+} AER_TYPE;
+
+struct aer_value {
+ kernel_uint_t count;
+ RRDDIM *rd;
+};
+
+struct aer_entry {
+ bool updated;
+
+ STRING *name;
+ AER_TYPE type;
+
+ procfile *ff;
+ DICTIONARY *values;
+
+ RRDSET *st;
+};
+
+DICTIONARY *aer_root = NULL;
+
+static bool aer_value_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *data __maybe_unused) {
+ struct aer_value *v = old_value;
+ struct aer_value *nv = new_value;
+
+ v->count = nv->count;
+
+ return false;
+}
+
+static void aer_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) {
+ struct aer_entry *a = value;
+ a->values = dictionary_create(DICT_OPTION_SINGLE_THREADED|DICT_OPTION_DONT_OVERWRITE_VALUE);
+ dictionary_register_conflict_callback(a->values, aer_value_conflict_callback, NULL);
+}
+
+static void add_pci_aer(const char *base_dir, const char *d_name, AER_TYPE type) {
+ char buffer[FILENAME_MAX + 1];
+ snprintfz(buffer, FILENAME_MAX, "%s/%s", base_dir, d_name);
+ struct aer_entry *a = dictionary_set(aer_root, buffer, NULL, sizeof(struct aer_entry));
+
+ if(!a->name)
+ a->name = string_strdupz(d_name);
+
+ a->type = type;
+}
+
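+// Walks the PCI device tree under base_dir (depth-limited) and registers every
+// counter file that matches the requested AER types.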
+static bool recursively_find_pci_aer(AER_TYPE types, const char *base_dir, const char *d_name, int depth) {
+ if(depth > 100)
+ return false;
+
+ char buffer[FILENAME_MAX + 1];
+ snprintfz(buffer, FILENAME_MAX, "%s/%s", base_dir, d_name);
+ DIR *dir = opendir(buffer);
+ if(unlikely(!dir)) {
+ collector_error("Cannot read PCI_AER directory '%s'", buffer);
+ return true;
+ }
+
+ struct dirent *de = NULL;
+ while((de = readdir(dir))) {
+ if(de->d_type == DT_DIR) {
+ if(de->d_name[0] == '.')
+ continue;
+
+ recursively_find_pci_aer(types, buffer, de->d_name, depth + 1);
+ }
+ else if(de->d_type == DT_REG) {
+ if((types & AER_DEV_NONFATAL) && strcmp(de->d_name, "aer_dev_nonfatal") == 0) {
+ add_pci_aer(buffer, de->d_name, AER_DEV_NONFATAL);
+ }
+ else if((types & AER_DEV_CORRECTABLE) && strcmp(de->d_name, "aer_dev_correctable") == 0) {
+ add_pci_aer(buffer, de->d_name, AER_DEV_CORRECTABLE);
+ }
+ else if((types & AER_DEV_FATAL) && strcmp(de->d_name, "aer_dev_fatal") == 0) {
+ add_pci_aer(buffer, de->d_name, AER_DEV_FATAL);
+ }
+ else if((types & AER_ROOTPORT_TOTAL_ERR_COR) && strcmp(de->d_name, "aer_rootport_total_err_cor") == 0) {
+ add_pci_aer(buffer, de->d_name, AER_ROOTPORT_TOTAL_ERR_COR);
+ }
+ else if((types & AER_ROOTPORT_TOTAL_ERR_FATAL) && strcmp(de->d_name, "aer_rootport_total_err_fatal") == 0) {
+ add_pci_aer(buffer, de->d_name, AER_ROOTPORT_TOTAL_ERR_FATAL);
+ }
+ }
+ }
+ closedir(dir);
+ return true;
+}
+
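+// Scans the configured base directory (/sys/devices by default) for top-level
+// pci<domain> bus directories and descends into each of them.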
+static void find_all_pci_aer(AER_TYPE types) {
+ char name[FILENAME_MAX + 1];
+ snprintfz(name, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices");
+ pci_aer_dirname = config_get("plugin:proc:/sys/devices/pci/aer", "directory to monitor", name);
+
+ DIR *dir = opendir(pci_aer_dirname);
+ if(unlikely(!dir)) {
+ collector_error("Cannot read PCI_AER directory '%s'", pci_aer_dirname);
+ return;
+ }
+
+ struct dirent *de = NULL;
+ while((de = readdir(dir))) {
+ if(de->d_type == DT_DIR && de->d_name[0] == 'p' && de->d_name[1] == 'c' && de->d_name[2] == 'i' && isdigit(de->d_name[3]))
+ recursively_find_pci_aer(types, pci_aer_dirname, de->d_name, 1);
+ }
+ closedir(dir);
+}
+
+static void read_pci_aer_values(const char *filename, struct aer_entry *t) {
+ t->updated = false;
+
+ if(unlikely(!t->ff)) {
+ t->ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT);
+ if(unlikely(!t->ff))
+ return;
+ }
+
+ t->ff = procfile_readall(t->ff);
+ if(unlikely(!t->ff || procfile_lines(t->ff) < 1 || procfile_linewords(t->ff, 0) < 1))
+ return;
+
+ size_t lines = procfile_lines(t->ff);
+ for(size_t l = 0; l < lines ; l++) {
+ if(procfile_linewords(t->ff, l) != 2)
+ continue;
+
+ struct aer_value v = {
+ .count = str2ull(procfile_lineword(t->ff, l, 1), NULL)
+ };
+
+ char *key = procfile_lineword(t->ff, l, 0);
+ if(!key || !*key || (key[0] == 'T' && key[1] == 'O' && key[2] == 'T' && key[3] == 'A' && key[4] == 'L' && key[5] == '_'))
+ continue;
+
+ dictionary_set(t->values, key, &v, sizeof(v));
+ }
+
+ t->updated = true;
+}
+
+static void read_pci_aer_count(const char *filename, struct aer_entry *t) {
+ t->updated = false;
+
+ if(unlikely(!t->ff)) {
+ t->ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT);
+ if(unlikely(!t->ff))
+ return;
+ }
+
+ t->ff = procfile_readall(t->ff);
+ if(unlikely(!t->ff || procfile_lines(t->ff) < 1 || procfile_linewords(t->ff, 0) < 1))
+ return;
+
+ struct aer_value v = {
+ .count = str2ull(procfile_lineword(t->ff, 0, 0), NULL)
+ };
+ dictionary_set(t->values, "count", &v, sizeof(v));
+ t->updated = true;
+}
+
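+// Resolves a symlink (e.g. "driver") that sits next to the counter file and adds its
+// basename as a chart label.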
+static void add_label_from_link(struct aer_entry *a, const char *path, const char *link) {
+ char name[FILENAME_MAX + 1];
+ strncpyz(name, path, FILENAME_MAX);
+ char *slash = strrchr(name, '/');
+ if(slash)
+ *slash = '\0';
+
+ char name2[FILENAME_MAX + 1];
+ snprintfz(name2, FILENAME_MAX, "%s/%s", name, link);
+
+ ssize_t len = readlink(name2, name, FILENAME_MAX);
+ if(len != -1) {
+ name[len] = '\0'; // Null-terminate the string
+ slash = strrchr(name, '/');
+ if(slash) slash++;
+ else slash = name;
+ rrdlabels_add(a->st->rrdlabels, link, slash, RRDLABEL_SRC_AUTO);
+ }
+}
+
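+// On the first call, discover the AER counter files: root-port totals default to
+// enabled, per-device (slot) counters to disabled, and both default to disabled on
+// virtualized hosts. On every call, read the discovered counters and update the charts.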
+int do_proc_sys_devices_pci_aer(int update_every, usec_t dt __maybe_unused) {
+ if(unlikely(!aer_root)) {
+ int do_root_ports = CONFIG_BOOLEAN_AUTO;
+ int do_pci_slots = CONFIG_BOOLEAN_NO;
+
+ char buffer[100 + 1] = "";
+ rrdlabels_get_value_strcpyz(localhost->rrdlabels, buffer, 100, "_virtualization");
+ if(strcmp(buffer, "none") != 0) {
+ // no need to run on virtualized environments
+ do_root_ports = CONFIG_BOOLEAN_NO;
+ do_pci_slots = CONFIG_BOOLEAN_NO;
+ }
+
+ do_root_ports = config_get_boolean("plugin:proc:/sys/class/pci/aer", "enable root ports", do_root_ports);
+ do_pci_slots = config_get_boolean("plugin:proc:/sys/class/pci/aer", "enable pci slots", do_pci_slots);
+
+ if(!do_root_ports && !do_pci_slots)
+ return 1;
+
+ aer_root = dictionary_create(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE);
+ dictionary_register_insert_callback(aer_root, aer_insert_callback, NULL);
+
+ AER_TYPE types = ((do_root_ports) ? (AER_ROOTPORT_TOTAL_ERR_COR|AER_ROOTPORT_TOTAL_ERR_FATAL) : 0) |
+ ((do_pci_slots) ? (AER_DEV_FATAL|AER_DEV_NONFATAL|AER_DEV_CORRECTABLE) : 0);
+
+ find_all_pci_aer(types);
+
+ if(!dictionary_entries(aer_root))
+ return 1;
+ }
+
+ struct aer_entry *a;
+ dfe_start_read(aer_root, a) {
+ switch(a->type) {
+ case AER_DEV_NONFATAL:
+ case AER_DEV_FATAL:
+ case AER_DEV_CORRECTABLE:
+ read_pci_aer_values(a_dfe.name, a);
+ break;
+
+ case AER_ROOTPORT_TOTAL_ERR_COR:
+ case AER_ROOTPORT_TOTAL_ERR_FATAL:
+ read_pci_aer_count(a_dfe.name, a);
+ break;
+ }
+
+ if(!a->updated)
+ continue;
+
+ if(!a->st) {
+ const char *title;
+ const char *context;
+
+ switch(a->type) {
+ case AER_DEV_NONFATAL:
+ title = "PCI Advanced Error Reporting (AER) Non-Fatal Errors";
+ context = "pci.aer_nonfatal";
+ break;
+
+ case AER_DEV_FATAL:
+ title = "PCI Advanced Error Reporting (AER) Fatal Errors";
+ context = "pci.aer_fatal";
+ break;
+
+ case AER_DEV_CORRECTABLE:
+ title = "PCI Advanced Error Reporting (AER) Correctable Errors";
+ context = "pci.aer_correctable";
+ break;
+
+ case AER_ROOTPORT_TOTAL_ERR_COR:
+ title = "PCI Root-Port Advanced Error Reporting (AER) Correctable Errors";
+ context = "pci.rootport_aer_correctable";
+ break;
+
+ case AER_ROOTPORT_TOTAL_ERR_FATAL:
+ title = "PCI Root-Port Advanced Error Reporting (AER) Fatal Errors";
+ context = "pci.rootport_aer_fatal";
+ break;
+ }
+
+ char id[RRD_ID_LENGTH_MAX + 1];
+ char nm[RRD_ID_LENGTH_MAX + 1];
+ size_t len = strlen(pci_aer_dirname);
+
+ const char *fname = a_dfe.name;
+ if(strncmp(a_dfe.name, pci_aer_dirname, len) == 0)
+ fname = &a_dfe.name[len];
+
+ if(*fname == '/')
+ fname++;
+
+ snprintfz(id, RRD_ID_LENGTH_MAX, "%s_%s", &context[4], fname);
+ char *slash = strrchr(id, '/');
+ if(slash)
+ *slash = '\0';
+
+ netdata_fix_chart_id(id);
+
+ snprintfz(nm, RRD_ID_LENGTH_MAX, "%s", fname);
+ slash = strrchr(nm, '/');
+ if(slash)
+ *slash = '\0';
+
+ a->st = rrdset_create_localhost(
+ "pci"
+ , id
+ , NULL
+ , "aer"
+ , context
+ , title
+ , "errors/s"
+ , PLUGIN_PROC_NAME
+ , "/sys/devices/pci/aer"
+ , NETDATA_CHART_PRIO_PCI_AER
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rrdlabels_add(a->st->rrdlabels, "device", nm, RRDLABEL_SRC_AUTO);
+ add_label_from_link(a, a_dfe.name, "driver");
+
+ struct aer_value *v;
+ dfe_start_read(a->values, v) {
+ v->rd = rrddim_add(a->st, v_dfe.name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+ dfe_done(v);
+ }
+
+ struct aer_value *v;
+ dfe_start_read(a->values, v) {
+ if(unlikely(!v->rd))
+ v->rd = rrddim_add(a->st, v_dfe.name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+
+ rrddim_set_by_pointer(a->st, v->rd, (collected_number)v->count);
+ }
+ dfe_done(v);
+
+ rrdset_done(a->st);
+ }
+ dfe_done(a);
+
+ return 0;
+}
diff --git a/collectors/proc.plugin/sys_devices_system_edac_mc.c b/collectors/proc.plugin/sys_devices_system_edac_mc.c
index fdb6b51e..0947f61f 100644
--- a/collectors/proc.plugin/sys_devices_system_edac_mc.c
+++ b/collectors/proc.plugin/sys_devices_system_edac_mc.c
@@ -2,35 +2,51 @@
#include "plugin_proc.h"
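+// Per-counter state: sysfs file path, parsed procfile handle, last value and chart dimension.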
+struct edac_count {
+ bool updated;
+ char *filename;
+ procfile *ff;
+ kernel_uint_t count;
+ RRDDIM *rd;
+};
+
+struct edac_dimm {
+ char *name;
+
+ struct edac_count ce;
+ struct edac_count ue;
+
+ RRDSET *st;
+
+ struct edac_dimm *prev, *next;
+};
+
struct mc {
char *name;
- char ce_updated;
- char ue_updated;
- char *ce_count_filename;
- char *ue_count_filename;
+ struct edac_count ce;
+ struct edac_count ue;
+ struct edac_count ce_noinfo;
+ struct edac_count ue_noinfo;
- procfile *ce_ff;
- procfile *ue_ff;
+ RRDSET *st;
- collected_number ce_count;
- collected_number ue_count;
+ struct edac_dimm *dimms;
- RRDDIM *ce_rd;
- RRDDIM *ue_rd;
-
- struct mc *next;
+ struct mc *prev, *next;
};
+
static struct mc *mc_root = NULL;
+static char *mc_dirname = NULL;
static void find_all_mc() {
char name[FILENAME_MAX + 1];
snprintfz(name, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/edac/mc");
- char *dirname = config_get("plugin:proc:/sys/devices/system/edac/mc", "directory to monitor", name);
+ mc_dirname = config_get("plugin:proc:/sys/devices/system/edac/mc", "directory to monitor", name);
- DIR *dir = opendir(dirname);
+ DIR *dir = opendir(mc_dirname);
if(unlikely(!dir)) {
- collector_error("Cannot read ECC memory errors directory '%s'", dirname);
+ collector_error("Cannot read EDAC memory errors directory '%s'", mc_dirname);
return;
}
@@ -42,162 +58,240 @@ static void find_all_mc() {
struct stat st;
- snprintfz(name, FILENAME_MAX, "%s/%s/ce_count", dirname, de->d_name);
+ snprintfz(name, FILENAME_MAX, "%s/%s/ce_count", mc_dirname, de->d_name);
if(stat(name, &st) != -1)
- m->ce_count_filename = strdupz(name);
+ m->ce.filename = strdupz(name);
- snprintfz(name, FILENAME_MAX, "%s/%s/ue_count", dirname, de->d_name);
+ snprintfz(name, FILENAME_MAX, "%s/%s/ue_count", mc_dirname, de->d_name);
if(stat(name, &st) != -1)
- m->ue_count_filename = strdupz(name);
+ m->ue.filename = strdupz(name);
- if(!m->ce_count_filename && !m->ue_count_filename) {
+ snprintfz(name, FILENAME_MAX, "%s/%s/ce_noinfo_count", mc_dirname, de->d_name);
+ if(stat(name, &st) != -1)
+ m->ce_noinfo.filename = strdupz(name);
+
+ snprintfz(name, FILENAME_MAX, "%s/%s/ue_noinfo_count", mc_dirname, de->d_name);
+ if(stat(name, &st) != -1)
+ m->ue_noinfo.filename = strdupz(name);
+
+ if(!m->ce.filename && !m->ue.filename && !m->ce_noinfo.filename && !m->ue_noinfo.filename) {
freez(m->name);
freez(m);
}
- else {
- m->next = mc_root;
- mc_root = m;
- }
+ else
+ DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(mc_root, m, prev, next);
}
}
-
closedir(dir);
-}
-int do_proc_sys_devices_system_edac_mc(int update_every, usec_t dt) {
- (void)dt;
+ for(struct mc *m = mc_root; m ;m = m->next) {
+ snprintfz(name, FILENAME_MAX, "%s/%s", mc_dirname, m->name);
+ dir = opendir(name);
+ if(!dir) {
+ collector_error("Cannot read EDAC memory errors directory '%s'", name);
+ continue;
+ }
- if(unlikely(mc_root == NULL)) {
- find_all_mc();
- if(unlikely(mc_root == NULL))
- return 1;
- }
+ while((de = readdir(dir))) {
+ // it can be dimmX or rankX directory
+ // https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#f5
- static int do_ce = -1, do_ue = -1;
- NETDATA_DOUBLE ce_sum = 0, ue_sum = 0;
- struct mc *m;
+ if (de->d_type == DT_DIR &&
+ ((strncmp(de->d_name, "rank", 4) == 0 || strncmp(de->d_name, "dimm", 4) == 0)) &&
+ isdigit(de->d_name[4])) {
- if(unlikely(do_ce == -1)) {
- do_ce = config_get_boolean_ondemand("plugin:proc:/sys/devices/system/edac/mc", "enable ECC memory correctable errors", CONFIG_BOOLEAN_YES);
- do_ue = config_get_boolean_ondemand("plugin:proc:/sys/devices/system/edac/mc", "enable ECC memory uncorrectable errors", CONFIG_BOOLEAN_YES);
- }
+ struct edac_dimm *d = callocz(1, sizeof(struct edac_dimm));
+ d->name = strdupz(de->d_name);
- if(do_ce != CONFIG_BOOLEAN_NO) {
- for(m = mc_root; m; m = m->next) {
- if(m->ce_count_filename) {
- m->ce_updated = 0;
+ struct stat st;
- if(unlikely(!m->ce_ff)) {
- m->ce_ff = procfile_open(m->ce_count_filename, " \t", PROCFILE_FLAG_DEFAULT);
- if(unlikely(!m->ce_ff))
- continue;
- }
+ snprintfz(name, FILENAME_MAX, "%s/%s/%s/dimm_ce_count", mc_dirname, m->name, de->d_name);
+ if(stat(name, &st) != -1)
+ d->ce.filename = strdupz(name);
- m->ce_ff = procfile_readall(m->ce_ff);
- if(unlikely(!m->ce_ff || procfile_lines(m->ce_ff) < 1 || procfile_linewords(m->ce_ff, 0) < 1))
- continue;
+ snprintfz(name, FILENAME_MAX, "%s/%s/%s/dimm_ue_count", mc_dirname, m->name, de->d_name);
+ if(stat(name, &st) != -1)
+ d->ue.filename = strdupz(name);
- m->ce_count = str2ull(procfile_lineword(m->ce_ff, 0, 0), NULL);
- ce_sum += m->ce_count;
- m->ce_updated = 1;
+ if(!d->ce.filename && !d->ue.filename) {
+ freez(d->name);
+ freez(d);
+ }
+ else
+ DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(m->dimms, d, prev, next);
}
}
+ closedir(dir);
}
+}
- if(do_ue != CONFIG_BOOLEAN_NO) {
- for(m = mc_root; m; m = m->next) {
- if(m->ue_count_filename) {
- m->ue_updated = 0;
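+// Reads a single EDAC counter file; sets t->updated on success and returns the value
+// (0 on any failure).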
+static kernel_uint_t read_edac_count(struct edac_count *t) {
+ t->updated = false;
+ t->count = 0;
- if(unlikely(!m->ue_ff)) {
- m->ue_ff = procfile_open(m->ue_count_filename, " \t", PROCFILE_FLAG_DEFAULT);
- if(unlikely(!m->ue_ff))
- continue;
- }
+ if(t->filename) {
+ if(unlikely(!t->ff)) {
+ t->ff = procfile_open(t->filename, " \t", PROCFILE_FLAG_DEFAULT);
+ if(unlikely(!t->ff))
+ return 0;
+ }
- m->ue_ff = procfile_readall(m->ue_ff);
- if(unlikely(!m->ue_ff || procfile_lines(m->ue_ff) < 1 || procfile_linewords(m->ue_ff, 0) < 1))
- continue;
+ t->ff = procfile_readall(t->ff);
+ if(unlikely(!t->ff || procfile_lines(t->ff) < 1 || procfile_linewords(t->ff, 0) < 1))
+ return 0;
- m->ue_count = str2ull(procfile_lineword(m->ue_ff, 0, 0), NULL);
- ue_sum += m->ue_count;
- m->ue_updated = 1;
- }
+ t->count = str2ull(procfile_lineword(t->ff, 0, 0), NULL);
+ t->updated = true;
+ }
+
+ return t->count;
+}
+
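+// Helpers reading small informational sysfs files (mc_name, size_mb, dimm_label, ...)
+// that are used only to populate chart labels.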
+static bool read_edac_mc_file(const char *mc, const char *filename, char *out, size_t out_size) {
+ char f[FILENAME_MAX + 1];
+ snprintfz(f, FILENAME_MAX, "%s/%s/%s", mc_dirname, mc, filename);
+ if(read_file(f, out, out_size) != 0) {
+ collector_error("EDAC: cannot read file '%s'", f);
+ return false;
+ }
+ return true;
+}
+
+static bool read_edac_mc_rank_file(const char *mc, const char *rank, const char *filename, char *out, size_t out_size) {
+ char f[FILENAME_MAX + 1];
+ snprintfz(f, FILENAME_MAX, "%s/%s/%s/%s", mc_dirname, mc, rank, filename);
+ if(read_file(f, out, out_size) != 0) {
+ collector_error("EDAC: cannot read file '%s'", f);
+ return false;
+ }
+ return true;
+}
+
+int do_proc_sys_devices_system_edac_mc(int update_every, usec_t dt __maybe_unused) {
+ if(unlikely(!mc_root)) {
+ find_all_mc();
+
+ if(!mc_root)
+ // don't call this again
+ return 1;
+ }
+
+ for(struct mc *m = mc_root; m; m = m->next) {
+ read_edac_count(&m->ce);
+ read_edac_count(&m->ce_noinfo);
+ read_edac_count(&m->ue);
+ read_edac_count(&m->ue_noinfo);
+
+ for(struct edac_dimm *d = m->dimms; d ;d = d->next) {
+ read_edac_count(&d->ce);
+ read_edac_count(&d->ue);
}
}
// --------------------------------------------------------------------
- if(do_ce == CONFIG_BOOLEAN_YES || (do_ce == CONFIG_BOOLEAN_AUTO &&
- (ce_sum > 0 || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) {
- do_ce = CONFIG_BOOLEAN_YES;
+ for(struct mc *m = mc_root; m ; m = m->next) {
+ if(unlikely(!m->ce.updated && !m->ue.updated && !m->ce_noinfo.updated && !m->ue_noinfo.updated))
+ continue;
- static RRDSET *ce_st = NULL;
-
- if(unlikely(!ce_st)) {
- ce_st = rrdset_create_localhost(
+ if(unlikely(!m->st)) {
+ char id[RRD_ID_LENGTH_MAX + 1];
+ snprintfz(id, RRD_ID_LENGTH_MAX, "edac_%s", m->name);
+ m->st = rrdset_create_localhost(
"mem"
- , "ecc_ce"
- , NULL
- , "ecc"
+ , id
, NULL
- , "ECC Memory Correctable Errors"
- , "errors"
+ , "edac"
+ , "mem.edac_mc"
+ , "Memory Controller (MC) Error Detection And Correction (EDAC) Errors"
+ , "errors/s"
, PLUGIN_PROC_NAME
, "/sys/devices/system/edac/mc"
, NETDATA_CHART_PRIO_MEM_HW_ECC_CE
, update_every
, RRDSET_TYPE_LINE
);
- }
- for(m = mc_root; m; m = m->next) {
- if (m->ce_count_filename && m->ce_updated) {
- if(unlikely(!m->ce_rd))
- m->ce_rd = rrddim_add(ce_st, m->name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rrdlabels_add(m->st->rrdlabels, "controller", m->name, RRDLABEL_SRC_AUTO);
- rrddim_set_by_pointer(ce_st, m->ce_rd, m->ce_count);
- }
+ char buffer[1024 + 1];
+
+ if(read_edac_mc_file(m->name, "mc_name", buffer, 1024))
+ rrdlabels_add(m->st->rrdlabels, "mc_name", buffer, RRDLABEL_SRC_AUTO);
+
+ if(read_edac_mc_file(m->name, "size_mb", buffer, 1024))
+ rrdlabels_add(m->st->rrdlabels, "size_mb", buffer, RRDLABEL_SRC_AUTO);
+
+ if(read_edac_mc_file(m->name, "max_location", buffer, 1024))
+ rrdlabels_add(m->st->rrdlabels, "max_location", buffer, RRDLABEL_SRC_AUTO);
+
+ m->ce.rd = rrddim_add(m->st, "correctable", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ m->ue.rd = rrddim_add(m->st, "uncorrectable", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ m->ce_noinfo.rd = rrddim_add(m->st, "correctable_noinfo", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ m->ue_noinfo.rd = rrddim_add(m->st, "uncorrectable_noinfo", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
}
- rrdset_done(ce_st);
- }
+ rrddim_set_by_pointer(m->st, m->ce.rd, (collected_number)m->ce.count);
+ rrddim_set_by_pointer(m->st, m->ue.rd, (collected_number)m->ue.count);
+ rrddim_set_by_pointer(m->st, m->ce_noinfo.rd, (collected_number)m->ce_noinfo.count);
+ rrddim_set_by_pointer(m->st, m->ue_noinfo.rd, (collected_number)m->ue_noinfo.count);
- // --------------------------------------------------------------------
+ rrdset_done(m->st);
- if(do_ue == CONFIG_BOOLEAN_YES || (do_ue == CONFIG_BOOLEAN_AUTO &&
- (ue_sum > 0 || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) {
- do_ue = CONFIG_BOOLEAN_YES;
+ for(struct edac_dimm *d = m->dimms; d ;d = d->next) {
+ if(unlikely(!d->ce.updated && !d->ue.updated))
+ continue;
- static RRDSET *ue_st = NULL;
+ if(unlikely(!d->st)) {
+ char id[RRD_ID_LENGTH_MAX + 1];
+ snprintfz(id, RRD_ID_LENGTH_MAX, "edac_%s_%s", m->name, d->name);
+ d->st = rrdset_create_localhost(
+ "mem"
+ , id
+ , NULL
+ , "edac"
+ , "mem.edac_mc_dimm"
+ , "DIMM Error Detection And Correction (EDAC) Errors"
+ , "errors/s"
+ , PLUGIN_PROC_NAME
+ , "/sys/devices/system/edac/mc"
+ , NETDATA_CHART_PRIO_MEM_HW_ECC_CE + 1
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
- if(unlikely(!ue_st)) {
- ue_st = rrdset_create_localhost(
- "mem"
- , "ecc_ue"
- , NULL
- , "ecc"
- , NULL
- , "ECC Memory Uncorrectable Errors"
- , "errors"
- , PLUGIN_PROC_NAME
- , "/sys/devices/system/edac/mc"
- , NETDATA_CHART_PRIO_MEM_HW_ECC_UE
- , update_every
- , RRDSET_TYPE_LINE
- );
- }
+ rrdlabels_add(d->st->rrdlabels, "controller", m->name, RRDLABEL_SRC_AUTO);
+ rrdlabels_add(d->st->rrdlabels, "dimm", d->name, RRDLABEL_SRC_AUTO);
+
+ char buffer[1024 + 1];
- for(m = mc_root; m; m = m->next) {
- if (m->ue_count_filename && m->ue_updated) {
- if(unlikely(!m->ue_rd))
- m->ue_rd = rrddim_add(ue_st, m->name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ if(read_edac_mc_rank_file(m->name, d->name, "dimm_dev_type", buffer, 1024))
+ rrdlabels_add(d->st->rrdlabels, "dimm_dev_type", buffer, RRDLABEL_SRC_AUTO);
- rrddim_set_by_pointer(ue_st, m->ue_rd, m->ue_count);
+ if(read_edac_mc_rank_file(m->name, d->name, "dimm_edac_mode", buffer, 1024))
+ rrdlabels_add(d->st->rrdlabels, "dimm_edac_mode", buffer, RRDLABEL_SRC_AUTO);
+
+ if(read_edac_mc_rank_file(m->name, d->name, "dimm_label", buffer, 1024))
+ rrdlabels_add(d->st->rrdlabels, "dimm_label", buffer, RRDLABEL_SRC_AUTO);
+
+ if(read_edac_mc_rank_file(m->name, d->name, "dimm_location", buffer, 1024))
+ rrdlabels_add(d->st->rrdlabels, "dimm_location", buffer, RRDLABEL_SRC_AUTO);
+
+ if(read_edac_mc_rank_file(m->name, d->name, "dimm_mem_type", buffer, 1024))
+ rrdlabels_add(d->st->rrdlabels, "dimm_mem_type", buffer, RRDLABEL_SRC_AUTO);
+
+ if(read_edac_mc_rank_file(m->name, d->name, "size", buffer, 1024))
+ rrdlabels_add(d->st->rrdlabels, "size", buffer, RRDLABEL_SRC_AUTO);
+
+ d->ce.rd = rrddim_add(d->st, "correctable", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ d->ue.rd = rrddim_add(d->st, "uncorrectable", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
}
- }
- rrdset_done(ue_st);
+ rrddim_set_by_pointer(d->st, d->ce.rd, (collected_number)d->ce.count);
+ rrddim_set_by_pointer(d->st, d->ue.rd, (collected_number)d->ue.count);
+
+ rrdset_done(d->st);
+ }
}
return 0;