summary | refs | log | tree | commit | diff | stats
path: root/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py
diff options
context:
space:
mode:
Diffstat (limited to 'collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py')
-rw-r--r-- collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py | 53
1 files changed, 52 insertions, 1 deletions
diff --git a/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py b/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py
index 23e90e658..6affae7b8 100644
--- a/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py
+++ b/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py
@@ -22,6 +22,7 @@ EMPTY_ROW_LIMIT = 500
POLLER_BREAK_ROW = '</nvidia_smi_log>'
PCI_BANDWIDTH = 'pci_bandwidth'
+PCI_BANDWIDTH_PERCENT = 'pci_bandwidth_percent'
FAN_SPEED = 'fan_speed'
GPU_UTIL = 'gpu_utilization'
MEM_UTIL = 'mem_utilization'
@@ -38,6 +39,7 @@ USER_NUM = 'user_num'
ORDER = [
PCI_BANDWIDTH,
+ PCI_BANDWIDTH_PERCENT,
FAN_SPEED,
GPU_UTIL,
MEM_UTIL,
@@ -56,7 +58,22 @@ ORDER = [
# Performance-state labels 'P0' through 'P15'; see
# https://docs.nvidia.com/gameworks/content/gameworkslibrary/coresdk/nvapi/group__gpupstate.html
POWER_STATES = ['P{0}'.format(state) for state in range(16)]
-
# PCIe transfer rate per lane, in gigabits per second (Gb/s), keyed by the
# link generation as a string.
PCI_SPEED = {
    '1': 2.5,
    '2': 5,
    '3': 8,
    '4': 16,
    '5': 32,
}
# PCIe line-encoding overhead per link generation, expressed as the fraction
# of raw bits that carry no payload: 2 of every 10 bits for generations 1-2
# and 2 of every 130 bits for generations 3-5.
# The numerators are float literals on purpose: with int / int the result is
# floored to 0 under Python 2 division semantics, which would silently drop
# the overhead from the bandwidth calculation.
PCI_ENCODING = {
    '1': 2.0 / 10,
    '2': 2.0 / 10,
    '3': 2.0 / 130,
    '4': 2.0 / 130,
    '5': 2.0 / 130,
}
def gpu_charts(gpu):
fam = gpu.full_name()
@@ -68,6 +85,13 @@ def gpu_charts(gpu):
['tx_util', 'tx', 'absolute', 1, -1],
]
},
+ PCI_BANDWIDTH_PERCENT: {
+ 'options': [None, 'PCI Express Bandwidth Percent', 'percentage', fam, 'nvidia_smi.pci_bandwidth_percent', 'area'],
+ 'lines': [
+ ['rx_util_percent', 'rx_percent'],
+ ['tx_util_percent', 'tx_percent'],
+ ]
+ },
FAN_SPEED: {
'options': [None, 'Fan Speed', 'percentage', fam, 'nvidia_smi.fan_speed', 'line'],
'lines': [
@@ -327,6 +351,24 @@ class GPU:
return 'gpu{0} {1}'.format(self.num, self.name())
@handle_attr_error
def pci_link_gen(self):
    """Return the text of pci/pci_gpu_link_info/pcie_gen/max_link_gen.

    The element is looked up step by step below self.root. If any step is
    missing, find() yields None and the next attribute access raises
    AttributeError, which is left to the handle_attr_error decorator
    (defined elsewhere in the file).
    """
    node = self.root
    for tag in ('pci', 'pci_gpu_link_info', 'pcie_gen', 'max_link_gen'):
        node = node.find(tag)
    return node.text
+
@handle_attr_error
def pci_link_width(self):
    """Return the part of max_link_width that precedes the first 'x'.

    Reads pci/pci_gpu_link_info/link_widths/max_link_width below self.root.
    The value is presumably of the form '16x' (to be confirmed against real
    nvidia-smi output), in which case '16' is returned as a string. A missing
    element raises AttributeError, which is left to the handle_attr_error
    decorator (defined elsewhere in the file).
    """
    node = self.root
    for tag in ('pci', 'pci_gpu_link_info', 'link_widths', 'max_link_width'):
        node = node.find(tag)
    width_text = node.text
    return width_text.split('x')[0]
+
def pci_bw_max(self):
    """Return the maximum PCIe bandwidth in kilobytes per second (kB/s).

    Returns None when the bandwidth cannot be computed: the link generation
    is not a key of both PCI_SPEED and PCI_ENCODING, or the link width is
    missing, not an integer, or not positive.
    """
    link_gen = self.pci_link_gen()
    if link_gen not in PCI_SPEED or link_gen not in PCI_ENCODING:
        return None

    # The width getter may hand back a non-numeric value (e.g. None when the
    # XML element is absent); treat that as "unknown" instead of letting
    # int() raise and abort the whole data collection.
    try:
        link_width = int(self.pci_link_width())
    except (TypeError, ValueError):
        return None
    if link_width <= 0:
        return None

    # Maximum PCIe Bandwidth = SPEED * WIDTH * (1 - ENCODING) - 1Gb/s.
    # see details https://enterprise-support.nvidia.com/s/article/understanding-pcie-configuration-for-maximum-performance
    max_gbits = PCI_SPEED[link_gen] * link_width * (1 - PCI_ENCODING[link_gen]) - 1

    # Gb/s -> kb/s (* 1000 * 1000) -> kB/s (/ 8). The float divisor keeps
    # the division exact even where int / int would floor.
    return max_gbits * 1000 * 1000 / 8.0
+
@handle_attr_error
def rx_util(self):
    """Return the first whitespace-separated token of pci/rx_util.

    The element text is presumably a number followed by a unit (to be
    confirmed against real nvidia-smi output); only the leading token is
    returned, as a string. A missing element raises AttributeError, which is
    left to the handle_attr_error decorator (defined elsewhere in the file).
    """
    pci_node = self.root.find('pci')
    raw_value = pci_node.find('rx_util').text
    return raw_value.split()[0]
@@ -439,6 +481,15 @@ class GPU:
'power_draw': self.power_draw(),
}
+ pci_bw_max = self.pci_bw_max()
+ if not pci_bw_max:
+ data['rx_util_percent'] = 0
+ data['tx_util_percent'] = 0
+ else :
+ data['rx_util_percent'] = str(int(int(self.rx_util())*100/self.pci_bw_max()))
+ data['tx_util_percent'] = str(int(int(self.tx_util())*100/self.pci_bw_max()))
+
+
for v in POWER_STATES:
data['power_state_' + v.lower()] = 0
p_state = self.power_state()