author     Daniel Baumann <daniel.baumann@progress-linux.org>  2023-08-10 09:18:49 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2023-08-10 09:18:49 +0000
commit     dd814a7c1a8de056a79f7238578b09236edd5506 (patch)
tree       429e7eed5a634a4efe9a6877ce66da8e64aa1782 /collectors/python.d.plugin/nvidia_smi
parent     Adding upstream version 1.41.0. (diff)
download   netdata-dd814a7c1a8de056a79f7238578b09236edd5506.tar.xz
           netdata-dd814a7c1a8de056a79f7238578b09236edd5506.zip
Adding upstream version 1.42.0. (upstream/1.42.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors/python.d.plugin/nvidia_smi')
-rw-r--r--  collectors/python.d.plugin/nvidia_smi/metadata.yaml        329
-rw-r--r--  collectors/python.d.plugin/nvidia_smi/metrics.csv           16
-rw-r--r--  collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py   40
3 files changed, 190 insertions(+), 195 deletions(-)
diff --git a/collectors/python.d.plugin/nvidia_smi/metadata.yaml b/collectors/python.d.plugin/nvidia_smi/metadata.yaml
index fc0c90d5f..9bf1e6ca7 100644
--- a/collectors/python.d.plugin/nvidia_smi/metadata.yaml
+++ b/collectors/python.d.plugin/nvidia_smi/metadata.yaml
@@ -1,163 +1,166 @@
-meta:
- plugin_name: python.d.plugin
- module_name: nvidia_smi
- monitored_instance:
- name: python.d nvidia_smi
- link: ''
- categories: []
- icon_filename: ''
- related_resources:
- integrations:
- list: []
- info_provided_to_referring_integrations:
- description: ''
- keywords: []
- most_popular: false
-overview:
- data_collection:
- metrics_description: ''
- method_description: ''
- supported_platforms:
- include: []
- exclude: []
- multi-instance: true
- additional_permissions:
- description: ''
- default_behavior:
- auto_detection:
- description: ''
- limits:
- description: ''
- performance_impact:
- description: ''
-setup:
- prerequisites:
- list: []
- configuration:
- file:
- name: ''
- description: ''
- options:
- description: ''
- folding:
- title: ''
- enabled: true
- list: []
- examples:
- folding:
- enabled: true
- title: ''
- list: []
-troubleshooting:
- problems:
- list: []
-alerts: []
-metrics:
- folding:
- title: Metrics
- enabled: false
- description: ""
- availability: []
- scopes:
- - name: GPU
- description: ""
- labels: []
- metrics:
- - name: nvidia_smi.pci_bandwidth
- description: PCI Express Bandwidth Utilization
- unit: "KiB/s"
- chart_type: area
- dimensions:
- - name: rx
- - name: tx
- - name: nvidia_smi.pci_bandwidth_percent
- description: PCI Express Bandwidth Percent
- unit: "percentage"
- chart_type: area
- dimensions:
- - name: rx_percent
- - name: tx_percent
- - name: nvidia_smi.fan_speed
- description: Fan Speed
- unit: "percentage"
- chart_type: line
- dimensions:
- - name: speed
- - name: nvidia_smi.gpu_utilization
- description: GPU Utilization
- unit: "percentage"
- chart_type: line
- dimensions:
- - name: utilization
- - name: nvidia_smi.mem_utilization
- description: Memory Bandwidth Utilization
- unit: "percentage"
- chart_type: line
- dimensions:
- - name: utilization
- - name: nvidia_smi.encoder_utilization
- description: Encoder/Decoder Utilization
- unit: "percentage"
- chart_type: line
- dimensions:
- - name: encoder
- - name: decoder
- - name: nvidia_smi.memory_allocated
- description: Memory Usage
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: free
- - name: used
- - name: nvidia_smi.bar1_memory_usage
- description: Bar1 Memory Usage
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: free
- - name: used
- - name: nvidia_smi.temperature
- description: Temperature
- unit: "celsius"
- chart_type: line
- dimensions:
- - name: temp
- - name: nvidia_smi.clocks
- description: Clock Frequencies
- unit: "MHz"
- chart_type: line
- dimensions:
- - name: graphics
- - name: video
- - name: sm
- - name: mem
- - name: nvidia_smi.power
- description: Power Utilization
- unit: "Watts"
- chart_type: line
- dimensions:
- - name: power
- - name: nvidia_smi.power_state
- description: Power State
- unit: "state"
- chart_type: line
- dimensions:
- - name: a dimension per {power_state}
- - name: nvidia_smi.processes_mem
- description: Memory Used by Each Process
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: a dimension per process
- - name: nvidia_smi.user_mem
- description: Memory Used by Each User
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: a dimension per user
- - name: nvidia_smi.user_num
- description: Number of User on GPU
- unit: "num"
- chart_type: line
- dimensions:
- - name: users
+# This collector will not appear in documentation, as the go version is preferred,
+# https://github.com/netdata/go.d.plugin/blob/master/modules/nvidia_smi/README.md
+#
+# meta:
+# plugin_name: python.d.plugin
+# module_name: nvidia_smi
+# monitored_instance:
+# name: python.d nvidia_smi
+# link: ''
+# categories: []
+# icon_filename: ''
+# related_resources:
+# integrations:
+# list: []
+# info_provided_to_referring_integrations:
+# description: ''
+# keywords: []
+# most_popular: false
+# overview:
+# data_collection:
+# metrics_description: ''
+# method_description: ''
+# supported_platforms:
+# include: []
+# exclude: []
+# multi_instance: true
+# additional_permissions:
+# description: ''
+# default_behavior:
+# auto_detection:
+# description: ''
+# limits:
+# description: ''
+# performance_impact:
+# description: ''
+# setup:
+# prerequisites:
+# list: []
+# configuration:
+# file:
+# name: ''
+# description: ''
+# options:
+# description: ''
+# folding:
+# title: ''
+# enabled: true
+# list: []
+# examples:
+# folding:
+# enabled: true
+# title: ''
+# list: []
+# troubleshooting:
+# problems:
+# list: []
+# alerts: []
+# metrics:
+# folding:
+# title: Metrics
+# enabled: false
+# description: ""
+# availability: []
+# scopes:
+# - name: GPU
+# description: ""
+# labels: []
+# metrics:
+# - name: nvidia_smi.pci_bandwidth
+# description: PCI Express Bandwidth Utilization
+# unit: "KiB/s"
+# chart_type: area
+# dimensions:
+# - name: rx
+# - name: tx
+# - name: nvidia_smi.pci_bandwidth_percent
+# description: PCI Express Bandwidth Percent
+# unit: "percentage"
+# chart_type: area
+# dimensions:
+# - name: rx_percent
+# - name: tx_percent
+# - name: nvidia_smi.fan_speed
+# description: Fan Speed
+# unit: "percentage"
+# chart_type: line
+# dimensions:
+# - name: speed
+# - name: nvidia_smi.gpu_utilization
+# description: GPU Utilization
+# unit: "percentage"
+# chart_type: line
+# dimensions:
+# - name: utilization
+# - name: nvidia_smi.mem_utilization
+# description: Memory Bandwidth Utilization
+# unit: "percentage"
+# chart_type: line
+# dimensions:
+# - name: utilization
+# - name: nvidia_smi.encoder_utilization
+# description: Encoder/Decoder Utilization
+# unit: "percentage"
+# chart_type: line
+# dimensions:
+# - name: encoder
+# - name: decoder
+# - name: nvidia_smi.memory_allocated
+# description: Memory Usage
+# unit: "MiB"
+# chart_type: stacked
+# dimensions:
+# - name: free
+# - name: used
+# - name: nvidia_smi.bar1_memory_usage
+# description: Bar1 Memory Usage
+# unit: "MiB"
+# chart_type: stacked
+# dimensions:
+# - name: free
+# - name: used
+# - name: nvidia_smi.temperature
+# description: Temperature
+# unit: "celsius"
+# chart_type: line
+# dimensions:
+# - name: temp
+# - name: nvidia_smi.clocks
+# description: Clock Frequencies
+# unit: "MHz"
+# chart_type: line
+# dimensions:
+# - name: graphics
+# - name: video
+# - name: sm
+# - name: mem
+# - name: nvidia_smi.power
+# description: Power Utilization
+# unit: "Watts"
+# chart_type: line
+# dimensions:
+# - name: power
+# - name: nvidia_smi.power_state
+# description: Power State
+# unit: "state"
+# chart_type: line
+# dimensions:
+# - name: a dimension per {power_state}
+# - name: nvidia_smi.processes_mem
+# description: Memory Used by Each Process
+# unit: "MiB"
+# chart_type: stacked
+# dimensions:
+# - name: a dimension per process
+# - name: nvidia_smi.user_mem
+# description: Memory Used by Each User
+# unit: "MiB"
+# chart_type: stacked
+# dimensions:
+# - name: a dimension per user
+# - name: nvidia_smi.user_num
+# description: Number of User on GPU
+# unit: "num"
+# chart_type: line
+# dimensions:
+# - name: users
diff --git a/collectors/python.d.plugin/nvidia_smi/metrics.csv b/collectors/python.d.plugin/nvidia_smi/metrics.csv
deleted file mode 100644
index 683ea5650..000000000
--- a/collectors/python.d.plugin/nvidia_smi/metrics.csv
+++ /dev/null
@@ -1,16 +0,0 @@
-metric,scope,dimensions,unit,description,chart_type,labels,plugin,module
-nvidia_smi.pci_bandwidth,GPU,"rx, tx",KiB/s,PCI Express Bandwidth Utilization,area,,python.d.plugin,nvidia_smi
-nvidia_smi.pci_bandwidth_percent,GPU,"rx_percent, tx_percent",percentage,PCI Express Bandwidth Percent,area,,python.d.plugin,nvidia_smi
-nvidia_smi.fan_speed,GPU,speed,percentage,Fan Speed,line,,python.d.plugin,nvidia_smi
-nvidia_smi.gpu_utilization,GPU,utilization,percentage,GPU Utilization,line,,python.d.plugin,nvidia_smi
-nvidia_smi.mem_utilization,GPU,utilization,percentage,Memory Bandwidth Utilization,line,,python.d.plugin,nvidia_smi
-nvidia_smi.encoder_utilization,GPU,"encoder, decoder",percentage,Encoder/Decoder Utilization,line,,python.d.plugin,nvidia_smi
-nvidia_smi.memory_allocated,GPU,"free, used",MiB,Memory Usage,stacked,,python.d.plugin,nvidia_smi
-nvidia_smi.bar1_memory_usage,GPU,"free, used",MiB,Bar1 Memory Usage,stacked,,python.d.plugin,nvidia_smi
-nvidia_smi.temperature,GPU,temp,celsius,Temperature,line,,python.d.plugin,nvidia_smi
-nvidia_smi.clocks,GPU,"graphics, video, sm, mem",MHz,Clock Frequencies,line,,python.d.plugin,nvidia_smi
-nvidia_smi.power,GPU,power,Watts,Power Utilization,line,,python.d.plugin,nvidia_smi
-nvidia_smi.power_state,GPU,a dimension per {power_state},state,Power State,line,,python.d.plugin,nvidia_smi
-nvidia_smi.processes_mem,GPU,a dimension per process,MiB,Memory Used by Each Process,stacked,,python.d.plugin,nvidia_smi
-nvidia_smi.user_mem,GPU,a dimension per user,MiB,Memory Used by Each User,stacked,,python.d.plugin,nvidia_smi
-nvidia_smi.user_num,GPU,users,num,Number of User on GPU,line,,python.d.plugin,nvidia_smi
diff --git a/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py b/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py
index 271c99638..556a61435 100644
--- a/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py
+++ b/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py
@@ -62,20 +62,22 @@ POWER_STATES = ['P' + str(i) for i in range(0, 16)]
# PCI Transfer data rate in gigabits per second (Gb/s) per generation
PCI_SPEED = {
- "1": 2.5,
- "2": 5,
- "3": 8,
- "4": 16,
- "5": 32
+ "1": 2.5,
+ "2": 5,
+ "3": 8,
+ "4": 16,
+ "5": 32
}
# PCI encoding per generation
PCI_ENCODING = {
- "1": 2/10,
- "2": 2/10,
- "3": 2/130,
- "4": 2/130,
- "5": 2/130
+ "1": 2 / 10,
+ "2": 2 / 10,
+ "3": 2 / 130,
+ "4": 2 / 130,
+ "5": 2 / 130
}
+
+
def gpu_charts(gpu):
fam = gpu.full_name()
@@ -88,7 +90,8 @@ def gpu_charts(gpu):
]
},
PCI_BANDWIDTH_PERCENT: {
- 'options': [None, 'PCI Express Bandwidth Percent', 'percentage', fam, 'nvidia_smi.pci_bandwidth_percent', 'area'],
+ 'options': [None, 'PCI Express Bandwidth Percent', 'percentage', fam, 'nvidia_smi.pci_bandwidth_percent',
+ 'area'],
'lines': [
['rx_util_percent', 'rx_percent'],
['tx_util_percent', 'tx_percent'],
@@ -358,7 +361,8 @@ class GPU:
@handle_attr_error
def pci_link_width(self):
- return self.root.find('pci').find('pci_gpu_link_info').find('link_widths').find('max_link_width').text.split('x')[0]
+ info = self.root.find('pci').find('pci_gpu_link_info')
+ return info.find('link_widths').find('max_link_width').text.split('x')[0]
def pci_bw_max(self):
link_gen = self.pci_link_gen()
@@ -368,7 +372,7 @@ class GPU:
# Maximum PCIe Bandwidth = SPEED * WIDTH * (1 - ENCODING) - 1Gb/s.
# see details https://enterprise-support.nvidia.com/s/article/understanding-pcie-configuration-for-maximum-performance
# return max bandwidth in kilobytes per second (kB/s)
- return (PCI_SPEED[link_gen] * link_width * (1- PCI_ENCODING[link_gen]) - 1) * 1000 * 1000 / 8
+ return (PCI_SPEED[link_gen] * link_width * (1 - PCI_ENCODING[link_gen]) - 1) * 1000 * 1000 / 8
@handle_attr_error
def rx_util(self):
@@ -435,13 +439,18 @@ class GPU:
return self.root.find('clocks').find('mem_clock').text.split()[0]
@handle_attr_error
+ def power_readings(self):
+ elem = self.root.find('power_readings')
+ return elem if elem else self.root.find('gpu_power_readings')
+
+ @handle_attr_error
def power_state(self):
- return str(self.root.find('power_readings').find('power_state').text.split()[0])
+ return str(self.power_readings().find('power_state').text.split()[0])
@handle_value_error
@handle_attr_error
def power_draw(self):
- return float(self.root.find('power_readings').find('power_draw').text.split()[0]) * 100
+ return float(self.power_readings().find('power_draw').text.split()[0]) * 100
@handle_attr_error
def processes(self):
@@ -492,7 +501,6 @@ class GPU:
data['rx_util_percent'] = str(int(int(self.rx_util()) * 100 / self.pci_bw_max()))
data['tx_util_percent'] = str(int(int(self.tx_util()) * 100 / self.pci_bw_max()))
-
for v in POWER_STATES:
data['power_state_' + v.lower()] = 0
p_state = self.power_state()
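
The pci_bw_max() comment in the hunk above gives the formula Maximum PCIe Bandwidth = SPEED * WIDTH * (1 - ENCODING) - 1 Gb/s, returned in kilobytes per second. A minimal standalone sketch of that arithmetic, using the PCI_SPEED and PCI_ENCODING tables from the same file; the helper name and the gen3 x16 example below are illustrative, not part of the commit:

    # PCIe raw rate in Gb/s per generation and encoding overhead
    # (8b/10b for gen 1-2, 128b/130b for gen 3+), as in the module.
    PCI_SPEED = {"1": 2.5, "2": 5, "3": 8, "4": 16, "5": 32}
    PCI_ENCODING = {"1": 2 / 10, "2": 2 / 10, "3": 2 / 130, "4": 2 / 130, "5": 2 / 130}


    def pci_bw_max_kb_s(link_gen, link_width):
        """Max bandwidth = SPEED * WIDTH * (1 - ENCODING) - 1 Gb/s,
        converted to kB/s (Gb/s * 1000 * 1000 / 8)."""
        gbps = PCI_SPEED[link_gen] * link_width * (1 - PCI_ENCODING[link_gen]) - 1
        return gbps * 1000 * 1000 / 8


    # Example: a gen3 x16 link works out to roughly 15.6 GB/s.
    print(pci_bw_max_kb_s("3", 16))  # ~15628846 kB/s

The collector then divides the raw rx/tx counters from nvidia-smi by this ceiling to produce the rx_util_percent/tx_util_percent dimensions shown in the last hunk.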