From 8a7b72f7cd1ccd547a03eb4243294e741d661d3f Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Fri, 8 Feb 2019 08:30:37 +0100
Subject: Adding upstream version 1.12.0.

Signed-off-by: Daniel Baumann
---
 collectors/python.d.plugin/nvidia_smi/README.md    |  3 ++-
 .../python.d.plugin/nvidia_smi/nvidia_smi.chart.py | 29 ++++++++++++++++------
 .../python.d.plugin/nvidia_smi/nvidia_smi.conf     | 10 +++-----
 3 files changed, 27 insertions(+), 15 deletions(-)

(limited to 'collectors/python.d.plugin/nvidia_smi')

diff --git a/collectors/python.d.plugin/nvidia_smi/README.md b/collectors/python.d.plugin/nvidia_smi/README.md
index 06acfc297..48b611951 100644
--- a/collectors/python.d.plugin/nvidia_smi/README.md
+++ b/collectors/python.d.plugin/nvidia_smi/README.md
@@ -36,4 +36,5 @@ Sample:

 ```yaml
 poll_seconds: 1
-```
\ No newline at end of file
+```
+[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fcollectors%2Fpython.d.plugin%2Fnvidia_smi%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]()

diff --git a/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py b/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py
index c3fff6219..7cb816c0d 100644
--- a/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py
+++ b/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py
@@ -15,6 +15,8 @@ disabled_by_default = True

 NVIDIA_SMI = 'nvidia-smi'

+BAD_VALUE = 'N/A'
+
 EMPTY_ROW = ''
 EMPTY_ROW_LIMIT = 500
 POLLER_BREAK_ROW = '</nvidia_smi_log>'
@@ -47,39 +49,39 @@ def gpu_charts(gpu):

     charts = {
         PCI_BANDWIDTH: {
-            'options': [None, 'PCI Express Bandwidth Utilization', 'KB/s', fam, 'nvidia_smi.pci_bandwidth', 'area'],
+            'options': [None, 'PCI Express Bandwidth Utilization', 'KiB/s', fam, 'nvidia_smi.pci_bandwidth', 'area'],
             'lines': [
                 ['rx_util', 'rx', 'absolute', 1, 1],
                 ['tx_util', 'tx', 'absolute', 1, -1],
             ]
         },
         FAN_SPEED: {
-            'options': [None, 'Fan Speed', '%', fam, 'nvidia_smi.fan_speed', 'line'],
+            'options': [None, 'Fan Speed', 'percentage', fam, 'nvidia_smi.fan_speed', 'line'],
             'lines': [
                 ['fan_speed', 'speed'],
             ]
         },
         GPU_UTIL: {
-            'options': [None, 'GPU Utilization', '%', fam, 'nvidia_smi.gpu_utilization', 'line'],
+            'options': [None, 'GPU Utilization', 'percentage', fam, 'nvidia_smi.gpu_utilization', 'line'],
             'lines': [
                 ['gpu_util', 'utilization'],
             ]
         },
         MEM_UTIL: {
-            'options': [None, 'Memory Bandwidth Utilization', '%', fam, 'nvidia_smi.mem_utilization', 'line'],
+            'options': [None, 'Memory Bandwidth Utilization', 'percentage', fam, 'nvidia_smi.mem_utilization', 'line'],
             'lines': [
                 ['memory_util', 'utilization'],
             ]
         },
         ENCODER_UTIL: {
-            'options': [None, 'Encoder/Decoder Utilization', '%', fam, 'nvidia_smi.encoder_utilization', 'line'],
+            'options': [None, 'Encoder/Decoder Utilization', 'percentage', fam, 'nvidia_smi.encoder_utilization', 'line'],
             'lines': [
                 ['encoder_util', 'encoder'],
                 ['decoder_util', 'decoder'],
             ]
         },
         MEM_ALLOCATED: {
-            'options': [None, 'Memory Allocated', 'MB', fam, 'nvidia_smi.memory_allocated', 'line'],
+            'options': [None, 'Memory Allocated', 'MiB', fam, 'nvidia_smi.memory_allocated', 'line'],
             'lines': [
                 ['fb_memory_usage', 'used'],
             ]
@@ -206,6 +208,15 @@ def handle_attr_error(method):
     return on_call


+def handle_value_error(method):
+    def on_call(*args, **kwargs):
+        try:
+            return method(*args, **kwargs)
+        except ValueError:
+            return None
+    return on_call
+
+
 class GPU:
     def __init__(self, num, root):
         self.num = num
@@ -272,6 +283,7 @@ class GPU:
     def mem_clock(self):
         return self.root.find('clocks').find('mem_clock').text.split()[0]

+    @handle_value_error
     @handle_attr_error
     def power_draw(self):
         return float(self.root.find('power_readings').find('power_draw').text.split()[0]) * 100
@@ -294,7 +306,9 @@ class GPU:
             'power_draw': self.power_draw(),
         }

-        return dict(('gpu{0}_{1}'.format(self.num, k), v) for k, v in data.items() if v is not None)
+        return dict(
+            ('gpu{0}_{1}'.format(self.num, k), v) for k, v in data.items() if v is not None and v != BAD_VALUE
+        )


 class Service(SimpleService):
@@ -302,7 +316,6 @@ class Service(SimpleService):
         super(Service, self).__init__(configuration=configuration, name=name)
         self.order = list()
         self.definitions = dict()
-
         poll = int(configuration.get('poll_seconds', 1))
         self.poller = NvidiaSMIPoller(poll)

diff --git a/collectors/python.d.plugin/nvidia_smi/nvidia_smi.conf b/collectors/python.d.plugin/nvidia_smi/nvidia_smi.conf
index e1bcf3faf..53e544a5d 100644
--- a/collectors/python.d.plugin/nvidia_smi/nvidia_smi.conf
+++ b/collectors/python.d.plugin/nvidia_smi/nvidia_smi.conf
@@ -27,11 +27,9 @@
 # If unset, the default for python.d.plugin is used.
 # priority: 60000

-# retries sets the number of retries to be made in case of failures.
-# If unset, the default for python.d.plugin is used.
-# Attempts to restore the service are made once every update_every
-# and only if the module has collected values in the past.
-# retries: 60
+# penalty indicates whether to apply penalty to update_every in case of failures.
+# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes.
+# penalty: yes

 # autodetection_retry sets the job re-check interval in seconds.
 # The job is not deleted if check fails.
@@ -58,7 +56,7 @@
 # # JOBs sharing a name are mutually exclusive
 # update_every: 1           # the JOB's data collection frequency
 # priority: 60000           # the JOB's order on the dashboard
-# retries: 60               # the JOB's number of restoration attempts
+# penalty: yes              # the JOB's penalty
 # autodetection_retry: 0    # the JOB's re-check interval in seconds
 #
 # Additionally to the above, example also supports the following:
--
cgit v1.2.3
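
The chart.py change above stacks a new `handle_value_error` decorator on top of `handle_attr_error` so that a `power_draw` of `N/A` (which makes `float()` raise `ValueError`) is turned into `None` and dropped from the collected data instead of crashing the collector. The following standalone sketch illustrates that pattern; the function name `parse_power_draw` and the sample strings are illustrative only and do not appear in the patch.

```python
# Sketch of the stacked-decorator pattern introduced by the patch:
# handle_attr_error swallows AttributeError (missing XML element),
# handle_value_error swallows ValueError (e.g. float('N/A')).

def handle_attr_error(method):
    def on_call(*args, **kwargs):
        try:
            return method(*args, **kwargs)
        except AttributeError:
            return None
    return on_call


def handle_value_error(method):
    def on_call(*args, **kwargs):
        try:
            return method(*args, **kwargs)
        except ValueError:
            return None
    return on_call


@handle_value_error
@handle_attr_error
def parse_power_draw(text):
    # nvidia-smi reports values like '25.60 W'; unsupported GPUs report 'N/A',
    # which raises ValueError in float() and is converted to None here.
    return float(text.split()[0]) * 100


print(parse_power_draw('25.60 W'))  # 2560.0
print(parse_power_draw('N/A'))      # None
```

Values that come back as `None` (or as the literal string `'N/A'`, now named `BAD_VALUE`) are filtered out in `GPU.data()`, which is why the dict comprehension in the patch gains the extra `v != BAD_VALUE` condition.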