3 files changed, 27 insertions, 15 deletions
diff --git a/collectors/python.d.plugin/nvidia_smi/README.md b/collectors/python.d.plugin/nvidia_smi/README.md
index 06acfc297..48b611951 100644
--- a/collectors/python.d.plugin/nvidia_smi/README.md
+++ b/collectors/python.d.plugin/nvidia_smi/README.md
@@ -36,4 +36,5 @@ Sample:
 
 ```yaml
 poll_seconds: 1
-```
-\ No newline at end of file
+```
+[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fcollectors%2Fpython.d.plugin%2Fnvidia_smi%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]()
diff --git a/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py b/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py
index c3fff6219..7cb816c0d 100644
--- a/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py
+++ b/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py
@@ -15,6 +15,8 @@ disabled_by_default = True
 
 NVIDIA_SMI = 'nvidia-smi'
 
+BAD_VALUE = 'N/A'  # collected values equal to this string are dropped from the GPU data dict
+
 EMPTY_ROW = ''
 EMPTY_ROW_LIMIT = 500
 POLLER_BREAK_ROW = '</nvidia_smi_log>'
@@ -47,39 +49,39 @@ def gpu_charts(gpu):
 
     charts = {
         PCI_BANDWIDTH: {
-            'options': [None, 'PCI Express Bandwidth Utilization', 'KB/s', fam, 'nvidia_smi.pci_bandwidth', 'area'],
+            'options': [None, 'PCI Express Bandwidth Utilization', 'KiB/s', fam, 'nvidia_smi.pci_bandwidth', 'area'],
             'lines': [
                 ['rx_util', 'rx', 'absolute', 1, 1],
                 ['tx_util', 'tx', 'absolute', 1, -1],
             ]
         },
         FAN_SPEED: {
-            'options': [None, 'Fan Speed', '%', fam, 'nvidia_smi.fan_speed', 'line'],
+            'options': [None, 'Fan Speed', 'percentage', fam, 'nvidia_smi.fan_speed', 'line'],
             'lines': [
                 ['fan_speed', 'speed'],
             ]
         },
         GPU_UTIL: {
-            'options': [None, 'GPU Utilization', '%', fam, 'nvidia_smi.gpu_utilization', 'line'],
+            'options': [None, 'GPU Utilization', 'percentage', fam, 'nvidia_smi.gpu_utilization', 'line'],
             'lines': [
                 ['gpu_util', 'utilization'],
             ]
         },
         MEM_UTIL: {
-            'options': [None, 'Memory Bandwidth Utilization', '%', fam, 'nvidia_smi.mem_utilization', 'line'],
+            'options': [None, 'Memory Bandwidth Utilization', 'percentage', fam, 'nvidia_smi.mem_utilization', 'line'],
             'lines': [
                 ['memory_util', 'utilization'],
             ]
         },
         ENCODER_UTIL: {
-            'options': [None, 'Encoder/Decoder Utilization', '%', fam, 'nvidia_smi.encoder_utilization', 'line'],
+            'options': [None, 'Encoder/Decoder Utilization', 'percentage', fam, 'nvidia_smi.encoder_utilization', 'line'],
             'lines': [
                 ['encoder_util', 'encoder'],
                 ['decoder_util', 'decoder'],
             ]
         },
         MEM_ALLOCATED: {
-            'options': [None, 'Memory Allocated', 'MB', fam, 'nvidia_smi.memory_allocated', 'line'],
+            'options': [None, 'Memory Allocated', 'MiB', fam, 'nvidia_smi.memory_allocated', 'line'],
             'lines': [
                 ['fb_memory_usage', 'used'],
             ]
@@ -206,6 +208,15 @@ def handle_attr_error(method):
     return on_call
 
 
+def handle_value_error(method):  # decorator: call `method`, turning a ValueError into None
+    def on_call(*args, **kwargs):
+        try:
+            return method(*args, **kwargs)
+        except ValueError:  # e.g. float() applied to non-numeric text in a decorated getter
+            return None  # None entries are skipped when the GPU data dict is built
+    return on_call
+
+
 class GPU:
     def __init__(self, num, root):
         self.num = num
@@ -272,6 +283,7 @@ class GPU:
     def mem_clock(self):
         return self.root.find('clocks').find('mem_clock').text.split()[0]
 
+    @handle_value_error  # float() below raises ValueError on non-numeric text; yield None instead
     @handle_attr_error
     def power_draw(self):
         return float(self.root.find('power_readings').find('power_draw').text.split()[0]) * 100
@@ -294,7 +306,9 @@ class GPU:
             'power_draw': self.power_draw(),
         }
 
-        return dict(('gpu{0}_{1}'.format(self.num, k), v) for k, v in data.items() if v is not None)
+        return dict(
+            ('gpu{0}_{1}'.format(self.num, k), v) for k, v in data.items() if v is not None and v != BAD_VALUE
+        )
 
 
 class Service(SimpleService):
@@ -302,7 +316,6 @@ class Service(SimpleService):
         super(Service, self).__init__(configuration=configuration, name=name)
         self.order = list()
         self.definitions = dict()
-
         poll = int(configuration.get('poll_seconds', 1))
         self.poller = NvidiaSMIPoller(poll)
 
diff --git a/collectors/python.d.plugin/nvidia_smi/nvidia_smi.conf b/collectors/python.d.plugin/nvidia_smi/nvidia_smi.conf
index e1bcf3faf..53e544a5d 100644
--- a/collectors/python.d.plugin/nvidia_smi/nvidia_smi.conf
+++ b/collectors/python.d.plugin/nvidia_smi/nvidia_smi.conf
@@ -27,11 +27,9 @@
 # If unset, the default for python.d.plugin is used.
 # priority: 60000
 
-# retries sets the number of retries to be made in case of failures.
-# If unset, the default for python.d.plugin is used.
-# Attempts to restore the service are made once every update_every
-# and only if the module has collected values in the past.
-# retries: 60
+# penalty indicates whether to apply a penalty to update_every in case of failures.
+# The penalty increases after every 5 consecutive failed updates. The maximum penalty is 10 minutes.
+# penalty: yes
 
 # autodetection_retry sets the job re-check interval in seconds.
 # The job is not deleted if check fails.
@@ -58,7 +56,7 @@
 #                             # JOBs sharing a name are mutually exclusive
 #     update_every: 1         # the JOB's data collection frequency
 #     priority: 60000         # the JOB's order on the dashboard
-#     retries: 60             # the JOB's number of restoration attempts
+#     penalty: yes            # whether to penalize the JOB's update_every on failures
 #     autodetection_retry: 0  # the JOB's re-check interval in seconds
 #
 # Additionally to the above, example also supports the following: