summaryrefslogtreecommitdiffstats
path: root/collectors/python.d.plugin/cpuidle
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2018-11-07 12:19:29 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2018-11-07 12:20:17 +0000
commita64a253794ac64cb40befee54db53bde17dd0d49 (patch)
treec1024acc5f6e508814b944d99f112259bb28b1be /collectors/python.d.plugin/cpuidle
parentNew upstream version 1.10.0+dfsg (diff)
downloadnetdata-a64a253794ac64cb40befee54db53bde17dd0d49.tar.xz
netdata-a64a253794ac64cb40befee54db53bde17dd0d49.zip
New upstream version 1.11.0+dfsg (tag: upstream/1.11.0+dfsg)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors/python.d.plugin/cpuidle')
-rw-r--r--collectors/python.d.plugin/cpuidle/Makefile.inc13
-rw-r--r--collectors/python.d.plugin/cpuidle/README.md11
-rw-r--r--collectors/python.d.plugin/cpuidle/cpuidle.chart.py148
-rw-r--r--collectors/python.d.plugin/cpuidle/cpuidle.conf40
4 files changed, 212 insertions, 0 deletions
diff --git a/collectors/python.d.plugin/cpuidle/Makefile.inc b/collectors/python.d.plugin/cpuidle/Makefile.inc
new file mode 100644
index 000000000..66c47d3cf
--- /dev/null
+++ b/collectors/python.d.plugin/cpuidle/Makefile.inc
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# THIS IS NOT A COMPLETE Makefile
+# IT IS INCLUDED BY ITS PARENT'S Makefile.am
+# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT
+
+# install these files
+dist_python_DATA += cpuidle/cpuidle.chart.py
+dist_pythonconfig_DATA += cpuidle/cpuidle.conf
+
+# do not install these files, but include them in the distribution
+dist_noinst_DATA += cpuidle/README.md cpuidle/Makefile.inc
+
diff --git a/collectors/python.d.plugin/cpuidle/README.md b/collectors/python.d.plugin/cpuidle/README.md
new file mode 100644
index 000000000..495169638
--- /dev/null
+++ b/collectors/python.d.plugin/cpuidle/README.md
@@ -0,0 +1,11 @@
+# cpuidle
+
+This module monitors the usage of CPU idle states.
+
+**Requirement:**
+Your kernel needs to have `CONFIG_CPU_IDLE` enabled.
+
+It produces one stacked chart per CPU, showing the percentage of time spent in
+each state.
+
+---
diff --git a/collectors/python.d.plugin/cpuidle/cpuidle.chart.py b/collectors/python.d.plugin/cpuidle/cpuidle.chart.py
new file mode 100644
index 000000000..feac025bf
--- /dev/null
+++ b/collectors/python.d.plugin/cpuidle/cpuidle.chart.py
@@ -0,0 +1,148 @@
+# -*- coding: utf-8 -*-
+# Description: cpuidle netdata python.d module
+# Author: Steven Noonan (tycho)
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+import ctypes
+import glob
+import os
+import platform
+
+from bases.FrameworkServices.SimpleService import SimpleService
+
+syscall = ctypes.CDLL('libc.so.6').syscall
+
+# default module values (can be overridden per job in `config`)
+# update_every = 2
+
+
+class Service(SimpleService):
+ def __init__(self, configuration=None, name=None):
+ prefix = os.getenv('NETDATA_HOST_PREFIX', "")
+ if prefix.endswith('/'):
+ prefix = prefix[:-1]
+ self.sys_dir = prefix + "/sys/devices/system/cpu"
+ self.schedstat_path = prefix + "/proc/schedstat"
+ SimpleService.__init__(self, configuration=configuration, name=name)
+ self.order = []
+ self.definitions = {}
+ self.fake_name = 'cpu'
+ self.assignment = {}
+ self.last_schedstat = None
+
+ @staticmethod
+ def __gettid():
+ # This is horrendous. We need the *thread id* (not the *process id*),
+ # but there's no Python standard library way of doing that. If you need
+ # to enable this module on a non-x86 machine type, you'll have to find
+ # the Linux syscall number for gettid() and add it to the dictionary
+ # below.
+ syscalls = {
+ 'i386': 224,
+ 'x86_64': 186,
+ }
+ if platform.machine() not in syscalls:
+ return None
+ tid = syscall(syscalls[platform.machine()])
+ return tid
+
+ def __wake_cpus(self, cpus):
+ # Requires Python 3.3+. This will "tickle" each CPU to force it to
+ # update its idle counters.
+ if hasattr(os, 'sched_setaffinity'):
+ pid = self.__gettid()
+ save_affinity = os.sched_getaffinity(pid)
+ for idx in cpus:
+ os.sched_setaffinity(pid, [idx])
+ os.sched_getaffinity(pid)
+ os.sched_setaffinity(pid, save_affinity)
+
+ def __read_schedstat(self):
+ cpus = {}
+ for line in open(self.schedstat_path, 'r'):
+ if not line.startswith('cpu'):
+ continue
+ line = line.rstrip().split()
+ cpu = line[0]
+ active_time = line[7]
+ cpus[cpu] = int(active_time) // 1000
+ return cpus
+
+ def _get_data(self):
+ results = {}
+
+ # Use the kernel scheduler stats to determine how much time was spent
+ # in C0 (active).
+ schedstat = self.__read_schedstat()
+
+ # Determine if any of the CPUs are idle. If they are, then we need to
+ # tickle them in order to update their C-state residency statistics.
+ if self.last_schedstat is None:
+ needs_tickle = list(self.assignment.keys())
+ else:
+ needs_tickle = []
+ for cpu, active_time in self.last_schedstat.items():
+ delta = schedstat[cpu] - active_time
+ if delta < 1:
+ needs_tickle.append(cpu)
+
+ if needs_tickle:
+ # This line is critical for the stats to update. If we don't "tickle"
+ # idle CPUs, then the counters for those CPUs stop counting.
+ self.__wake_cpus([int(cpu[3:]) for cpu in needs_tickle])
+
+ # Re-read schedstat now that we've tickled any idlers.
+ schedstat = self.__read_schedstat()
+
+ self.last_schedstat = schedstat
+
+ for cpu, metrics in self.assignment.items():
+ update_time = schedstat[cpu]
+ results[cpu + '_active_time'] = update_time
+
+ for metric, path in metrics.items():
+ residency = int(open(path, 'r').read())
+ results[metric] = residency
+
+ return results
+
+ def check(self):
+ if self.__gettid() is None:
+ self.error('Cannot get thread ID. Stats would be completely broken.')
+ return False
+
+ for path in sorted(glob.glob(self.sys_dir + '/cpu*/cpuidle/state*/name')):
+ # ['', 'sys', 'devices', 'system', 'cpu', 'cpu0', 'cpuidle', 'state3', 'name']
+ path_elem = path.split('/')
+ cpu = path_elem[-4]
+ state = path_elem[-2]
+ statename = open(path, 'rt').read().rstrip()
+
+ orderid = '%s_cpuidle' % (cpu,)
+ if orderid not in self.definitions:
+ self.order.append(orderid)
+ active_name = '%s_active_time' % (cpu,)
+ self.definitions[orderid] = {
+ 'options': [None, 'C-state residency', 'time%', 'cpuidle', 'cpuidle.cpuidle', 'stacked'],
+ 'lines': [
+ [active_name, 'C0 (active)', 'percentage-of-incremental-row', 1, 1],
+ ],
+ }
+ self.assignment[cpu] = {}
+
+ defid = '%s_%s_time' % (orderid, state)
+
+ self.definitions[orderid]['lines'].append(
+ [defid, statename, 'percentage-of-incremental-row', 1, 1]
+ )
+
+ self.assignment[cpu][defid] = '/'.join(path_elem[:-1] + ['time'])
+
+ # Sort order by kernel-specified CPU index
+ self.order.sort(key=lambda x: int(x.split('_')[0][3:]))
+
+ if not self.definitions:
+ self.error("couldn't find cstate stats")
+ return False
+
+ return True
diff --git a/collectors/python.d.plugin/cpuidle/cpuidle.conf b/collectors/python.d.plugin/cpuidle/cpuidle.conf
new file mode 100644
index 000000000..bc276fcd2
--- /dev/null
+++ b/collectors/python.d.plugin/cpuidle/cpuidle.conf
@@ -0,0 +1,40 @@
+# netdata python.d.plugin configuration for cpuidle
+#
+# This file is in YAML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 60
+
+# autodetection_retry sets the job re-check interval in seconds.
+# The job is not deleted if check fails.
+# Attempts to start the job are made once every autodetection_retry.
+# This feature is disabled by default.
+# autodetection_retry: 0