diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2018-11-07 12:19:29 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2018-11-07 12:20:17 +0000 |
commit | a64a253794ac64cb40befee54db53bde17dd0d49 (patch) | |
tree | c1024acc5f6e508814b944d99f112259bb28b1be /collectors/python.d.plugin/cpuidle | |
parent | New upstream version 1.10.0+dfsg (diff) | |
download | netdata-a64a253794ac64cb40befee54db53bde17dd0d49.tar.xz netdata-a64a253794ac64cb40befee54db53bde17dd0d49.zip |
New upstream version 1.11.0+dfsg (tag: upstream/1.11.0+dfsg)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors/python.d.plugin/cpuidle')
-rw-r--r-- | collectors/python.d.plugin/cpuidle/Makefile.inc | 13 | ||||
-rw-r--r-- | collectors/python.d.plugin/cpuidle/README.md | 11 | ||||
-rw-r--r-- | collectors/python.d.plugin/cpuidle/cpuidle.chart.py | 148 | ||||
-rw-r--r-- | collectors/python.d.plugin/cpuidle/cpuidle.conf | 40 |
4 files changed, 212 insertions, 0 deletions
diff --git a/collectors/python.d.plugin/cpuidle/Makefile.inc b/collectors/python.d.plugin/cpuidle/Makefile.inc new file mode 100644 index 000000000..66c47d3cf --- /dev/null +++ b/collectors/python.d.plugin/cpuidle/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += cpuidle/cpuidle.chart.py +dist_pythonconfig_DATA += cpuidle/cpuidle.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += cpuidle/README.md cpuidle/Makefile.inc + diff --git a/collectors/python.d.plugin/cpuidle/README.md b/collectors/python.d.plugin/cpuidle/README.md new file mode 100644 index 000000000..495169638 --- /dev/null +++ b/collectors/python.d.plugin/cpuidle/README.md @@ -0,0 +1,11 @@ +# cpuidle + +This module monitors the usage of CPU idle states. + +**Requirement:** +Your kernel needs to have `CONFIG_CPU_IDLE` enabled. + +It produces one stacked chart per CPU, showing the percentage of time spent in +each state. 
+ +--- diff --git a/collectors/python.d.plugin/cpuidle/cpuidle.chart.py b/collectors/python.d.plugin/cpuidle/cpuidle.chart.py new file mode 100644 index 000000000..feac025bf --- /dev/null +++ b/collectors/python.d.plugin/cpuidle/cpuidle.chart.py @@ -0,0 +1,148 @@ +# -*- coding: utf-8 -*- +# Description: cpuidle netdata python.d module +# Author: Steven Noonan (tycho) +# SPDX-License-Identifier: GPL-3.0-or-later + +import ctypes +import glob +import os +import platform + +from bases.FrameworkServices.SimpleService import SimpleService + +syscall = ctypes.CDLL('libc.so.6').syscall + +# default module values (can be overridden per job in `config`) +# update_every = 2 + + +class Service(SimpleService): + def __init__(self, configuration=None, name=None): + prefix = os.getenv('NETDATA_HOST_PREFIX', "") + if prefix.endswith('/'): + prefix = prefix[:-1] + self.sys_dir = prefix + "/sys/devices/system/cpu" + self.schedstat_path = prefix + "/proc/schedstat" + SimpleService.__init__(self, configuration=configuration, name=name) + self.order = [] + self.definitions = {} + self.fake_name = 'cpu' + self.assignment = {} + self.last_schedstat = None + + @staticmethod + def __gettid(): + # This is horrendous. We need the *thread id* (not the *process id*), + # but there's no Python standard library way of doing that. If you need + # to enable this module on a non-x86 machine type, you'll have to find + # the Linux syscall number for gettid() and add it to the dictionary + # below. + syscalls = { + 'i386': 224, + 'x86_64': 186, + } + if platform.machine() not in syscalls: + return None + tid = syscall(syscalls[platform.machine()]) + return tid + + def __wake_cpus(self, cpus): + # Requires Python 3.3+. This will "tickle" each CPU to force it to + # update its idle counters. 
+ if hasattr(os, 'sched_setaffinity'): + pid = self.__gettid() + save_affinity = os.sched_getaffinity(pid) + for idx in cpus: + os.sched_setaffinity(pid, [idx]) + os.sched_getaffinity(pid) + os.sched_setaffinity(pid, save_affinity) + + def __read_schedstat(self): + cpus = {} + for line in open(self.schedstat_path, 'r'): + if not line.startswith('cpu'): + continue + line = line.rstrip().split() + cpu = line[0] + active_time = line[7] + cpus[cpu] = int(active_time) // 1000 + return cpus + + def _get_data(self): + results = {} + + # Use the kernel scheduler stats to determine how much time was spent + # in C0 (active). + schedstat = self.__read_schedstat() + + # Determine if any of the CPUs are idle. If they are, then we need to + # tickle them in order to update their C-state residency statistics. + if self.last_schedstat is None: + needs_tickle = list(self.assignment.keys()) + else: + needs_tickle = [] + for cpu, active_time in self.last_schedstat.items(): + delta = schedstat[cpu] - active_time + if delta < 1: + needs_tickle.append(cpu) + + if needs_tickle: + # This line is critical for the stats to update. If we don't "tickle" + # idle CPUs, then the counters for those CPUs stop counting. + self.__wake_cpus([int(cpu[3:]) for cpu in needs_tickle]) + + # Re-read schedstat now that we've tickled any idlers. + schedstat = self.__read_schedstat() + + self.last_schedstat = schedstat + + for cpu, metrics in self.assignment.items(): + update_time = schedstat[cpu] + results[cpu + '_active_time'] = update_time + + for metric, path in metrics.items(): + residency = int(open(path, 'r').read()) + results[metric] = residency + + return results + + def check(self): + if self.__gettid() is None: + self.error('Cannot get thread ID. 
Stats would be completely broken.') + return False + + for path in sorted(glob.glob(self.sys_dir + '/cpu*/cpuidle/state*/name')): + # ['', 'sys', 'devices', 'system', 'cpu', 'cpu0', 'cpuidle', 'state3', 'name'] + path_elem = path.split('/') + cpu = path_elem[-4] + state = path_elem[-2] + statename = open(path, 'rt').read().rstrip() + + orderid = '%s_cpuidle' % (cpu,) + if orderid not in self.definitions: + self.order.append(orderid) + active_name = '%s_active_time' % (cpu,) + self.definitions[orderid] = { + 'options': [None, 'C-state residency', 'time%', 'cpuidle', 'cpuidle.cpuidle', 'stacked'], + 'lines': [ + [active_name, 'C0 (active)', 'percentage-of-incremental-row', 1, 1], + ], + } + self.assignment[cpu] = {} + + defid = '%s_%s_time' % (orderid, state) + + self.definitions[orderid]['lines'].append( + [defid, statename, 'percentage-of-incremental-row', 1, 1] + ) + + self.assignment[cpu][defid] = '/'.join(path_elem[:-1] + ['time']) + + # Sort order by kernel-specified CPU index + self.order.sort(key=lambda x: int(x.split('_')[0][3:])) + + if not self.definitions: + self.error("couldn't find cstate stats") + return False + + return True diff --git a/collectors/python.d.plugin/cpuidle/cpuidle.conf b/collectors/python.d.plugin/cpuidle/cpuidle.conf new file mode 100644 index 000000000..bc276fcd2 --- /dev/null +++ b/collectors/python.d.plugin/cpuidle/cpuidle.conf @@ -0,0 +1,40 @@ +# netdata python.d.plugin configuration for cpuidle +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). 
+ +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# retries sets the number of retries to be made in case of failures. +# If unset, the default for python.d.plugin is used. +# Attempts to restore the service are made once every update_every +# and only if the module has collected values in the past. +# retries: 60 + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0