From 836b47cb7e99a977c5a23b059ca1d0b5065d310e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 24 Jul 2024 11:54:23 +0200 Subject: Merging upstream version 1.46.3. Signed-off-by: Daniel Baumann --- src/collectors/python.d.plugin/gearman/README.md | 1 + .../python.d.plugin/gearman/gearman.chart.py | 243 +++++++++++++++++++++ .../python.d.plugin/gearman/gearman.conf | 75 +++++++ .../gearman/integrations/gearman.md | 210 ++++++++++++++++++ .../python.d.plugin/gearman/metadata.yaml | 168 ++++++++++++++ 5 files changed, 697 insertions(+) create mode 120000 src/collectors/python.d.plugin/gearman/README.md create mode 100644 src/collectors/python.d.plugin/gearman/gearman.chart.py create mode 100644 src/collectors/python.d.plugin/gearman/gearman.conf create mode 100644 src/collectors/python.d.plugin/gearman/integrations/gearman.md create mode 100644 src/collectors/python.d.plugin/gearman/metadata.yaml (limited to 'src/collectors/python.d.plugin/gearman') diff --git a/src/collectors/python.d.plugin/gearman/README.md b/src/collectors/python.d.plugin/gearman/README.md new file mode 120000 index 000000000..70189d698 --- /dev/null +++ b/src/collectors/python.d.plugin/gearman/README.md @@ -0,0 +1 @@ +integrations/gearman.md \ No newline at end of file diff --git a/src/collectors/python.d.plugin/gearman/gearman.chart.py b/src/collectors/python.d.plugin/gearman/gearman.chart.py new file mode 100644 index 000000000..5e280a4d8 --- /dev/null +++ b/src/collectors/python.d.plugin/gearman/gearman.chart.py @@ -0,0 +1,243 @@ +# Description: gearman netdata python.d module +# Author: Kyle Agronick (agronick) +# SPDX-License-Identifier: GPL-3.0+ + +# Gearman Netdata Plugin + +from copy import deepcopy + +from bases.FrameworkServices.SocketService import SocketService + +CHARTS = { + 'total_workers': { + 'options': [None, 'Total Jobs', 'Jobs', 'Total Jobs', 'gearman.total_jobs', 'line'], + 'lines': [ + ['total_pending', 'Pending', 'absolute'], + ['total_running', 'Running', 
'absolute'], + ] + }, +} + + +def job_chart_template(job_name): + return { + 'options': [None, job_name, 'Jobs', 'Activity by Job', 'gearman.single_job', 'stacked'], + 'lines': [ + ['{0}_pending'.format(job_name), 'Pending', 'absolute'], + ['{0}_idle'.format(job_name), 'Idle', 'absolute'], + ['{0}_running'.format(job_name), 'Running', 'absolute'], + ] + } + + +def build_result_dict(job): + """ + Compute the pending, idle and running counts for one parsed job + :return: dict + """ + + total, running, available = job['metrics'] + + idle = available - running + pending = total - running + + return { + '{0}_pending'.format(job['job_name']): pending, + '{0}_idle'.format(job['job_name']): idle, + '{0}_running'.format(job['job_name']): running, + } + + +def parse_worker_data(job): + job_name = job[0] + job_metrics = job[1:] + + return { + 'job_name': job_name, + 'metrics': job_metrics, + } + + +class GearmanReadException(BaseException): + pass + + +class Service(SocketService): + def __init__(self, configuration=None, name=None): + super(Service, self).__init__(configuration=configuration, name=name) + self.request = "status\n" + self._keep_alive = True + + self.host = self.configuration.get('host', 'localhost') + self.port = self.configuration.get('port', 4730) + + self.tls = self.configuration.get('tls', False) + self.cert = self.configuration.get('cert', None) + self.key = self.configuration.get('key', None) + + self.active_jobs = set() + self.definitions = deepcopy(CHARTS) + self.order = ['total_workers'] + + def _get_data(self): + """ + Format data received from socket + :return: dict, or None if reading from Gearman failed + """ + + try: + active_jobs = self.get_active_jobs() + except GearmanReadException: + return None + + found_jobs, job_data = self.process_jobs(active_jobs) + self.remove_stale_jobs(found_jobs) + return job_data + + def get_active_jobs(self): + active_jobs = [] + + for job in self.get_worker_data(): + parsed_job = parse_worker_data(job) + + # 
Gearman does not clean up old jobs + # We only care about jobs that have + # 
some relevant data + if not any(parsed_job['metrics']): + continue + + active_jobs.append(parsed_job) + + return active_jobs + + def get_worker_data(self): + """ + Split the data returned from Gearman + into a list of lists + + This returns the same output that you + would get from a gearadmin --status + command. + + Example output returned from + _get_raw_data(): + prefix generic_worker4 78 78 500 + generic_worker2 78 78 500 + generic_worker3 0 0 760 + generic_worker1 0 0 500 + + :return: list + """ + + try: + raw = self._get_raw_data() + except (ValueError, AttributeError): + raise GearmanReadException() + + if raw is None: + self.debug("Gearman returned no data") + raise GearmanReadException() + + workers = list() + + for line in raw.splitlines()[:-1]: + parts = line.split() + if not parts: + continue + + name = '_'.join(parts[:-3]) + try: + values = [int(w) for w in parts[-3:]] + except ValueError: + continue + + w = [name] + w.extend(values) + workers.append(w) + + return workers + + def process_jobs(self, active_jobs): + + output = { + 'total_pending': 0, + 'total_idle': 0, + 'total_running': 0, + } + found_jobs = set() + + for parsed_job in active_jobs: + + job_name = self.add_job(parsed_job) + found_jobs.add(job_name) + job_data = build_result_dict(parsed_job) + + for sum_value in ('pending', 'running', 'idle'): + output['total_{0}'.format(sum_value)] += job_data['{0}_{1}'.format(job_name, sum_value)] + + output.update(job_data) + + return found_jobs, output + + def remove_stale_jobs(self, active_job_list): + """ + Removes jobs that have no workers, pending jobs, + or running jobs + :param active_job_list: The latest list of active jobs + :type active_job_list: set + :return: None + """ + + for to_remove in self.active_jobs - active_job_list: + self.remove_job(to_remove) + + def add_job(self, parsed_job): + """ + Adds a job to the list of active jobs + :param parsed_job: A parsed job dict + :type parsed_job: dict + :return: The name of the job + """ + + def 
add_chart(job_name): + """ + Adds a new job chart + :param job_name: The name of the job to add + :type job_name: string + :return: None + """ + + job_key = 'job_{0}'.format(job_name) + template = job_chart_template(job_name) + new_chart = self.charts.add_chart([job_key] + template['options']) + for dimension in template['lines']: + new_chart.add_dimension(dimension) + + if parsed_job['job_name'] not in self.active_jobs: + add_chart(parsed_job['job_name']) + self.active_jobs.add(parsed_job['job_name']) + + return parsed_job['job_name'] + + def remove_job(self, job_name): + """ + Removes a job from the list of active jobs + :param job_name: The name of the job to remove + :type job_name: string + :return: None + """ + + def remove_chart(job_name): + """ + Removes a job chart + :param job_name: The name of the job to remove + :type job_name: string + :return: None + """ + + job_key = 'job_{0}'.format(job_name) + self.charts[job_key].obsolete() + del self.charts[job_key] + + remove_chart(job_name) + self.active_jobs.remove(job_name) diff --git a/src/collectors/python.d.plugin/gearman/gearman.conf b/src/collectors/python.d.plugin/gearman/gearman.conf new file mode 100644 index 000000000..635e893ef --- /dev/null +++ b/src/collectors/python.d.plugin/gearman/gearman.conf @@ -0,0 +1,75 @@ +# netdata python.d.plugin configuration for gearman +# +# This file is in YAML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. 
+# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. 
These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, gearman also supports the following: +# +# host: localhost # The host running the Gearman server +# port: 4730 # Port of the Gearman server +# tls: no # Whether to use TLS or not +# cert: /path/to/cert # Path to cert if using TLS +# key: /path/to/key # Path to key if using TLS +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOB + +localhost: + name : 'local' + host : 'localhost' + port : 4730 \ No newline at end of file diff --git a/src/collectors/python.d.plugin/gearman/integrations/gearman.md b/src/collectors/python.d.plugin/gearman/integrations/gearman.md new file mode 100644 index 000000000..717b0dcad --- /dev/null +++ b/src/collectors/python.d.plugin/gearman/integrations/gearman.md @@ -0,0 +1,210 @@ + + +# Gearman + + + + + +Plugin: python.d.plugin +Module: gearman + + + +## Overview + +Monitor Gearman metrics for proficient system task distribution. Track job counts, worker statuses, and queue lengths for effective distributed task management. + +This collector connects to a Gearman instance via either TCP or unix socket. + +This collector is supported on all platforms. + +This collector supports collecting metrics from multiple instances of this integration, including remote instances. + + +### Default Behavior + +#### Auto-Detection + +When no configuration file is found, the collector tries to connect to TCP/IP socket: localhost:4730. + +#### Limits + +The default configuration for this integration does not impose any limits on data collection. 
+ +#### Performance Impact + +The default configuration for this integration is not expected to impose a significant performance impact on the system. + + +## Metrics + +Metrics grouped by *scope*. + +The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. + + + +### Per Gearman instance + +These metrics refer to the entire monitored application. + +This scope has no labels. + +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| gearman.total_jobs | Pending, Running | Jobs | + +### Per gearman job + +Metrics related to Gearman jobs. Each job produces its own set of the following metrics. + +This scope has no labels. + +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| gearman.single_job | Pending, Idle, Running | Jobs | + + + +## Alerts + + +The following alerts are available: + +| Alert name | On metric | Description | +|:------------|:----------|:------------| +| [ gearman_workers_queued ](https://github.com/netdata/netdata/blob/master/src/health/health.d/gearman.conf) | gearman.single_job | average number of queued jobs over the last 10 minutes | + + +## Setup + +### Prerequisites + +#### Socket permissions + +The gearman UNIX socket should have read permission for user netdata. + + +### Configuration + +#### File + +The configuration file name for this integration is `python.d/gearman.conf`. + + +You can edit the configuration file using the `edit-config` script from the +Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). + +```bash +cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata +sudo ./edit-config python.d/gearman.conf +``` +#### Options + +There are 2 sections: + +* Global variables +* One or more JOBS that can define multiple different instances to monitor. 
+ +The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + +Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + +Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + + +
Config options + +| Name | Description | Default | Required | +|:----|:-----------|:-------|:--------:| +| update_every | Sets the default data collection frequency. | 5 | no | +| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | +| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | +| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | +| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | +| host | URL or IP where gearman is running. | localhost | no | +| port | Port of URL or IP where gearman is running. | 4730 | no | +| tls | Use tls to connect to gearman. | false | no | +| cert | Provide a certificate file if needed to connect to a TLS gearman instance. | | no | +| key | Provide a key file if needed to connect to a TLS gearman instance. | | no | + +
+ +#### Examples + +##### Local gearman service + +A basic host and port gearman configuration for localhost. + +```yaml +localhost: + name: 'local' + host: 'localhost' + port: 4730 + +``` +##### Multi-instance + +> **Note**: When you define multiple jobs, their names must be unique. + +Collecting metrics from local and remote instances. + + +
Config + +```yaml +localhost: + name: 'local' + host: 'localhost' + port: 4730 + +remote: + name: 'remote' + host: '192.0.2.1' + port: 4730 + +``` +
+ + + +## Troubleshooting + +### Debug Mode + +To troubleshoot issues with the `gearman` collector, run the `python.d.plugin` with the debug option enabled. The output +should give you clues as to why the collector isn't working. + +- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on + your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. + + ```bash + cd /usr/libexec/netdata/plugins.d/ + ``` + +- Switch to the `netdata` user. + + ```bash + sudo -u netdata -s + ``` + +- Run the `python.d.plugin` to debug the collector: + + ```bash + ./python.d.plugin gearman debug trace + ``` + + diff --git a/src/collectors/python.d.plugin/gearman/metadata.yaml b/src/collectors/python.d.plugin/gearman/metadata.yaml new file mode 100644 index 000000000..4ab9c12ef --- /dev/null +++ b/src/collectors/python.d.plugin/gearman/metadata.yaml @@ -0,0 +1,168 @@ +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: gearman + monitored_instance: + name: Gearman + link: "http://gearman.org/" + categories: + - data-collection.distributed-computing-systems + icon_filename: "gearman.png" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - gearman + - gearman job server + most_popular: false + overview: + data_collection: + metrics_description: "Monitor Gearman metrics for proficient system task distribution. Track job counts, worker statuses, and queue lengths for effective distributed task management." + method_description: "This collector connects to a Gearman instance via either TCP or unix socket." + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "When no configuration file is found, the collector tries to connect to TCP/IP socket: localhost:4730." 
+ limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: "Socket permissions" + description: The gearman UNIX socket should have read permission for user netdata. + configuration: + file: + name: python.d/gearman.conf + options: + description: | + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: update_every + description: Sets the default data collection frequency. + default_value: 5 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. + default_value: "" + required: false + - name: host + description: URL or IP where gearman is running. + default_value: "localhost" + required: false + - name: port + description: Port of URL or IP where gearman is running. 
+ default_value: "4730" + required: false + - name: tls + description: Use tls to connect to gearman. + default_value: "false" + required: false + - name: cert + description: Provide a certificate file if needed to connect to a TLS gearman instance. + default_value: "" + required: false + - name: key + description: Provide a key file if needed to connect to a TLS gearman instance. + default_value: "" + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Local gearman service + description: A basic host and port gearman configuration for localhost. + folding: + enabled: false + config: | + localhost: + name: 'local' + host: 'localhost' + port: 4730 + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. + config: | + localhost: + name: 'local' + host: 'localhost' + port: 4730 + + remote: + name: 'remote' + host: '192.0.2.1' + port: 4730 + troubleshooting: + problems: + list: [] + alerts: + - name: gearman_workers_queued + link: https://github.com/netdata/netdata/blob/master/src/health/health.d/gearman.conf + metric: gearman.single_job + info: average number of queued jobs over the last 10 minutes + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: gearman.total_jobs + description: Total Jobs + unit: "Jobs" + chart_type: line + dimensions: + - name: Pending + - name: Running + - name: gearman job + description: "Metrics related to Gearman jobs. Each job produces its own set of the following metrics." + labels: [] + metrics: + - name: gearman.single_job + description: "{job_name}" + unit: "Jobs" + chart_type: stacked + dimensions: + - name: Pending + - name: Idle + - name: Running -- cgit v1.2.3