diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 12:08:03 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 12:08:18 +0000 |
commit | 5da14042f70711ea5cf66e034699730335462f66 (patch) | |
tree | 0f6354ccac934ed87a2d555f45be4c831cf92f4a /src/go/collectors/go.d.plugin/modules/systemdunits | |
parent | Releasing debian version 1.44.3-2. (diff) | |
download | netdata-5da14042f70711ea5cf66e034699730335462f66.tar.xz netdata-5da14042f70711ea5cf66e034699730335462f66.zip |
Merging upstream version 1.45.3+dfsg.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/go/collectors/go.d.plugin/modules/systemdunits')
13 files changed, 1991 insertions, 0 deletions
diff --git a/src/go/collectors/go.d.plugin/modules/systemdunits/README.md b/src/go/collectors/go.d.plugin/modules/systemdunits/README.md new file mode 120000 index 000000000..68dd433bf --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/systemdunits/README.md @@ -0,0 +1 @@ +integrations/systemd_units.md
\ No newline at end of file diff --git a/src/go/collectors/go.d.plugin/modules/systemdunits/charts.go b/src/go/collectors/go.d.plugin/modules/systemdunits/charts.go new file mode 100644 index 000000000..210fc568d --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/systemdunits/charts.go @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +//go:build linux +// +build linux + +package systemdunits + +import ( + "fmt" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" + + "golang.org/x/text/cases" + "golang.org/x/text/language" +) + +const ( + prioServiceUnitState = module.Priority + iota + prioSocketUnitState + prioTargetUnitState + prioPathUnitState + prioDeviceUnitState + prioMountUnitState + prioAutomountUnitState + prioSwapUnitState + prioTimerUnitState + prioScopeUnitState + prioSliceUnitState +) + +var prioMap = map[string]int{ + unitTypeService: prioServiceUnitState, + unitTypeSocket: prioSocketUnitState, + unitTypeTarget: prioTargetUnitState, + unitTypePath: prioPathUnitState, + unitTypeDevice: prioDeviceUnitState, + unitTypeMount: prioMountUnitState, + unitTypeAutomount: prioAutomountUnitState, + unitTypeSwap: prioSwapUnitState, + unitTypeTimer: prioTimerUnitState, + unitTypeScope: prioScopeUnitState, + unitTypeSlice: prioSliceUnitState, +} + +func newTypedUnitStateChartTmpl(name, typ string) *module.Chart { + chart := module.Chart{ + ID: fmt.Sprintf("unit_%s_%s_state", name, typ), + Title: fmt.Sprintf("%s Unit State", cases.Title(language.English, cases.Compact).String(typ)), + Units: "state", + Fam: fmt.Sprintf("%s units", typ), + Ctx: fmt.Sprintf("systemd.%s_unit_state", typ), + Priority: prioMap[typ], + Labels: []module.Label{ + {Key: "unit_name", Value: name}, + }, + Dims: module.Dims{ + {Name: unitStateActive}, + {Name: unitStateInactive}, + {Name: unitStateActivating}, + {Name: unitStateDeactivating}, + {Name: unitStateFailed}, + }, + } + for _, d := range chart.Dims { + d.ID = fmt.Sprintf("unit_%s_%s_state_%s", name, typ, 
d.Name) + } + return &chart +} + +func (s *SystemdUnits) addUnitToCharts(name, typ string) { + chart := newTypedUnitStateChartTmpl(name, typ) + + if err := s.Charts().Add(chart); err != nil { + s.Warning(err) + } +} diff --git a/src/go/collectors/go.d.plugin/modules/systemdunits/client.go b/src/go/collectors/go.d.plugin/modules/systemdunits/client.go new file mode 100644 index 000000000..a2787c4ec --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/systemdunits/client.go @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +//go:build linux +// +build linux + +package systemdunits + +import ( + "context" + + "github.com/coreos/go-systemd/v22/dbus" +) + +type systemdClient interface { + connect() (systemdConnection, error) +} +type systemdConnection interface { + Close() + GetManagerProperty(string) (string, error) + ListUnitsContext(ctx context.Context) ([]dbus.UnitStatus, error) + ListUnitsByPatternsContext(ctx context.Context, states []string, patterns []string) ([]dbus.UnitStatus, error) +} + +type systemdDBusClient struct{} + +func (systemdDBusClient) connect() (systemdConnection, error) { + return dbus.NewWithContext(context.Background()) +} + +func newSystemdDBusClient() *systemdDBusClient { + return &systemdDBusClient{} +} diff --git a/src/go/collectors/go.d.plugin/modules/systemdunits/collect.go b/src/go/collectors/go.d.plugin/modules/systemdunits/collect.go new file mode 100644 index 000000000..eb596605f --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/systemdunits/collect.go @@ -0,0 +1,211 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +//go:build linux +// +build linux + +package systemdunits + +import ( + "context" + "fmt" + "regexp" + "strconv" + "strings" + + "github.com/coreos/go-systemd/v22/dbus" +) + +const ( + // https://www.freedesktop.org/software/systemd/man/systemd.html + unitStateActive = "active" + unitStateInactive = "inactive" + unitStateActivating = "activating" + unitStateDeactivating = "deactivating" + 
unitStateFailed = "failed" + + // https://www.freedesktop.org/software/systemd/man/systemd.html + unitTypeService = "service" + unitTypeSocket = "socket" + unitTypeTarget = "target" + unitTypePath = "path" + unitTypeDevice = "device" + unitTypeMount = "mount" + unitTypeAutomount = "automount" + unitTypeSwap = "swap" + unitTypeTimer = "timer" + unitTypeScope = "scope" + unitTypeSlice = "slice" +) + +var ( + unitStates = []string{ + unitStateActive, + unitStateActivating, + unitStateFailed, + unitStateInactive, + unitStateDeactivating, + } +) + +func (s *SystemdUnits) collect() (map[string]int64, error) { + conn, err := s.getConnection() + if err != nil { + return nil, err + } + + if s.systemdVersion == 0 { + ver, err := s.getSystemdVersion(conn) + if err != nil { + s.closeConnection() + return nil, err + } + s.systemdVersion = ver + } + + var units []dbus.UnitStatus + if s.systemdVersion >= 230 { + // https://github.com/systemd/systemd/pull/3142 + units, err = s.getLoadedUnitsByPatterns(conn) + } else { + units, err = s.getLoadedUnits(conn) + } + if err != nil { + s.closeConnection() + return nil, err + } + + if len(units) == 0 { + return nil, nil + } + + mx := make(map[string]int64) + s.collectUnitsStates(mx, units) + + return mx, nil +} + +func (s *SystemdUnits) collectUnitsStates(mx map[string]int64, units []dbus.UnitStatus) { + for _, unit := range units { + name, typ := extractUnitNameType(cleanUnitName(unit.Name)) + if name == "" || typ == "" { + continue + } + + if !s.units[unit.Name] { + s.units[unit.Name] = true + s.addUnitToCharts(name, typ) + } + + for _, s := range unitStates { + mx[fmt.Sprintf("unit_%s_%s_state_%s", name, typ, s)] = 0 + } + mx[fmt.Sprintf("unit_%s_%s_state_%s", name, typ, unit.ActiveState)] = 1 + } +} + +func (s *SystemdUnits) getConnection() (systemdConnection, error) { + if s.conn == nil { + conn, err := s.client.connect() + if err != nil { + return nil, fmt.Errorf("error on creating a connection: %v", err) + } + s.conn = conn + } + 
return s.conn, nil +} + +func (s *SystemdUnits) closeConnection() { + if s.conn != nil { + s.conn.Close() + s.conn = nil + } +} + +var reVersion = regexp.MustCompile(`[0-9][0-9][0-9]`) + +const versionProperty = "Version" + +func (s *SystemdUnits) getSystemdVersion(conn systemdConnection) (int, error) { + s.Debugf("calling function 'GetManagerProperty'") + version, err := conn.GetManagerProperty(versionProperty) + if err != nil { + return 0, fmt.Errorf("error on getting '%s' manager property: %v", versionProperty, err) + } + + s.Debugf("systemd version: %s", version) + + major := reVersion.FindString(version) + if major == "" { + return 0, fmt.Errorf("couldn't parse systemd version string '%s'", version) + } + + ver, err := strconv.Atoi(major) + if err != nil { + return 0, fmt.Errorf("couldn't parse systemd version string '%s': %v", version, err) + } + + return ver, nil +} + +func (s *SystemdUnits) getLoadedUnits(conn systemdConnection) ([]dbus.UnitStatus, error) { + ctx, cancel := context.WithTimeout(context.Background(), s.Timeout.Duration()) + defer cancel() + + s.Debugf("calling function 'ListUnits'") + units, err := conn.ListUnitsContext(ctx) + if err != nil { + return nil, fmt.Errorf("error on ListUnits: %v", err) + } + + loaded := units[:0] + for _, unit := range units { + if unit.LoadState == "loaded" && s.sr.MatchString(unit.Name) { + loaded = append(loaded, unit) + } + } + s.Debugf("got total/loaded %d/%d units", len(units), len(loaded)) + + return loaded, nil +} + +func (s *SystemdUnits) getLoadedUnitsByPatterns(conn systemdConnection) ([]dbus.UnitStatus, error) { + ctx, cancel := context.WithTimeout(context.Background(), s.Timeout.Duration()) + defer cancel() + + s.Debugf("calling function 'ListUnitsByPatterns'") + + units, err := conn.ListUnitsByPatternsContext(ctx, unitStates, s.Include) + if err != nil { + return nil, fmt.Errorf("error on ListUnitsByPatterns: %v", err) + } + + loaded := units[:0] + for _, unit := range units { + if unit.LoadState == 
"loaded" { + loaded = append(loaded, unit) + } + } + s.Debugf("got total/loaded %d/%d units", len(units), len(loaded)) + + return loaded, nil +} + +func extractUnitNameType(name string) (string, string) { + idx := strings.LastIndexByte(name, '.') + if idx <= 0 { + return "", "" + } + return name[:idx], name[idx+1:] +} + +func cleanUnitName(name string) string { + // dev-disk-by\x2duuid-DE44\x2dCEE0.device => dev-disk-by-uuid-DE44-CEE0.device + if strings.IndexByte(name, '\\') == -1 { + return name + } + v, err := strconv.Unquote("\"" + name + "\"") + if err != nil { + return name + } + return v +} diff --git a/src/go/collectors/go.d.plugin/modules/systemdunits/config_schema.json b/src/go/collectors/go.d.plugin/modules/systemdunits/config_schema.json new file mode 100644 index 000000000..8dc57a3d8 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/systemdunits/config_schema.json @@ -0,0 +1,58 @@ +{ + "jsonSchema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Systemdunits collector configuration.", + "type": "object", + "properties": { + "update_every": { + "title": "Update every", + "description": "Data collection interval, measured in seconds.", + "type": "integer", + "minimum": 1, + "default": 10 + }, + "timeout": { + "title": "Timeout", + "description": "The timeout, in seconds, for connecting and querying systemd's D-Bus endpoint.", + "type": "number", + "minimum": 0.5, + "default": 2 + }, + "include": { + "title": "Include", + "description": "Configuration for monitoring specific systemd units. 
Include systemd units whose names match any of the specified [patterns](https://golang.org/pkg/path/filepath/#Match).", + "type": [ + "array", + "null" + ], + "uniqueItems": true, + "minItems": 1, + "items": { + "title": "Unit pattern", + "type": "string" + }, + "default": [ + "*.service" + ] + } + }, + "required": [ + "include" + ], + "additionalProperties": false, + "patternProperties": { + "^name$": {} + } + }, + "uiSchema": { + "uiOptions": { + "fullPage": true + }, + "timeout": { + "ui:help": "Accepts decimals for precise control (e.g., type 1.5 for 1.5 seconds)." + }, + "include": { + "ui:listFlavour": "list" + } + } +} diff --git a/src/go/collectors/go.d.plugin/modules/systemdunits/doc.go b/src/go/collectors/go.d.plugin/modules/systemdunits/doc.go new file mode 100644 index 000000000..8bb45fab9 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/systemdunits/doc.go @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +// Package systemdunits is a systemd units states collector +package systemdunits diff --git a/src/go/collectors/go.d.plugin/modules/systemdunits/init.go b/src/go/collectors/go.d.plugin/modules/systemdunits/init.go new file mode 100644 index 000000000..e59290ace --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/systemdunits/init.go @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +//go:build linux +// +build linux + +package systemdunits + +import ( + "errors" + "strings" + + "github.com/netdata/netdata/go/go.d.plugin/pkg/matcher" +) + +func (s *SystemdUnits) validateConfig() error { + if len(s.Include) == 0 { + return errors.New("'include' option not set") + } + return nil +} + +func (s *SystemdUnits) initSelector() (matcher.Matcher, error) { + if len(s.Include) == 0 { + return matcher.TRUE(), nil + } + + expr := strings.Join(s.Include, " ") + return matcher.NewSimplePatternsMatcher(expr) +} diff --git a/src/go/collectors/go.d.plugin/modules/systemdunits/integrations/systemd_units.md 
b/src/go/collectors/go.d.plugin/modules/systemdunits/integrations/systemd_units.md new file mode 100644 index 000000000..001cd87a3 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/systemdunits/integrations/systemd_units.md @@ -0,0 +1,253 @@ +<!--startmeta +custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/go/collectors/go.d.plugin/modules/systemdunits/README.md" +meta_yaml: "https://github.com/netdata/netdata/edit/master/src/go/collectors/go.d.plugin/modules/systemdunits/metadata.yaml" +sidebar_label: "Systemd Units" +learn_status: "Published" +learn_rel_path: "Collecting Metrics/Systemd" +most_popular: False +message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" +endmeta--> + +# Systemd Units + + +<img src="https://netdata.cloud/img/systemd.svg" width="150"/> + + +Plugin: go.d.plugin +Module: systemdunits + +<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> + +## Overview + +This collector monitors Systemd units state. + + + + +This collector is supported on all platforms. + +This collector supports collecting metrics from multiple instances of this integration, including remote instances. + + +### Default Behavior + +#### Auto-Detection + +This integration doesn't support auto-detection. + +#### Limits + +The default configuration for this integration does not impose any limits on data collection. + +#### Performance Impact + +The default configuration for this integration is not expected to impose a significant performance impact on the system. + + +## Metrics + +Metrics grouped by *scope*. + +The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. + + + +### Per unit + +These metrics refer to the systemd unit. 
+ +Labels: + +| Label | Description | +|:-----------|:----------------| +| unit_name | systemd unit name | + +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| systemd.service_unit_state | active, inactive, activating, deactivating, failed | state | +| systemd.socket_unit_state | active, inactive, activating, deactivating, failed | state | +| systemd.target_unit_state | active, inactive, activating, deactivating, failed | state | +| systemd.path_unit_state | active, inactive, activating, deactivating, failed | state | +| systemd.device_unit_state | active, inactive, activating, deactivating, failed | state | +| systemd.mount_unit_state | active, inactive, activating, deactivating, failed | state | +| systemd.automount_unit_state | active, inactive, activating, deactivating, failed | state | +| systemd.swap_unit_state | active, inactive, activating, deactivating, failed | state | +| systemd.timer_unit_state | active, inactive, activating, deactivating, failed | state | +| systemd.scope_unit_state | active, inactive, activating, deactivating, failed | state | +| systemd.slice_unit_state | active, inactive, activating, deactivating, failed | state | + + + +## Alerts + + +The following alerts are available: + +| Alert name | On metric | Description | +|:------------|:----------|:------------| +| [ systemd_service_unit_failed_state ](https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf) | systemd.service_unit_state | systemd service unit in the failed state | +| [ systemd_socket_unit_failed_state ](https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf) | systemd.socket_unit_state | systemd socket unit in the failed state | +| [ systemd_target_unit_failed_state ](https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf) | systemd.target_unit_state | systemd target unit in the failed state | +| [ systemd_path_unit_failed_state 
](https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf) | systemd.path_unit_state | systemd path unit in the failed state | +| [ systemd_device_unit_failed_state ](https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf) | systemd.device_unit_state | systemd device unit in the failed state | +| [ systemd_mount_unit_failed_state ](https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf) | systemd.mount_unit_state | systemd mount unit in the failed state | +| [ systemd_automount_unit_failed_state ](https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf) | systemd.automount_unit_state | systemd automount unit in the failed state | +| [ systemd_swap_unit_failed_state ](https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf) | systemd.swap_unit_state | systemd swap unit in the failed state | +| [ systemd_scope_unit_failed_state ](https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf) | systemd.scope_unit_state | systemd scope unit in the failed state | +| [ systemd_slice_unit_failed_state ](https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf) | systemd.slice_unit_state | systemd slice unit in the failed state | +| [ systemd_timer_unit_failed_state ](https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf) | systemd.timer_unit_state | systemd timer unit in the failed state | + + +## Setup + +### Prerequisites + +No action required. + +### Configuration + +#### File + +The configuration file name for this integration is `go.d/systemdunits.conf`. + + +You can edit the configuration file using the `edit-config` script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory). 
+ +```bash +cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata +sudo ./edit-config go.d/systemdunits.conf +``` +#### Options + +The following options can be defined globally: update_every, autodetection_retry. + + +<details><summary>Config options</summary> + +| Name | Description | Default | Required | +|:----|:-----------|:-------|:--------:| +| update_every | Data collection frequency. | 10 | no | +| autodetection_retry | Recheck interval in seconds. Zero means no recheck will be scheduled. | 0 | no | +| include | Systemd units filter. | *.service | no | +| timeout | System bus requests timeout. | 2 | no | + +##### include + +Systemd units matching the selector will be monitored. + +- Logic: (pattern1 OR pattern2) +- Pattern syntax: [shell file name pattern](https://golang.org/pkg/path/filepath/#Match) +- Syntax: + +```yaml +include: + - pattern1 + - pattern2 +``` + + +</details> + +#### Examples + +##### Service units + +Collect state of all service type units. + +<details><summary>Config</summary> + +```yaml +jobs: + - name: service + include: + - '*.service' + +``` +</details> + +##### One specific unit + +Collect state of one specific unit. + +<details><summary>Config</summary> + +```yaml +jobs: + - name: my-specific-service + include: + - 'my-specific.service' + +``` +</details> + +##### All unit types + +Collect state of all units. + +<details><summary>Config</summary> + +```yaml +jobs: + - name: my-specific-service-unit + include: + - '*' + +``` +</details> + +##### Multi-instance + +> **Note**: When you define multiple jobs, their names must be unique. + +Collect state of all service and socket type units. + + +<details><summary>Config</summary> + +```yaml +jobs: + - name: service + include: + - '*.service' + + - name: socket + include: + - '*.socket' + +``` +</details> + + + +## Troubleshooting + +### Debug Mode + +To troubleshoot issues with the `systemdunits` collector, run the `go.d.plugin` with the debug option enabled. 
The output +should give you clues as to why the collector isn't working. + +- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on + your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. + + ```bash + cd /usr/libexec/netdata/plugins.d/ + ``` + +- Switch to the `netdata` user. + + ```bash + sudo -u netdata -s + ``` + +- Run the `go.d.plugin` to debug the collector: + + ```bash + ./go.d.plugin -d -m systemdunits + ``` + + diff --git a/src/go/collectors/go.d.plugin/modules/systemdunits/metadata.yaml b/src/go/collectors/go.d.plugin/modules/systemdunits/metadata.yaml new file mode 100644 index 000000000..21755bb69 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/systemdunits/metadata.yaml @@ -0,0 +1,290 @@ +plugin_name: go.d.plugin +modules: + - meta: + id: collector-go.d.plugin-systemdunits + plugin_name: go.d.plugin + module_name: systemdunits + monitored_instance: + name: Systemd Units + link: https://www.freedesktop.org/wiki/Software/systemd/ + icon_filename: systemd.svg + categories: + - data-collection.systemd + keywords: + - systemd + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + most_popular: false + overview: + data_collection: + metrics_description: | + This collector monitors Systemd units state. + method_description: "" + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: go.d/systemdunits.conf + options: + description: | + The following options can be defined globally: update_every, autodetection_retry. + folding: + title: Config options + enabled: true + list: + - name: update_every + description: Data collection frequency. 
+ default_value: 10 + required: false + - name: autodetection_retry + description: Recheck interval in seconds. Zero means no recheck will be scheduled. + default_value: 0 + required: false + - name: include + description: Systemd units filter. + default_value: "*.service" + required: false + detailed_description: | + Systemd units matching the selector will be monitored. + + - Logic: (pattern1 OR pattern2) + - Pattern syntax: [shell file name pattern](https://golang.org/pkg/path/filepath/#Match) + - Syntax: + + ```yaml + include: + - pattern1 + - pattern2 + ``` + - name: timeout + description: System bus requests timeout. + default_value: 2 + required: false + examples: + folding: + title: Config + enabled: true + list: + - name: Service units + description: Collect state of all service type units. + config: | + jobs: + - name: service + include: + - '*.service' + - name: One specific unit + description: Collect state of one specific unit. + config: | + jobs: + - name: my-specific-service + include: + - 'my-specific.service' + - name: All unit types + description: Collect state of all units. + config: | + jobs: + - name: my-specific-service-unit + include: + - '*' + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collect state of all service and socket type units. 
+ config: | + jobs: + - name: service + include: + - '*.service' + + - name: socket + include: + - '*.socket' + troubleshooting: + problems: + list: [] + alerts: + - name: systemd_service_unit_failed_state + metric: systemd.service_unit_state + info: systemd service unit in the failed state + link: https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf + - name: systemd_socket_unit_failed_state + metric: systemd.socket_unit_state + info: systemd socket unit in the failed state + link: https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf + - name: systemd_target_unit_failed_state + metric: systemd.target_unit_state + info: systemd target unit in the failed state + link: https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf + - name: systemd_path_unit_failed_state + metric: systemd.path_unit_state + info: systemd path unit in the failed state + link: https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf + - name: systemd_device_unit_failed_state + metric: systemd.device_unit_state + info: systemd device unit in the failed state + link: https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf + - name: systemd_mount_unit_failed_state + metric: systemd.mount_unit_state + info: systemd mount unit in the failed state + link: https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf + - name: systemd_automount_unit_failed_state + metric: systemd.automount_unit_state + info: systemd automount unit in the failed state + link: https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf + - name: systemd_swap_unit_failed_state + metric: systemd.swap_unit_state + info: systemd swap unit in the failed state + link: https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf + - name: systemd_scope_unit_failed_state + metric: systemd.scope_unit_state + 
info: systemd scope unit in the failed state + link: https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf + - name: systemd_slice_unit_failed_state + metric: systemd.slice_unit_state + info: systemd slice unit in the failed state + link: https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf + - name: systemd_timer_unit_failed_state + metric: systemd.timer_unit_state + info: systemd timer unit in the failed state + link: https://github.com/netdata/netdata/blob/master/src/health/health.d/systemdunits.conf + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: unit + description: These metrics refer to the systemd unit. + labels: + - name: unit_name + description: systemd unit name + metrics: + - name: systemd.service_unit_state + description: Service Unit State + unit: state + chart_type: line + dimensions: + - name: active + - name: inactive + - name: activating + - name: deactivating + - name: failed + - name: systemd.socket_unit_state + description: Socket Unit State + unit: state + chart_type: line + dimensions: + - name: active + - name: inactive + - name: activating + - name: deactivating + - name: failed + - name: systemd.target_unit_state + description: Target Unit State + unit: state + chart_type: line + dimensions: + - name: active + - name: inactive + - name: activating + - name: deactivating + - name: failed + - name: systemd.path_unit_state + description: Path Unit State + unit: state + chart_type: line + dimensions: + - name: active + - name: inactive + - name: activating + - name: deactivating + - name: failed + - name: systemd.device_unit_state + description: Device Unit State + unit: state + chart_type: line + dimensions: + - name: active + - name: inactive + - name: activating + - name: deactivating + - name: failed + - name: systemd.mount_unit_state + description: Mount Unit State + unit: state + chart_type: line + dimensions: + 
- name: active + - name: inactive + - name: activating + - name: deactivating + - name: failed + - name: systemd.automount_unit_state + description: Automount Unit State + unit: state + chart_type: line + dimensions: + - name: active + - name: inactive + - name: activating + - name: deactivating + - name: failed + - name: systemd.swap_unit_state + description: Swap Unit State + unit: state + chart_type: line + dimensions: + - name: active + - name: inactive + - name: activating + - name: deactivating + - name: failed + - name: systemd.timer_unit_state + description: Timer Unit State + unit: state + chart_type: line + dimensions: + - name: active + - name: inactive + - name: activating + - name: deactivating + - name: failed + - name: systemd.scope_unit_state + description: Scope Unit State + unit: state + chart_type: line + dimensions: + - name: active + - name: inactive + - name: activating + - name: deactivating + - name: failed + - name: systemd.slice_unit_state + description: Slice Unit State + unit: state + chart_type: line + dimensions: + - name: active + - name: inactive + - name: activating + - name: deactivating + - name: failed diff --git a/src/go/collectors/go.d.plugin/modules/systemdunits/systemdunits.go b/src/go/collectors/go.d.plugin/modules/systemdunits/systemdunits.go new file mode 100644 index 000000000..345b2525a --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/systemdunits/systemdunits.go @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +//go:build linux +// +build linux + +package systemdunits + +import ( + _ "embed" + "errors" + "time" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" + "github.com/netdata/netdata/go/go.d.plugin/pkg/matcher" + "github.com/netdata/netdata/go/go.d.plugin/pkg/web" +) + +//go:embed "config_schema.json" +var configSchema string + +func init() { + module.Register("systemdunits", module.Creator{ + JobConfigSchema: configSchema, + Defaults: module.Defaults{ + UpdateEvery: 10, // 
gathering systemd units can be a CPU intensive op + }, + Create: func() module.Module { return New() }, + }) +} + +func New() *SystemdUnits { + return &SystemdUnits{ + Config: Config{ + Timeout: web.Duration(time.Second * 2), + Include: []string{ + "*.service", + }, + }, + + charts: &module.Charts{}, + client: newSystemdDBusClient(), + units: make(map[string]bool), + } +} + +type Config struct { + UpdateEvery int `yaml:"update_every" json:"update_every"` + Timeout web.Duration `yaml:"timeout" json:"timeout"` + Include []string `yaml:"include" json:"include"` +} + +type SystemdUnits struct { + module.Base + Config `yaml:",inline" json:""` + + client systemdClient + conn systemdConnection + + systemdVersion int + units map[string]bool + sr matcher.Matcher + + charts *module.Charts +} + +func (s *SystemdUnits) Configuration() any { + return s.Config +} + +func (s *SystemdUnits) Init() error { + err := s.validateConfig() + if err != nil { + s.Errorf("config validation: %v", err) + return err + } + + sr, err := s.initSelector() + if err != nil { + s.Errorf("init selector: %v", err) + return err + } + s.sr = sr + + s.Debugf("unit names patterns: %v", s.Include) + s.Debugf("timeout: %s", s.Timeout) + + return nil +} + +func (s *SystemdUnits) Check() error { + mx, err := s.collect() + if err != nil { + s.Error(err) + return err + } + if len(mx) == 0 { + return errors.New("no metrics collected") + } + return nil +} + +func (s *SystemdUnits) Charts() *module.Charts { + return s.charts +} + +func (s *SystemdUnits) Collect() map[string]int64 { + mx, err := s.collect() + if err != nil { + s.Error(err) + } + + if len(mx) == 0 { + return nil + } + return mx +} + +func (s *SystemdUnits) Cleanup() { + s.closeConnection() +} diff --git a/src/go/collectors/go.d.plugin/modules/systemdunits/systemdunits_test.go b/src/go/collectors/go.d.plugin/modules/systemdunits/systemdunits_test.go new file mode 100644 index 000000000..3a1a59424 --- /dev/null +++ 
b/src/go/collectors/go.d.plugin/modules/systemdunits/systemdunits_test.go @@ -0,0 +1,906 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +//go:build linux +// +build linux + +package systemdunits + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" + + "github.com/coreos/go-systemd/v22/dbus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var ( + dataConfigJSON, _ = os.ReadFile("testdata/config.json") + dataConfigYAML, _ = os.ReadFile("testdata/config.yaml") +) + +func Test_testDataIsValid(t *testing.T) { + for name, data := range map[string][]byte{ + "dataConfigJSON": dataConfigJSON, + "dataConfigYAML": dataConfigYAML, + } { + require.NotNil(t, data, name) + } +} + +func TestSystemdUnits_ConfigurationSerialize(t *testing.T) { + module.TestConfigurationSerialize(t, &SystemdUnits{}, dataConfigJSON, dataConfigYAML) +} + +func TestSystemdUnits_Init(t *testing.T) { + tests := map[string]struct { + config Config + wantFail bool + }{ + "success on default config": { + config: New().Config, + }, + "success when 'include' option set": { + config: Config{ + Include: []string{"*"}, + }, + }, + "fails when 'include' option not set": { + wantFail: true, + config: Config{Include: []string{}}, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + systemd := New() + systemd.Config = test.config + + if test.wantFail { + assert.Error(t, systemd.Init()) + } else { + assert.NoError(t, systemd.Init()) + } + }) + } +} + +func TestSystemdUnits_Check(t *testing.T) { + tests := map[string]struct { + prepare func() *SystemdUnits + wantFail bool + }{ + "success on systemd v230+": { + prepare: func() *SystemdUnits { + systemd := New() + systemd.Include = []string{"*"} + systemd.client = prepareOKClient(230) + return systemd + }, + }, + "success on systemd v230-": { + prepare: func() *SystemdUnits { + systemd := New() + systemd.Include = 
[]string{"*"} + systemd.client = prepareOKClient(220) + return systemd + }, + }, + "fails when all unites are filtered": { + wantFail: true, + prepare: func() *SystemdUnits { + systemd := New() + systemd.Include = []string{"*.not_exists"} + systemd.client = prepareOKClient(230) + return systemd + }, + }, + "fails on error on connect": { + wantFail: true, + prepare: func() *SystemdUnits { + systemd := New() + systemd.client = prepareClientErrOnConnect() + return systemd + }, + }, + "fails on error on get manager property": { + wantFail: true, + prepare: func() *SystemdUnits { + systemd := New() + systemd.client = prepareClientErrOnGetManagerProperty() + return systemd + }, + }, + "fails on error on list units": { + wantFail: true, + prepare: func() *SystemdUnits { + systemd := New() + systemd.client = prepareClientErrOnListUnits() + return systemd + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + systemd := test.prepare() + require.NoError(t, systemd.Init()) + + if test.wantFail { + assert.Error(t, systemd.Check()) + } else { + assert.NoError(t, systemd.Check()) + } + }) + } +} + +func TestSystemdUnits_Charts(t *testing.T) { + systemd := New() + require.NoError(t, systemd.Init()) + assert.NotNil(t, systemd.Charts()) +} + +func TestSystemdUnits_Cleanup(t *testing.T) { + systemd := New() + systemd.Include = []string{"*"} + client := prepareOKClient(230) + systemd.client = client + + require.NoError(t, systemd.Init()) + require.NotNil(t, systemd.Collect()) + conn := systemd.conn + systemd.Cleanup() + + assert.Nil(t, systemd.conn) + v, _ := conn.(*mockConn) + assert.True(t, v.closeCalled) +} + +func TestSystemdUnits_Collect(t *testing.T) { + tests := map[string]struct { + prepare func() *SystemdUnits + wantCollected map[string]int64 + }{ + "success on systemd v230+ on collecting all unit type": { + prepare: func() *SystemdUnits { + systemd := New() + systemd.Include = []string{"*"} + systemd.client = prepareOKClient(230) + return 
systemd + }, + wantCollected: map[string]int64{ + "unit_dbus_socket_state_activating": 0, + "unit_dbus_socket_state_active": 1, + "unit_dbus_socket_state_deactivating": 0, + "unit_dbus_socket_state_failed": 0, + "unit_dbus_socket_state_inactive": 0, + "unit_dev-disk-by-uuid-DE44-CEE0_device_state_activating": 0, + "unit_dev-disk-by-uuid-DE44-CEE0_device_state_active": 1, + "unit_dev-disk-by-uuid-DE44-CEE0_device_state_deactivating": 0, + "unit_dev-disk-by-uuid-DE44-CEE0_device_state_failed": 0, + "unit_dev-disk-by-uuid-DE44-CEE0_device_state_inactive": 0, + "unit_dev-nvme0n1_device_state_activating": 0, + "unit_dev-nvme0n1_device_state_active": 1, + "unit_dev-nvme0n1_device_state_deactivating": 0, + "unit_dev-nvme0n1_device_state_failed": 0, + "unit_dev-nvme0n1_device_state_inactive": 0, + "unit_docker_socket_state_activating": 0, + "unit_docker_socket_state_active": 1, + "unit_docker_socket_state_deactivating": 0, + "unit_docker_socket_state_failed": 0, + "unit_docker_socket_state_inactive": 0, + "unit_getty-pre_target_state_activating": 0, + "unit_getty-pre_target_state_active": 0, + "unit_getty-pre_target_state_deactivating": 0, + "unit_getty-pre_target_state_failed": 0, + "unit_getty-pre_target_state_inactive": 1, + "unit_init_scope_state_activating": 0, + "unit_init_scope_state_active": 1, + "unit_init_scope_state_deactivating": 0, + "unit_init_scope_state_failed": 0, + "unit_init_scope_state_inactive": 0, + "unit_logrotate_timer_state_activating": 0, + "unit_logrotate_timer_state_active": 1, + "unit_logrotate_timer_state_deactivating": 0, + "unit_logrotate_timer_state_failed": 0, + "unit_logrotate_timer_state_inactive": 0, + "unit_lvm2-lvmetad_socket_state_activating": 0, + "unit_lvm2-lvmetad_socket_state_active": 1, + "unit_lvm2-lvmetad_socket_state_deactivating": 0, + "unit_lvm2-lvmetad_socket_state_failed": 0, + "unit_lvm2-lvmetad_socket_state_inactive": 0, + "unit_lvm2-lvmpolld_socket_state_activating": 0, + "unit_lvm2-lvmpolld_socket_state_active": 1, + 
"unit_lvm2-lvmpolld_socket_state_deactivating": 0, + "unit_lvm2-lvmpolld_socket_state_failed": 0, + "unit_lvm2-lvmpolld_socket_state_inactive": 0, + "unit_man-db_timer_state_activating": 0, + "unit_man-db_timer_state_active": 1, + "unit_man-db_timer_state_deactivating": 0, + "unit_man-db_timer_state_failed": 0, + "unit_man-db_timer_state_inactive": 0, + "unit_org.cups.cupsd_path_state_activating": 0, + "unit_org.cups.cupsd_path_state_active": 1, + "unit_org.cups.cupsd_path_state_deactivating": 0, + "unit_org.cups.cupsd_path_state_failed": 0, + "unit_org.cups.cupsd_path_state_inactive": 0, + "unit_pamac-cleancache_timer_state_activating": 0, + "unit_pamac-cleancache_timer_state_active": 1, + "unit_pamac-cleancache_timer_state_deactivating": 0, + "unit_pamac-cleancache_timer_state_failed": 0, + "unit_pamac-cleancache_timer_state_inactive": 0, + "unit_pamac-mirrorlist_timer_state_activating": 0, + "unit_pamac-mirrorlist_timer_state_active": 1, + "unit_pamac-mirrorlist_timer_state_deactivating": 0, + "unit_pamac-mirrorlist_timer_state_failed": 0, + "unit_pamac-mirrorlist_timer_state_inactive": 0, + "unit_proc-sys-fs-binfmt_misc_automount_state_activating": 0, + "unit_proc-sys-fs-binfmt_misc_automount_state_active": 1, + "unit_proc-sys-fs-binfmt_misc_automount_state_deactivating": 0, + "unit_proc-sys-fs-binfmt_misc_automount_state_failed": 0, + "unit_proc-sys-fs-binfmt_misc_automount_state_inactive": 0, + "unit_remote-fs-pre_target_state_activating": 0, + "unit_remote-fs-pre_target_state_active": 0, + "unit_remote-fs-pre_target_state_deactivating": 0, + "unit_remote-fs-pre_target_state_failed": 0, + "unit_remote-fs-pre_target_state_inactive": 1, + "unit_rpc_pipefs_target_state_activating": 0, + "unit_rpc_pipefs_target_state_active": 0, + "unit_rpc_pipefs_target_state_deactivating": 0, + "unit_rpc_pipefs_target_state_failed": 0, + "unit_rpc_pipefs_target_state_inactive": 1, + "unit_run-user-1000-gvfs_mount_state_activating": 0, + 
"unit_run-user-1000-gvfs_mount_state_active": 1, + "unit_run-user-1000-gvfs_mount_state_deactivating": 0, + "unit_run-user-1000-gvfs_mount_state_failed": 0, + "unit_run-user-1000-gvfs_mount_state_inactive": 0, + "unit_run-user-1000_mount_state_activating": 0, + "unit_run-user-1000_mount_state_active": 1, + "unit_run-user-1000_mount_state_deactivating": 0, + "unit_run-user-1000_mount_state_failed": 0, + "unit_run-user-1000_mount_state_inactive": 0, + "unit_session-1_scope_state_activating": 0, + "unit_session-1_scope_state_active": 1, + "unit_session-1_scope_state_deactivating": 0, + "unit_session-1_scope_state_failed": 0, + "unit_session-1_scope_state_inactive": 0, + "unit_session-2_scope_state_activating": 0, + "unit_session-2_scope_state_active": 1, + "unit_session-2_scope_state_deactivating": 0, + "unit_session-2_scope_state_failed": 0, + "unit_session-2_scope_state_inactive": 0, + "unit_session-3_scope_state_activating": 0, + "unit_session-3_scope_state_active": 1, + "unit_session-3_scope_state_deactivating": 0, + "unit_session-3_scope_state_failed": 0, + "unit_session-3_scope_state_inactive": 0, + "unit_session-6_scope_state_activating": 0, + "unit_session-6_scope_state_active": 1, + "unit_session-6_scope_state_deactivating": 0, + "unit_session-6_scope_state_failed": 0, + "unit_session-6_scope_state_inactive": 0, + "unit_shadow_timer_state_activating": 0, + "unit_shadow_timer_state_active": 1, + "unit_shadow_timer_state_deactivating": 0, + "unit_shadow_timer_state_failed": 0, + "unit_shadow_timer_state_inactive": 0, + "unit_sound_target_state_activating": 0, + "unit_sound_target_state_active": 1, + "unit_sound_target_state_deactivating": 0, + "unit_sound_target_state_failed": 0, + "unit_sound_target_state_inactive": 0, + "unit_sys-devices-virtual-net-loopback1_device_state_activating": 0, + "unit_sys-devices-virtual-net-loopback1_device_state_active": 1, + "unit_sys-devices-virtual-net-loopback1_device_state_deactivating": 0, + 
"unit_sys-devices-virtual-net-loopback1_device_state_failed": 0, + "unit_sys-devices-virtual-net-loopback1_device_state_inactive": 0, + "unit_sys-module-fuse_device_state_activating": 0, + "unit_sys-module-fuse_device_state_active": 1, + "unit_sys-module-fuse_device_state_deactivating": 0, + "unit_sys-module-fuse_device_state_failed": 0, + "unit_sys-module-fuse_device_state_inactive": 0, + "unit_sysinit_target_state_activating": 0, + "unit_sysinit_target_state_active": 1, + "unit_sysinit_target_state_deactivating": 0, + "unit_sysinit_target_state_failed": 0, + "unit_sysinit_target_state_inactive": 0, + "unit_system-getty_slice_state_activating": 0, + "unit_system-getty_slice_state_active": 1, + "unit_system-getty_slice_state_deactivating": 0, + "unit_system-getty_slice_state_failed": 0, + "unit_system-getty_slice_state_inactive": 0, + "unit_system-netctl_slice_state_activating": 0, + "unit_system-netctl_slice_state_active": 1, + "unit_system-netctl_slice_state_deactivating": 0, + "unit_system-netctl_slice_state_failed": 0, + "unit_system-netctl_slice_state_inactive": 0, + "unit_system-systemd-fsck_slice_state_activating": 0, + "unit_system-systemd-fsck_slice_state_active": 1, + "unit_system-systemd-fsck_slice_state_deactivating": 0, + "unit_system-systemd-fsck_slice_state_failed": 0, + "unit_system-systemd-fsck_slice_state_inactive": 0, + "unit_system_slice_state_activating": 0, + "unit_system_slice_state_active": 1, + "unit_system_slice_state_deactivating": 0, + "unit_system_slice_state_failed": 0, + "unit_system_slice_state_inactive": 0, + "unit_systemd-ask-password-console_path_state_activating": 0, + "unit_systemd-ask-password-console_path_state_active": 1, + "unit_systemd-ask-password-console_path_state_deactivating": 0, + "unit_systemd-ask-password-console_path_state_failed": 0, + "unit_systemd-ask-password-console_path_state_inactive": 0, + "unit_systemd-ask-password-wall_path_state_activating": 0, + "unit_systemd-ask-password-wall_path_state_active": 1, + 
"unit_systemd-ask-password-wall_path_state_deactivating": 0, + "unit_systemd-ask-password-wall_path_state_failed": 0, + "unit_systemd-ask-password-wall_path_state_inactive": 0, + "unit_systemd-ask-password-wall_service_state_activating": 0, + "unit_systemd-ask-password-wall_service_state_active": 0, + "unit_systemd-ask-password-wall_service_state_deactivating": 0, + "unit_systemd-ask-password-wall_service_state_failed": 0, + "unit_systemd-ask-password-wall_service_state_inactive": 1, + "unit_systemd-fsck-root_service_state_activating": 0, + "unit_systemd-fsck-root_service_state_active": 0, + "unit_systemd-fsck-root_service_state_deactivating": 0, + "unit_systemd-fsck-root_service_state_failed": 0, + "unit_systemd-fsck-root_service_state_inactive": 1, + "unit_systemd-udevd-kernel_socket_state_activating": 0, + "unit_systemd-udevd-kernel_socket_state_active": 1, + "unit_systemd-udevd-kernel_socket_state_deactivating": 0, + "unit_systemd-udevd-kernel_socket_state_failed": 0, + "unit_systemd-udevd-kernel_socket_state_inactive": 0, + "unit_tmp_mount_state_activating": 0, + "unit_tmp_mount_state_active": 1, + "unit_tmp_mount_state_deactivating": 0, + "unit_tmp_mount_state_failed": 0, + "unit_tmp_mount_state_inactive": 0, + "unit_user-runtime-dir@1000_service_state_activating": 0, + "unit_user-runtime-dir@1000_service_state_active": 1, + "unit_user-runtime-dir@1000_service_state_deactivating": 0, + "unit_user-runtime-dir@1000_service_state_failed": 0, + "unit_user-runtime-dir@1000_service_state_inactive": 0, + "unit_user@1000_service_state_activating": 0, + "unit_user@1000_service_state_active": 1, + "unit_user@1000_service_state_deactivating": 0, + "unit_user@1000_service_state_failed": 0, + "unit_user@1000_service_state_inactive": 0, + "unit_user_slice_state_activating": 0, + "unit_user_slice_state_active": 1, + "unit_user_slice_state_deactivating": 0, + "unit_user_slice_state_failed": 0, + "unit_user_slice_state_inactive": 0, + 
"unit_var-lib-nfs-rpc_pipefs_mount_state_activating": 0, + "unit_var-lib-nfs-rpc_pipefs_mount_state_active": 0, + "unit_var-lib-nfs-rpc_pipefs_mount_state_deactivating": 0, + "unit_var-lib-nfs-rpc_pipefs_mount_state_failed": 0, + "unit_var-lib-nfs-rpc_pipefs_mount_state_inactive": 1, + }, + }, + "success on systemd v230- on collecting all unit types": { + prepare: func() *SystemdUnits { + systemd := New() + systemd.Include = []string{"*"} + systemd.client = prepareOKClient(220) + return systemd + }, + wantCollected: map[string]int64{ + "unit_dbus_socket_state_activating": 0, + "unit_dbus_socket_state_active": 1, + "unit_dbus_socket_state_deactivating": 0, + "unit_dbus_socket_state_failed": 0, + "unit_dbus_socket_state_inactive": 0, + "unit_dev-disk-by-uuid-DE44-CEE0_device_state_activating": 0, + "unit_dev-disk-by-uuid-DE44-CEE0_device_state_active": 1, + "unit_dev-disk-by-uuid-DE44-CEE0_device_state_deactivating": 0, + "unit_dev-disk-by-uuid-DE44-CEE0_device_state_failed": 0, + "unit_dev-disk-by-uuid-DE44-CEE0_device_state_inactive": 0, + "unit_dev-nvme0n1_device_state_activating": 0, + "unit_dev-nvme0n1_device_state_active": 1, + "unit_dev-nvme0n1_device_state_deactivating": 0, + "unit_dev-nvme0n1_device_state_failed": 0, + "unit_dev-nvme0n1_device_state_inactive": 0, + "unit_docker_socket_state_activating": 0, + "unit_docker_socket_state_active": 1, + "unit_docker_socket_state_deactivating": 0, + "unit_docker_socket_state_failed": 0, + "unit_docker_socket_state_inactive": 0, + "unit_getty-pre_target_state_activating": 0, + "unit_getty-pre_target_state_active": 0, + "unit_getty-pre_target_state_deactivating": 0, + "unit_getty-pre_target_state_failed": 0, + "unit_getty-pre_target_state_inactive": 1, + "unit_init_scope_state_activating": 0, + "unit_init_scope_state_active": 1, + "unit_init_scope_state_deactivating": 0, + "unit_init_scope_state_failed": 0, + "unit_init_scope_state_inactive": 0, + "unit_logrotate_timer_state_activating": 0, + 
"unit_logrotate_timer_state_active": 1, + "unit_logrotate_timer_state_deactivating": 0, + "unit_logrotate_timer_state_failed": 0, + "unit_logrotate_timer_state_inactive": 0, + "unit_lvm2-lvmetad_socket_state_activating": 0, + "unit_lvm2-lvmetad_socket_state_active": 1, + "unit_lvm2-lvmetad_socket_state_deactivating": 0, + "unit_lvm2-lvmetad_socket_state_failed": 0, + "unit_lvm2-lvmetad_socket_state_inactive": 0, + "unit_lvm2-lvmpolld_socket_state_activating": 0, + "unit_lvm2-lvmpolld_socket_state_active": 1, + "unit_lvm2-lvmpolld_socket_state_deactivating": 0, + "unit_lvm2-lvmpolld_socket_state_failed": 0, + "unit_lvm2-lvmpolld_socket_state_inactive": 0, + "unit_man-db_timer_state_activating": 0, + "unit_man-db_timer_state_active": 1, + "unit_man-db_timer_state_deactivating": 0, + "unit_man-db_timer_state_failed": 0, + "unit_man-db_timer_state_inactive": 0, + "unit_org.cups.cupsd_path_state_activating": 0, + "unit_org.cups.cupsd_path_state_active": 1, + "unit_org.cups.cupsd_path_state_deactivating": 0, + "unit_org.cups.cupsd_path_state_failed": 0, + "unit_org.cups.cupsd_path_state_inactive": 0, + "unit_pamac-cleancache_timer_state_activating": 0, + "unit_pamac-cleancache_timer_state_active": 1, + "unit_pamac-cleancache_timer_state_deactivating": 0, + "unit_pamac-cleancache_timer_state_failed": 0, + "unit_pamac-cleancache_timer_state_inactive": 0, + "unit_pamac-mirrorlist_timer_state_activating": 0, + "unit_pamac-mirrorlist_timer_state_active": 1, + "unit_pamac-mirrorlist_timer_state_deactivating": 0, + "unit_pamac-mirrorlist_timer_state_failed": 0, + "unit_pamac-mirrorlist_timer_state_inactive": 0, + "unit_proc-sys-fs-binfmt_misc_automount_state_activating": 0, + "unit_proc-sys-fs-binfmt_misc_automount_state_active": 1, + "unit_proc-sys-fs-binfmt_misc_automount_state_deactivating": 0, + "unit_proc-sys-fs-binfmt_misc_automount_state_failed": 0, + "unit_proc-sys-fs-binfmt_misc_automount_state_inactive": 0, + "unit_remote-fs-pre_target_state_activating": 0, + 
"unit_remote-fs-pre_target_state_active": 0, + "unit_remote-fs-pre_target_state_deactivating": 0, + "unit_remote-fs-pre_target_state_failed": 0, + "unit_remote-fs-pre_target_state_inactive": 1, + "unit_rpc_pipefs_target_state_activating": 0, + "unit_rpc_pipefs_target_state_active": 0, + "unit_rpc_pipefs_target_state_deactivating": 0, + "unit_rpc_pipefs_target_state_failed": 0, + "unit_rpc_pipefs_target_state_inactive": 1, + "unit_run-user-1000-gvfs_mount_state_activating": 0, + "unit_run-user-1000-gvfs_mount_state_active": 1, + "unit_run-user-1000-gvfs_mount_state_deactivating": 0, + "unit_run-user-1000-gvfs_mount_state_failed": 0, + "unit_run-user-1000-gvfs_mount_state_inactive": 0, + "unit_run-user-1000_mount_state_activating": 0, + "unit_run-user-1000_mount_state_active": 1, + "unit_run-user-1000_mount_state_deactivating": 0, + "unit_run-user-1000_mount_state_failed": 0, + "unit_run-user-1000_mount_state_inactive": 0, + "unit_session-1_scope_state_activating": 0, + "unit_session-1_scope_state_active": 1, + "unit_session-1_scope_state_deactivating": 0, + "unit_session-1_scope_state_failed": 0, + "unit_session-1_scope_state_inactive": 0, + "unit_session-2_scope_state_activating": 0, + "unit_session-2_scope_state_active": 1, + "unit_session-2_scope_state_deactivating": 0, + "unit_session-2_scope_state_failed": 0, + "unit_session-2_scope_state_inactive": 0, + "unit_session-3_scope_state_activating": 0, + "unit_session-3_scope_state_active": 1, + "unit_session-3_scope_state_deactivating": 0, + "unit_session-3_scope_state_failed": 0, + "unit_session-3_scope_state_inactive": 0, + "unit_session-6_scope_state_activating": 0, + "unit_session-6_scope_state_active": 1, + "unit_session-6_scope_state_deactivating": 0, + "unit_session-6_scope_state_failed": 0, + "unit_session-6_scope_state_inactive": 0, + "unit_shadow_timer_state_activating": 0, + "unit_shadow_timer_state_active": 1, + "unit_shadow_timer_state_deactivating": 0, + "unit_shadow_timer_state_failed": 0, + 
"unit_shadow_timer_state_inactive": 0, + "unit_sound_target_state_activating": 0, + "unit_sound_target_state_active": 1, + "unit_sound_target_state_deactivating": 0, + "unit_sound_target_state_failed": 0, + "unit_sound_target_state_inactive": 0, + "unit_sys-devices-virtual-net-loopback1_device_state_activating": 0, + "unit_sys-devices-virtual-net-loopback1_device_state_active": 1, + "unit_sys-devices-virtual-net-loopback1_device_state_deactivating": 0, + "unit_sys-devices-virtual-net-loopback1_device_state_failed": 0, + "unit_sys-devices-virtual-net-loopback1_device_state_inactive": 0, + "unit_sys-module-fuse_device_state_activating": 0, + "unit_sys-module-fuse_device_state_active": 1, + "unit_sys-module-fuse_device_state_deactivating": 0, + "unit_sys-module-fuse_device_state_failed": 0, + "unit_sys-module-fuse_device_state_inactive": 0, + "unit_sysinit_target_state_activating": 0, + "unit_sysinit_target_state_active": 1, + "unit_sysinit_target_state_deactivating": 0, + "unit_sysinit_target_state_failed": 0, + "unit_sysinit_target_state_inactive": 0, + "unit_system-getty_slice_state_activating": 0, + "unit_system-getty_slice_state_active": 1, + "unit_system-getty_slice_state_deactivating": 0, + "unit_system-getty_slice_state_failed": 0, + "unit_system-getty_slice_state_inactive": 0, + "unit_system-netctl_slice_state_activating": 0, + "unit_system-netctl_slice_state_active": 1, + "unit_system-netctl_slice_state_deactivating": 0, + "unit_system-netctl_slice_state_failed": 0, + "unit_system-netctl_slice_state_inactive": 0, + "unit_system-systemd-fsck_slice_state_activating": 0, + "unit_system-systemd-fsck_slice_state_active": 1, + "unit_system-systemd-fsck_slice_state_deactivating": 0, + "unit_system-systemd-fsck_slice_state_failed": 0, + "unit_system-systemd-fsck_slice_state_inactive": 0, + "unit_system_slice_state_activating": 0, + "unit_system_slice_state_active": 1, + "unit_system_slice_state_deactivating": 0, + "unit_system_slice_state_failed": 0, + 
"unit_system_slice_state_inactive": 0, + "unit_systemd-ask-password-console_path_state_activating": 0, + "unit_systemd-ask-password-console_path_state_active": 1, + "unit_systemd-ask-password-console_path_state_deactivating": 0, + "unit_systemd-ask-password-console_path_state_failed": 0, + "unit_systemd-ask-password-console_path_state_inactive": 0, + "unit_systemd-ask-password-wall_path_state_activating": 0, + "unit_systemd-ask-password-wall_path_state_active": 1, + "unit_systemd-ask-password-wall_path_state_deactivating": 0, + "unit_systemd-ask-password-wall_path_state_failed": 0, + "unit_systemd-ask-password-wall_path_state_inactive": 0, + "unit_systemd-ask-password-wall_service_state_activating": 0, + "unit_systemd-ask-password-wall_service_state_active": 0, + "unit_systemd-ask-password-wall_service_state_deactivating": 0, + "unit_systemd-ask-password-wall_service_state_failed": 0, + "unit_systemd-ask-password-wall_service_state_inactive": 1, + "unit_systemd-fsck-root_service_state_activating": 0, + "unit_systemd-fsck-root_service_state_active": 0, + "unit_systemd-fsck-root_service_state_deactivating": 0, + "unit_systemd-fsck-root_service_state_failed": 0, + "unit_systemd-fsck-root_service_state_inactive": 1, + "unit_systemd-udevd-kernel_socket_state_activating": 0, + "unit_systemd-udevd-kernel_socket_state_active": 1, + "unit_systemd-udevd-kernel_socket_state_deactivating": 0, + "unit_systemd-udevd-kernel_socket_state_failed": 0, + "unit_systemd-udevd-kernel_socket_state_inactive": 0, + "unit_tmp_mount_state_activating": 0, + "unit_tmp_mount_state_active": 1, + "unit_tmp_mount_state_deactivating": 0, + "unit_tmp_mount_state_failed": 0, + "unit_tmp_mount_state_inactive": 0, + "unit_user-runtime-dir@1000_service_state_activating": 0, + "unit_user-runtime-dir@1000_service_state_active": 1, + "unit_user-runtime-dir@1000_service_state_deactivating": 0, + "unit_user-runtime-dir@1000_service_state_failed": 0, + "unit_user-runtime-dir@1000_service_state_inactive": 0, + 
"unit_user@1000_service_state_activating": 0, + "unit_user@1000_service_state_active": 1, + "unit_user@1000_service_state_deactivating": 0, + "unit_user@1000_service_state_failed": 0, + "unit_user@1000_service_state_inactive": 0, + "unit_user_slice_state_activating": 0, + "unit_user_slice_state_active": 1, + "unit_user_slice_state_deactivating": 0, + "unit_user_slice_state_failed": 0, + "unit_user_slice_state_inactive": 0, + "unit_var-lib-nfs-rpc_pipefs_mount_state_activating": 0, + "unit_var-lib-nfs-rpc_pipefs_mount_state_active": 0, + "unit_var-lib-nfs-rpc_pipefs_mount_state_deactivating": 0, + "unit_var-lib-nfs-rpc_pipefs_mount_state_failed": 0, + "unit_var-lib-nfs-rpc_pipefs_mount_state_inactive": 1, + }, + }, + "success on systemd v230+ on collecting only 'service' unit type": { + prepare: func() *SystemdUnits { + systemd := New() + systemd.Include = []string{"*.service"} + systemd.client = prepareOKClient(230) + return systemd + }, + wantCollected: map[string]int64{ + "unit_systemd-ask-password-wall_service_state_activating": 0, + "unit_systemd-ask-password-wall_service_state_active": 0, + "unit_systemd-ask-password-wall_service_state_deactivating": 0, + "unit_systemd-ask-password-wall_service_state_failed": 0, + "unit_systemd-ask-password-wall_service_state_inactive": 1, + "unit_systemd-fsck-root_service_state_activating": 0, + "unit_systemd-fsck-root_service_state_active": 0, + "unit_systemd-fsck-root_service_state_deactivating": 0, + "unit_systemd-fsck-root_service_state_failed": 0, + "unit_systemd-fsck-root_service_state_inactive": 1, + "unit_user-runtime-dir@1000_service_state_activating": 0, + "unit_user-runtime-dir@1000_service_state_active": 1, + "unit_user-runtime-dir@1000_service_state_deactivating": 0, + "unit_user-runtime-dir@1000_service_state_failed": 0, + "unit_user-runtime-dir@1000_service_state_inactive": 0, + "unit_user@1000_service_state_activating": 0, + "unit_user@1000_service_state_active": 1, + 
"unit_user@1000_service_state_deactivating": 0, + "unit_user@1000_service_state_failed": 0, + "unit_user@1000_service_state_inactive": 0, + }, + }, + "success on systemd v230- on collecting only 'service' unit type": { + prepare: func() *SystemdUnits { + systemd := New() + systemd.Include = []string{"*.service"} + systemd.client = prepareOKClient(220) + return systemd + }, + wantCollected: map[string]int64{ + "unit_systemd-ask-password-wall_service_state_activating": 0, + "unit_systemd-ask-password-wall_service_state_active": 0, + "unit_systemd-ask-password-wall_service_state_deactivating": 0, + "unit_systemd-ask-password-wall_service_state_failed": 0, + "unit_systemd-ask-password-wall_service_state_inactive": 1, + "unit_systemd-fsck-root_service_state_activating": 0, + "unit_systemd-fsck-root_service_state_active": 0, + "unit_systemd-fsck-root_service_state_deactivating": 0, + "unit_systemd-fsck-root_service_state_failed": 0, + "unit_systemd-fsck-root_service_state_inactive": 1, + "unit_user-runtime-dir@1000_service_state_activating": 0, + "unit_user-runtime-dir@1000_service_state_active": 1, + "unit_user-runtime-dir@1000_service_state_deactivating": 0, + "unit_user-runtime-dir@1000_service_state_failed": 0, + "unit_user-runtime-dir@1000_service_state_inactive": 0, + "unit_user@1000_service_state_activating": 0, + "unit_user@1000_service_state_active": 1, + "unit_user@1000_service_state_deactivating": 0, + "unit_user@1000_service_state_failed": 0, + "unit_user@1000_service_state_inactive": 0, + }, + }, + "fails when all unites are filtered": { + prepare: func() *SystemdUnits { + systemd := New() + systemd.Include = []string{"*.not_exists"} + systemd.client = prepareOKClient(230) + return systemd + }, + wantCollected: nil, + }, + "fails on error on connect": { + prepare: func() *SystemdUnits { + systemd := New() + systemd.client = prepareClientErrOnConnect() + return systemd + }, + wantCollected: nil, + }, + "fails on error on get manager property": { + prepare: 
func() *SystemdUnits { + systemd := New() + systemd.client = prepareClientErrOnGetManagerProperty() + return systemd + }, + wantCollected: nil, + }, + "fails on error on list units": { + prepare: func() *SystemdUnits { + systemd := New() + systemd.client = prepareClientErrOnListUnits() + return systemd + }, + wantCollected: nil, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + systemd := test.prepare() + require.NoError(t, systemd.Init()) + + var collected map[string]int64 + + for i := 0; i < 10; i++ { + collected = systemd.Collect() + } + + assert.Equal(t, test.wantCollected, collected) + if len(test.wantCollected) > 0 { + ensureCollectedHasAllChartsDimsVarsIDs(t, systemd, collected) + } + }) + } +} + +func TestSystemdUnits_connectionReuse(t *testing.T) { + systemd := New() + systemd.Include = []string{"*"} + client := prepareOKClient(230) + systemd.client = client + require.NoError(t, systemd.Init()) + + var collected map[string]int64 + for i := 0; i < 10; i++ { + collected = systemd.Collect() + } + + assert.NotEmpty(t, collected) + assert.Equal(t, 1, client.connectCalls) +} + +func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, sd *SystemdUnits, collected map[string]int64) { + for _, chart := range *sd.Charts() { + if chart.Obsolete { + continue + } + for _, dim := range chart.Dims { + _, ok := collected[dim.ID] + assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) + } + for _, v := range chart.Vars { + _, ok := collected[v.ID] + assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) + } + } +} + +func prepareOKClient(ver int) *mockClient { + return &mockClient{ + conn: &mockConn{ + version: ver, + units: mockSystemdUnits, + }, + } +} + +func prepareClientErrOnConnect() *mockClient { + return &mockClient{ + errOnConnect: true, + } +} + +func prepareClientErrOnGetManagerProperty() *mockClient { + return &mockClient{ + conn: &mockConn{ + version: 
230, + errOnGetManagerProperty: true, + units: mockSystemdUnits, + }, + } +} + +func prepareClientErrOnListUnits() *mockClient { + return &mockClient{ + conn: &mockConn{ + version: 230, + errOnListUnits: true, + units: mockSystemdUnits, + }, + } +} + +type mockClient struct { + conn systemdConnection + connectCalls int + errOnConnect bool +} + +func (m *mockClient) connect() (systemdConnection, error) { + m.connectCalls++ + if m.errOnConnect { + return nil, errors.New("mock 'connect' error") + } + return m.conn, nil +} + +type mockConn struct { + version int + units []dbus.UnitStatus + errOnGetManagerProperty bool + errOnListUnits bool + closeCalled bool +} + +func (m *mockConn) Close() { + m.closeCalled = true +} + +func (m *mockConn) GetManagerProperty(prop string) (string, error) { + if m.errOnGetManagerProperty { + return "", errors.New("'GetManagerProperty' call error") + } + if prop != versionProperty { + return "", fmt.Errorf("'GetManagerProperty' unkown property: %s", prop) + } + return fmt.Sprintf("%d.6-1-manjaro", m.version), nil +} + +func (m *mockConn) ListUnitsContext(_ context.Context) ([]dbus.UnitStatus, error) { + if m.errOnListUnits { + return nil, errors.New("'ListUnits' call error") + } + if m.version >= 230 { + return nil, errors.New("'ListUnits' unsupported function error") + } + return append([]dbus.UnitStatus{}, m.units...), nil +} + +func (m *mockConn) ListUnitsByPatternsContext(_ context.Context, _ []string, ps []string) ([]dbus.UnitStatus, error) { + if m.errOnListUnits { + return nil, errors.New("'ListUnitsByPatterns' call error") + } + if m.version < 230 { + return nil, errors.New("'ListUnitsByPatterns' unsupported function error") + } + + matches := func(name string) bool { + for _, p := range ps { + if ok, _ := filepath.Match(p, name); ok { + return true + } + } + return false + } + + var units []dbus.UnitStatus + for _, unit := range m.units { + if matches(unit.Name) { + units = append(units, unit) + } + } + return units, nil +} + 
+var mockSystemdUnits = []dbus.UnitStatus{ + {Name: `proc-sys-fs-binfmt_misc.automount`, LoadState: "loaded", ActiveState: "active"}, + {Name: `dev-nvme0n1.device`, LoadState: "loaded", ActiveState: "active"}, + {Name: `sys-devices-virtual-net-loopback1.device`, LoadState: "loaded", ActiveState: "active"}, + {Name: `sys-module-fuse.device`, LoadState: "loaded", ActiveState: "active"}, + {Name: `dev-disk-by\x2duuid-DE44\x2dCEE0.device`, LoadState: "loaded", ActiveState: "active"}, + + {Name: `var-lib-nfs-rpc_pipefs.mount`, LoadState: "loaded", ActiveState: "inactive"}, + {Name: `var.mount`, LoadState: "not-found", ActiveState: "inactive"}, + {Name: `run-user-1000.mount`, LoadState: "loaded", ActiveState: "active"}, + {Name: `tmp.mount`, LoadState: "loaded", ActiveState: "active"}, + {Name: `run-user-1000-gvfs.mount`, LoadState: "loaded", ActiveState: "active"}, + + {Name: `org.cups.cupsd.path`, LoadState: "loaded", ActiveState: "active"}, + {Name: `systemd-ask-password-wall.path`, LoadState: "loaded", ActiveState: "active"}, + {Name: `systemd-ask-password-console.path`, LoadState: "loaded", ActiveState: "active"}, + + {Name: `init.scope`, LoadState: "loaded", ActiveState: "active"}, + {Name: `session-3.scope`, LoadState: "loaded", ActiveState: "active"}, + {Name: `session-6.scope`, LoadState: "loaded", ActiveState: "active"}, + {Name: `session-1.scope`, LoadState: "loaded", ActiveState: "active"}, + {Name: `session-2.scope`, LoadState: "loaded", ActiveState: "active"}, + + {Name: `systemd-fsck-root.service`, LoadState: "loaded", ActiveState: "inactive"}, + {Name: `httpd.service`, LoadState: "not-found", ActiveState: "inactive"}, + {Name: `user-runtime-dir@1000.service`, LoadState: "loaded", ActiveState: "active"}, + {Name: `systemd-ask-password-wall.service`, LoadState: "loaded", ActiveState: "inactive"}, + {Name: `user@1000.service`, LoadState: "loaded", ActiveState: "active"}, + + {Name: `user.slice`, LoadState: "loaded", ActiveState: "active"}, + {Name: 
`system-getty.slice`, LoadState: "loaded", ActiveState: "active"}, + {Name: `system-netctl.slice`, LoadState: "loaded", ActiveState: "active"}, + {Name: `system.slice`, LoadState: "loaded", ActiveState: "active"}, + {Name: `system-systemd\x2dfsck.slice`, LoadState: "loaded", ActiveState: "active"}, + + {Name: `lvm2-lvmpolld.socket`, LoadState: "loaded", ActiveState: "active"}, + {Name: `docker.socket`, LoadState: "loaded", ActiveState: "active"}, + {Name: `systemd-udevd-kernel.socket`, LoadState: "loaded", ActiveState: "active"}, + {Name: `dbus.socket`, LoadState: "loaded", ActiveState: "active"}, + {Name: `lvm2-lvmetad.socket`, LoadState: "loaded", ActiveState: "active"}, + + {Name: `getty-pre.target`, LoadState: "loaded", ActiveState: "inactive"}, + {Name: `rpc_pipefs.target`, LoadState: "loaded", ActiveState: "inactive"}, + {Name: `remote-fs-pre.target`, LoadState: "loaded", ActiveState: "inactive"}, + {Name: `sysinit.target`, LoadState: "loaded", ActiveState: "active"}, + {Name: `sound.target`, LoadState: "loaded", ActiveState: "active"}, + + {Name: `man-db.timer`, LoadState: "loaded", ActiveState: "active"}, + {Name: `pamac-mirrorlist.timer`, LoadState: "loaded", ActiveState: "active"}, + {Name: `pamac-cleancache.timer`, LoadState: "loaded", ActiveState: "active"}, + {Name: `shadow.timer`, LoadState: "loaded", ActiveState: "active"}, + {Name: `logrotate.timer`, LoadState: "loaded", ActiveState: "active"}, +} diff --git a/src/go/collectors/go.d.plugin/modules/systemdunits/testdata/config.json b/src/go/collectors/go.d.plugin/modules/systemdunits/testdata/config.json new file mode 100644 index 000000000..ba8e51f1c --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/systemdunits/testdata/config.json @@ -0,0 +1,7 @@ +{ + "update_every": 123, + "timeout": 123.123, + "include": [ + "ok" + ] +} diff --git a/src/go/collectors/go.d.plugin/modules/systemdunits/testdata/config.yaml b/src/go/collectors/go.d.plugin/modules/systemdunits/testdata/config.yaml new file 
mode 100644 index 000000000..377e4145d --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/systemdunits/testdata/config.yaml @@ -0,0 +1,4 @@ +update_every: 123 +timeout: 123.123 +include: + - "ok" |