summaryrefslogtreecommitdiffstats
path: root/src/health/health.d/systemdunits.conf
diff options
context:
space:
mode:
Diffstat (limited to 'src/health/health.d/systemdunits.conf')
-rw-r--r--src/health/health.d/systemdunits.conf177
1 files changed, 177 insertions, 0 deletions
diff --git a/src/health/health.d/systemdunits.conf b/src/health/health.d/systemdunits.conf
new file mode 100644
index 000000000..bb5c627e8
--- /dev/null
+++ b/src/health/health.d/systemdunits.conf
@@ -0,0 +1,177 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+## Service units
+ template: systemd_service_unit_failed_state
+ on: systemd.service_unit_state
+ class: Errors
+ type: Linux
+ component: Systemd units
+chart labels: unit_name=!*
+ calc: $failed
+ units: state
+ every: 10s
+ warn: $this != nan AND $this == 1
+ delay: down 5m multiplier 1.5 max 1h
+ summary: systemd unit ${label:unit_name} state
+ info: systemd service unit in the failed state
+ to: sysadmin
+
+## Socket units
+ template: systemd_socket_unit_failed_state
+ on: systemd.socket_unit_state
+ class: Errors
+ type: Linux
+ component: Systemd units
+chart labels: unit_name=!*
+ calc: $failed
+ units: state
+ every: 10s
+ warn: $this != nan AND $this == 1
+ delay: down 5m multiplier 1.5 max 1h
+ summary: systemd unit ${label:unit_name} state
+ info: systemd socket unit in the failed state
+ to: sysadmin
+
+## Target units
+ template: systemd_target_unit_failed_state
+ on: systemd.target_unit_state
+ class: Errors
+ type: Linux
+ component: Systemd units
+chart labels: unit_name=!*
+ calc: $failed
+ units: state
+ every: 10s
+ warn: $this != nan AND $this == 1
+ delay: down 5m multiplier 1.5 max 1h
+ summary: systemd unit ${label:unit_name} state
+ info: systemd target unit in the failed state
+ to: sysadmin
+
+## Path units
+ template: systemd_path_unit_failed_state
+ on: systemd.path_unit_state
+ class: Errors
+ type: Linux
+ component: Systemd units
+chart labels: unit_name=!*
+ calc: $failed
+ units: state
+ every: 10s
+ warn: $this != nan AND $this == 1
+ delay: down 5m multiplier 1.5 max 1h
+ summary: systemd unit ${label:unit_name} state
+ info: systemd path unit in the failed state
+ to: sysadmin
+
+## Device units
+ template: systemd_device_unit_failed_state
+ on: systemd.device_unit_state
+ class: Errors
+ type: Linux
+ component: Systemd units
+chart labels: unit_name=!*
+ calc: $failed
+ units: state
+ every: 10s
+ warn: $this != nan AND $this == 1
+ delay: down 5m multiplier 1.5 max 1h
+ summary: systemd unit ${label:unit_name} state
+ info: systemd device unit in the failed state
+ to: sysadmin
+
+## Mount units
+ template: systemd_mount_unit_failed_state
+ on: systemd.mount_unit_state
+ class: Errors
+ type: Linux
+ component: Systemd units
+chart labels: unit_name=!*
+ calc: $failed
+ units: state
+ every: 10s
+ warn: $this != nan AND $this == 1
+ delay: down 5m multiplier 1.5 max 1h
+ summary: systemd unit ${label:unit_name} state
+ info: systemd mount units in the failed state
+ to: sysadmin
+
+## Automount units
+ template: systemd_automount_unit_failed_state
+ on: systemd.automount_unit_state
+ class: Errors
+ type: Linux
+ component: Systemd units
+chart labels: unit_name=!*
+ calc: $failed
+ units: state
+ every: 10s
+ warn: $this != nan AND $this == 1
+ delay: down 5m multiplier 1.5 max 1h
+ summary: systemd unit ${label:unit_name} state
+ info: systemd automount unit in the failed state
+ to: sysadmin
+
+## Swap units
+ template: systemd_swap_unit_failed_state
+ on: systemd.swap_unit_state
+ class: Errors
+ type: Linux
+ component: Systemd units
+chart labels: unit_name=!*
+ calc: $failed
+ units: state
+ every: 10s
+ warn: $this != nan AND $this == 1
+ delay: down 5m multiplier 1.5 max 1h
+ summary: systemd unit ${label:unit_name} state
+ info: systemd swap units in the failed state
+ to: sysadmin
+
+## Scope units
+ template: systemd_scope_unit_failed_state
+ on: systemd.scope_unit_state
+ class: Errors
+ type: Linux
+ component: Systemd units
+chart labels: unit_name=!*
+ calc: $failed
+ units: state
+ every: 10s
+ warn: $this != nan AND $this == 1
+ delay: down 5m multiplier 1.5 max 1h
+ summary: systemd unit ${label:unit_name} state
+ info: systemd scope units in the failed state
+ to: sysadmin
+
+## Slice units
+ template: systemd_slice_unit_failed_state
+ on: systemd.slice_unit_state
+ class: Errors
+ type: Linux
+ component: Systemd units
+chart labels: unit_name=!*
+ calc: $failed
+ units: state
+ every: 10s
+ warn: $this != nan AND $this == 1
+ delay: down 5m multiplier 1.5 max 1h
+ summary: systemd unit ${label:unit_name} state
+ info: systemd slice units in the failed state
+ to: sysadmin
+
+## Timer units
+ template: systemd_timer_unit_failed_state
+ on: systemd.timer_unit_state
+ class: Errors
+ type: Linux
+ component: Systemd units
+chart labels: unit_name=!*
+ calc: $failed
+ units: state
+ every: 10s
+ warn: $this != nan AND $this == 1
+ delay: down 5m multiplier 1.5 max 1h
+ summary: systemd unit ${label:unit_name} state
+ info: systemd timer unit in the failed state
+ to: sysadmin