diff options
Diffstat (limited to 'health/health.d/systemdunits.conf')
-rw-r--r-- | health/health.d/systemdunits.conf | 105 |
1 files changed, 52 insertions, 53 deletions
diff --git a/health/health.d/systemdunits.conf b/health/health.d/systemdunits.conf index 38213a8db..531d62fac 100644 --- a/health/health.d/systemdunits.conf +++ b/health/health.d/systemdunits.conf @@ -1,142 +1,141 @@ -## Check if the are any systemd units in the failed state (crashed). -## States: 1 - active, 2 - inactive, 3 - activating, 4 - deactivating, 5 - failed. +# you can disable an alarm notification by setting the 'to' line to: silent ## Service units - template: systemd_service_units_state - on: systemd.service_units_state + template: systemd_service_unit_failed_state + on: systemd.service_unit_state class: Errors type: Linux component: Systemd units - lookup: max -1s min2max - units: ok/failed + calc: $failed + units: state every: 10s - warn: $this != nan AND $this == 5 + warn: $this != nan AND $this == 1 delay: down 5m multiplier 1.5 max 1h - info: one or more systemd service units are in the failed state + info: systemd service unit in the failed state to: sysadmin ## Socket units - template: systemd_socket_units_state + template: systemd_socket_unit_failed_state on: systemd.socket_unit_state class: Errors type: Linux component: Systemd units - lookup: max -1s min2max - units: ok/failed + calc: $failed + units: state every: 10s - warn: $this != nan AND $this == 5 + warn: $this != nan AND $this == 1 delay: down 5m multiplier 1.5 max 1h - info: one or more systemd socket units are in the failed state + info: systemd socket unit in the failed state to: sysadmin ## Target units - template: systemd_target_units_state + template: systemd_target_unit_failed_state on: systemd.target_unit_state class: Errors type: Linux component: Systemd units - lookup: max -1s min2max - units: ok/failed + calc: $failed + units: state every: 10s - warn: $this != nan AND $this == 5 + warn: $this != nan AND $this == 1 delay: down 5m multiplier 1.5 max 1h - info: one or more systemd target units are in the failed state + info: systemd target unit in the failed state to: sysadmin ## Path units - template: systemd_path_units_state + template: systemd_path_unit_failed_state on: systemd.path_unit_state class: Errors type: Linux component: Systemd units - lookup: max -1s min2max - units: ok/failed + calc: $failed + units: state every: 10s - warn: $this != nan AND $this == 5 + warn: $this != nan AND $this == 1 delay: down 5m multiplier 1.5 max 1h - info: one or more systemd path units are in the failed state + info: systemd path unit in the failed state to: sysadmin ## Device units - template: systemd_device_units_state + template: systemd_device_unit_failed_state on: systemd.device_unit_state class: Errors type: Linux component: Systemd units - lookup: max -1s min2max - units: ok/failed + calc: $failed + units: state every: 10s - warn: $this != nan AND $this == 5 + warn: $this != nan AND $this == 1 delay: down 5m multiplier 1.5 max 1h - info: one or more the systemd device units are in the failed state + info: systemd device unit in the failed state to: sysadmin ## Mount units - template: systemd_mount_units_state + template: systemd_mount_unit_failed_state on: systemd.mount_unit_state class: Errors type: Linux component: Systemd units - lookup: max -1s min2max - units: ok/failed + calc: $failed + units: state every: 10s - warn: $this != nan AND $this == 5 + warn: $this != nan AND $this == 1 delay: down 5m multiplier 1.5 max 1h - info: one or more the systemd mount units are in the failed state + info: systemd mount units in the failed state to: sysadmin ## Automount units - template: systemd_automount_units_state + template: systemd_automount_unit_failed_state on: systemd.automount_unit_state class: Errors type: Linux component: Systemd units - lookup: max -1s min2max - units: ok/failed + calc: $failed + units: state every: 10s - warn: $this != nan AND $this == 5 + warn: $this != nan AND $this == 1 delay: down 5m multiplier 1.5 max 1h - info: one or more systemd automount units are in the failed state + info: systemd automount unit in the failed state to: sysadmin ## Swap units - template: systemd_swap_units_state + template: systemd_swap_unit_failed_state on: systemd.swap_unit_state class: Errors type: Linux component: Systemd units - lookup: max -1s min2max - units: ok/failed + calc: $failed + units: state every: 10s - warn: $this != nan AND $this == 5 + warn: $this != nan AND $this == 1 delay: down 5m multiplier 1.5 max 1h - info: one or more systemd swap units are in the failed state + info: systemd swap units in the failed state to: sysadmin ## Scope units - template: systemd_scope_units_state + template: systemd_scope_unit_failed_state on: systemd.scope_unit_state class: Errors type: Linux component: Systemd units - lookup: max -1s min2max - units: ok/failed + calc: $failed + units: state every: 10s - warn: $this != nan AND $this == 5 + warn: $this != nan AND $this == 1 delay: down 5m multiplier 1.5 max 1h - info: one or more systemd scope units are in the failed state + info: systemd scope units in the failed state to: sysadmin ## Slice units - template: systemd_slice_units_state + template: systemd_slice_unit_failed_state on: systemd.slice_unit_state class: Errors type: Linux component: Systemd units - lookup: max -1s min2max - units: ok/failed + calc: $failed + units: state every: 10s - warn: $this != nan AND $this == 5 + warn: $this != nan AND $this == 1 delay: down 5m multiplier 1.5 max 1h - info: one or more systemd slice units are in the failed state + info: systemd slice units in the failed state to: sysadmin |