From b485aab7e71c1625cfc27e0f92c9509f42378458 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 5 May 2024 13:19:16 +0200 Subject: Adding upstream version 1.45.3+dfsg. Signed-off-by: Daniel Baumann --- health/health.d/memory.conf | 85 --------------------------------------------- 1 file changed, 85 deletions(-) delete mode 100644 health/health.d/memory.conf (limited to 'health/health.d/memory.conf') diff --git a/health/health.d/memory.conf b/health/health.d/memory.conf deleted file mode 100644 index 5ab3d2d92..000000000 --- a/health/health.d/memory.conf +++ /dev/null @@ -1,85 +0,0 @@ -# you can disable an alarm notification by setting the 'to' line to: silent - - alarm: 1hour_memory_hw_corrupted - on: mem.hwcorrupt - class: Errors - type: System -component: Memory - os: linux - hosts: * - calc: $HardwareCorrupted - units: MB - every: 10s - warn: $this > 0 - delay: down 1h multiplier 1.5 max 1h - summary: System corrupted memory - info: Amount of memory corrupted due to a hardware failure - to: sysadmin - -## ECC Controller - - template: ecc_memory_mc_correctable - on: mem.edac_mc - class: Errors - type: System -component: Memory - os: linux - hosts: * - lookup: sum -10m unaligned of correctable, correctable_noinfo - units: errors - every: 1m - warn: $this > 0 - delay: down 1h multiplier 1.5 max 1h - summary: System ECC memory ${label:controller} correctable errors - info: Memory controller ${label:controller} ECC correctable errors in the last 10 minutes - to: sysadmin - - template: ecc_memory_mc_uncorrectable - on: mem.edac_mc - class: Errors - type: System -component: Memory - os: linux - hosts: * - lookup: sum -10m unaligned of uncorrectable,uncorrectable_noinfo - units: errors - every: 1m - crit: $this > 0 - delay: down 1h multiplier 1.5 max 1h - summary: System ECC memory ${label:controller} uncorrectable errors - info: Memory controller ${label:controller} ECC uncorrectable errors in the last 10 minutes - to: sysadmin - -## ECC DIMM - - template: ecc_memory_dimm_correctable - on: mem.edac_mc_dimm - class: Errors - type: System -component: Memory - os: linux - hosts: * - lookup: sum -10m unaligned of correctable - units: errors - every: 1m - warn: $this > 0 - delay: down 1h multiplier 1.5 max 1h - summary: System ECC memory DIMM ${label:dimm} correctable errors - info: DIMM ${label:dimm} controller ${label:controller} (location ${label:dimm_location}) ECC correctable errors in the last 10 minutes - to: sysadmin - - template: ecc_memory_dimm_uncorrectable - on: mem.edac_mc_dimm - class: Errors - type: System -component: Memory - os: linux - hosts: * - lookup: sum -10m unaligned of uncorrectable - units: errors - every: 1m - crit: $this > 0 - delay: down 1h multiplier 1.5 max 1h - summary: System ECC memory DIMM ${label:dimm} uncorrectable errors - info: DIMM ${label:dimm} controller ${label:controller} (location ${label:dimm_location}) ECC uncorrectable errors in the last 10 minutes - to: sysadmin -- cgit v1.2.3