From b5f8ee61a7f7e9bd291dd26b0585d03eb686c941 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 5 May 2024 13:19:16 +0200 Subject: Adding upstream version 1.46.3. Signed-off-by: Daniel Baumann --- .../dbengine/10min_dbengine_global_flushing_errors.md | 13 +++++++++++++ .../dbengine/10min_dbengine_global_flushing_warnings.md | 15 +++++++++++++++ .../guides/dbengine/10min_dbengine_global_fs_errors.md | 14 ++++++++++++++ .../guides/dbengine/10min_dbengine_global_io_errors.md | 14 ++++++++++++++ 4 files changed, 56 insertions(+) create mode 100644 src/health/guides/dbengine/10min_dbengine_global_flushing_errors.md create mode 100644 src/health/guides/dbengine/10min_dbengine_global_flushing_warnings.md create mode 100644 src/health/guides/dbengine/10min_dbengine_global_fs_errors.md create mode 100644 src/health/guides/dbengine/10min_dbengine_global_io_errors.md (limited to 'src/health/guides/dbengine') diff --git a/src/health/guides/dbengine/10min_dbengine_global_flushing_errors.md b/src/health/guides/dbengine/10min_dbengine_global_flushing_errors.md new file mode 100644 index 000000000..7548c2d7e --- /dev/null +++ b/src/health/guides/dbengine/10min_dbengine_global_flushing_errors.md @@ -0,0 +1,13 @@ +### Understand the alert + +The Database Engine works like a traditional database. It dedicates a certain amount of RAM to data caching and indexing, while the rest of the data resides compressed on disk. Unlike other memory modes, the amount of historical metrics stored is based on the amount of disk space you allocate and the effective compression ratio, not a fixed number of metrics collected. + +By using both RAM and disk space, the database engine allows for long-term storage of per-second metrics inside of the Netdata Agent itself. + +Netdata monitors the number of pages deleted due to failure to flush data to disk in the last 10 minutes. In this situation some metric data was dropped to unblock data collection. 
To remedy this issue, reduce disk load or use +faster disks. This alert is triggered in critical state when the number of deleted pages is greater than 0. + +### Useful resources + +[Read more about Netdata DB engine](/src/database/README.md/engine) + diff --git a/src/health/guides/dbengine/10min_dbengine_global_flushing_warnings.md b/src/health/guides/dbengine/10min_dbengine_global_flushing_warnings.md new file mode 100644 index 000000000..444796703 --- /dev/null +++ b/src/health/guides/dbengine/10min_dbengine_global_flushing_warnings.md @@ -0,0 +1,15 @@ +### Understand the alert + +The Database Engine works like a traditional database. It dedicates a certain amount of RAM to data caching and indexing, while the rest of the data resides compressed on disk. Unlike other memory modes, the amount of historical metrics stored is based on the amount of disk space you allocate and the effective compression ratio, not a fixed number +of metrics collected. + +By using both RAM and disk space, the database engine allows for long-term storage of per-second metrics inside of the Netdata Agent itself. + +Netdata monitors the number of times when `dbengine` dirty pages were over 50% of the instance page cache in the last 10 minutes. In this situation, the metric data are at risk of not being stored in the database. To remedy this issue, reduce disk load or use faster disks. + +This alert is triggered in warning state when the number of times `dbengine` dirty pages were over 50% of the instance page cache is greater than 0. + +### Useful resources + +[Read more about Netdata DB engine](/src/database/README.md/engine) + diff --git a/src/health/guides/dbengine/10min_dbengine_global_fs_errors.md b/src/health/guides/dbengine/10min_dbengine_global_fs_errors.md new file mode 100644 index 000000000..a4093681b --- /dev/null +++ b/src/health/guides/dbengine/10min_dbengine_global_fs_errors.md @@ -0,0 +1,14 @@ +### Understand the alert + +The Database Engine works like a traditional database. 
It dedicates a certain amount of RAM to data caching and indexing, while the rest of the data resides compressed on disk. Unlike other memory modes, the amount of historical metrics stored is based on the amount of disk space you allocate and the effective compression ratio, not a fixed number of metrics collected. + +By using both RAM and disk space, the database engine allows for long-term storage of per-second metrics inside of the Netdata Agent itself. + +Netdata monitors the number of filesystem errors in the last 10 minutes. The dbengine is experiencing filesystem errors (too many open files, wrong permissions, etc.). + +This alert is triggered in warning state when the number of filesystem errors is greater than 0. + +### Useful resources + +[Read more about Netdata DB engine](/src/database/README.md/engine) + diff --git a/src/health/guides/dbengine/10min_dbengine_global_io_errors.md b/src/health/guides/dbengine/10min_dbengine_global_io_errors.md new file mode 100644 index 000000000..6bb831669 --- /dev/null +++ b/src/health/guides/dbengine/10min_dbengine_global_io_errors.md @@ -0,0 +1,14 @@ +### Understand the alert + +The Database Engine works like a traditional database. It dedicates a certain amount of RAM to data caching and indexing, while the rest of the data resides compressed on disk. Unlike other memory modes, the amount of historical metrics stored is based on the amount of disk space you allocate and the effective compression ratio, not a fixed number of metrics collected. + +By using both RAM and disk space, the database engine allows for long-term storage of per-second metrics inside of the Netdata Agent itself. + +The Netdata Agent monitors the number of I/O errors in the last 10 minutes. The dbengine is experiencing I/O errors (CRC errors, out of space, bad disk, etc.). + +This alert is triggered in critical state when the number of I/O errors is greater than 0. 
+ +### Useful resources + +[Read more about Netdata DB engine](/src/database/README.md/engine) + -- cgit v1.2.3