diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-05-18 14:38:05 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-05-18 14:38:05 +0000 |
commit | ab2714ee67d23dc115edfc0e2bb82ab88cc17b57 (patch) | |
tree | bb9dd1e8750fea4bea85e590e36ca636f9128ad2 | |
parent | Adding upstream version 1.39.0. (diff) | |
download | netdata-ab2714ee67d23dc115edfc0e2bb82ab88cc17b57.tar.xz netdata-ab2714ee67d23dc115edfc0e2bb82ab88cc17b57.zip |
Adding upstream version 1.39.1. (upstream/1.39.1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
-rw-r--r-- | CHANGELOG.md | 67 | ||||
-rw-r--r-- | aclk/aclk_tx_msgs.c | 23 | ||||
-rw-r--r-- | collectors/cgroups.plugin/sys_fs_cgroup.c | 4 | ||||
-rw-r--r-- | collectors/proc.plugin/proc_diskstats.c | 8 | ||||
-rw-r--r-- | collectors/proc.plugin/sys_class_infiniband.c | 14 | ||||
-rw-r--r-- | database/rrdset.c | 2 | ||||
-rw-r--r-- | database/sqlite/sqlite_aclk_alert.c | 2 | ||||
-rwxr-xr-x | health/notifications/health_alarm_notify.conf | 344 | ||||
-rw-r--r-- | libnetdata/libnetdata.c | 2 | ||||
-rw-r--r-- | ml/Config.cc | 13 | ||||
-rw-r--r-- | ml/README.md | 6 | ||||
-rw-r--r-- | ml/ad_charts.cc | 4 | ||||
-rw-r--r-- | ml/ml-private.h | 11 | ||||
-rw-r--r-- | ml/ml.cc | 85 | ||||
-rwxr-xr-x | packaging/makeself/install-or-update.sh | 3 | ||||
-rw-r--r-- | packaging/version | 2 |
16 files changed, 337 insertions, 253 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e5349fa7..e45e0fe68 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,43 @@ # Changelog +## [v1.39.1](https://github.com/netdata/netdata/tree/v1.39.1) (2023-05-18) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.39.0...v1.39.1) + +**Merged pull requests:** + +- Update netdata-security.md [\#15068](https://github.com/netdata/netdata/pull/15068) ([cakrit](https://github.com/cakrit)) +- Update netdata-security.md [\#15067](https://github.com/netdata/netdata/pull/15067) ([cakrit](https://github.com/cakrit)) +- Update CODEOWNERS [\#15064](https://github.com/netdata/netdata/pull/15064) ([cakrit](https://github.com/cakrit)) +- Update netdata-security.md [\#15063](https://github.com/netdata/netdata/pull/15063) ([sashwathn](https://github.com/sashwathn)) +- Fix memory leak when sending alerts checkoint [\#15060](https://github.com/netdata/netdata/pull/15060) ([stelfrag](https://github.com/stelfrag)) +- bump go.d.plugin to v0.53.0 [\#15059](https://github.com/netdata/netdata/pull/15059) ([ilyam8](https://github.com/ilyam8)) +- Fix ACLK memleak [\#15055](https://github.com/netdata/netdata/pull/15055) ([underhood](https://github.com/underhood)) +- fix\(debugfs/zswap\): don't collect metrics if Zswap is disabled [\#15054](https://github.com/netdata/netdata/pull/15054) ([ilyam8](https://github.com/ilyam8)) +- Comment out default `role_recipients_*` values [\#15047](https://github.com/netdata/netdata/pull/15047) ([jamgregory](https://github.com/jamgregory)) +- Small update ml defaults [\#15046](https://github.com/netdata/netdata/pull/15046) ([andrewm4894](https://github.com/andrewm4894)) +- Fix handling of permissions in static installs. 
[\#15042](https://github.com/netdata/netdata/pull/15042) ([Ferroin](https://github.com/Ferroin)) +- Update tor.chart.py [\#15041](https://github.com/netdata/netdata/pull/15041) ([jmphilippe](https://github.com/jmphilippe)) +- Wording fix in interact with charts doc [\#15040](https://github.com/netdata/netdata/pull/15040) ([Ancairon](https://github.com/Ancairon)) +- fatal in claim\(\) only if --claim-only is used [\#15039](https://github.com/netdata/netdata/pull/15039) ([ilyam8](https://github.com/ilyam8)) +- Slight wording fix on the database readme [\#15034](https://github.com/netdata/netdata/pull/15034) ([Ancairon](https://github.com/Ancairon)) +- Update SQLITE to version 3.41.2 [\#15031](https://github.com/netdata/netdata/pull/15031) ([stelfrag](https://github.com/stelfrag)) +- Update troubleshooting-agent-with-cloud-connection.md [\#15029](https://github.com/netdata/netdata/pull/15029) ([cakrit](https://github.com/cakrit)) +- Adjust buffers to prevent overflow [\#15025](https://github.com/netdata/netdata/pull/15025) ([stelfrag](https://github.com/stelfrag)) +- Reduce netdatacli size [\#15024](https://github.com/netdata/netdata/pull/15024) ([stelfrag](https://github.com/stelfrag)) +- Debugfs collector [\#15017](https://github.com/netdata/netdata/pull/15017) ([thiagoftsm](https://github.com/thiagoftsm)) +- review the billing docs for the flow [\#15014](https://github.com/netdata/netdata/pull/15014) ([hugovalente-pm](https://github.com/hugovalente-pm)) +- Rollback ML transaction on failure. 
[\#15013](https://github.com/netdata/netdata/pull/15013) ([vkalintiris](https://github.com/vkalintiris)) +- Silence dimensions with noisy ML models [\#15011](https://github.com/netdata/netdata/pull/15011) ([vkalintiris](https://github.com/vkalintiris)) +- Update chart documentation [\#15010](https://github.com/netdata/netdata/pull/15010) ([Ancairon](https://github.com/Ancairon)) +- Honor maximum message size limit of MQTT server [\#15009](https://github.com/netdata/netdata/pull/15009) ([underhood](https://github.com/underhood)) +- libjudy: remove JudyLTablesGen [\#14984](https://github.com/netdata/netdata/pull/14984) ([mochaaP](https://github.com/mochaaP)) +- Remove Fedora 36 from CI and platform support. [\#14938](https://github.com/netdata/netdata/pull/14938) ([Ferroin](https://github.com/Ferroin)) +- make zlib compulsory dep [\#14928](https://github.com/netdata/netdata/pull/14928) ([underhood](https://github.com/underhood)) +- Remove old logic for handling of legacy stock config files. 
[\#14829](https://github.com/netdata/netdata/pull/14829) ([Ferroin](https://github.com/Ferroin)) +- fix infiniband bytes counters multiplier and divisor [\#14748](https://github.com/netdata/netdata/pull/14748) ([ilyam8](https://github.com/ilyam8)) +- initial minimal h2o webserver integration [\#14585](https://github.com/netdata/netdata/pull/14585) ([underhood](https://github.com/underhood)) + ## [v1.39.0](https://github.com/netdata/netdata/tree/v1.39.0) (2023-05-08) [Full Changelog](https://github.com/netdata/netdata/compare/v1.38.1...v1.39.0) @@ -365,15 +403,6 @@ - Only load required charts for rrdvars [\#14443](https://github.com/netdata/netdata/pull/14443) ([MrZammler](https://github.com/MrZammler)) - Typos in in notification docs [\#14440](https://github.com/netdata/netdata/pull/14440) ([iorvd](https://github.com/iorvd)) - Streaming interpolated values [\#14431](https://github.com/netdata/netdata/pull/14431) ([ktsaou](https://github.com/ktsaou)) -- Fix compiler error when CLOSE\_RANGE\_CLOEXEC is missing [\#14430](https://github.com/netdata/netdata/pull/14430) ([Dim-P](https://github.com/Dim-P)) -- Add .NET info [\#14429](https://github.com/netdata/netdata/pull/14429) ([thiagoftsm](https://github.com/thiagoftsm)) -- Minor fix, convert metadata of the learn to hidden sections [\#14427](https://github.com/netdata/netdata/pull/14427) ([tkatsoulas](https://github.com/tkatsoulas)) -- kickstart.sh: Fix `--release-channel` as `--nightly-channel` options [\#14424](https://github.com/netdata/netdata/pull/14424) ([vobruba-martin](https://github.com/vobruba-martin)) -- Use curl from static builds if no system-wide copy exists. 
[\#14403](https://github.com/netdata/netdata/pull/14403) ([Ferroin](https://github.com/Ferroin)) -- add @andrewm4894 as docs/ codeowner [\#14398](https://github.com/netdata/netdata/pull/14398) ([andrewm4894](https://github.com/andrewm4894)) -- Roles permissions docs [\#14391](https://github.com/netdata/netdata/pull/14391) ([hugovalente-pm](https://github.com/hugovalente-pm)) -- add note about not needing to have room id [\#14390](https://github.com/netdata/netdata/pull/14390) ([andrewm4894](https://github.com/andrewm4894)) -- update the "Install Netdata with Docker" doc [\#14385](https://github.com/netdata/netdata/pull/14385) ([Ancairon](https://github.com/Ancairon)) ## [v1.38.1](https://github.com/netdata/netdata/tree/v1.38.1) (2023-02-13) @@ -387,26 +416,6 @@ - Updated w1sensor.chart.py [\#14435](https://github.com/netdata/netdata/pull/14435) ([martindue](https://github.com/martindue)) - replication to streaming transition when there are gaps [\#14434](https://github.com/netdata/netdata/pull/14434) ([ktsaou](https://github.com/ktsaou)) -- turn error\(\) to internal\_error\(\) [\#14428](https://github.com/netdata/netdata/pull/14428) ([ktsaou](https://github.com/ktsaou)) -- Fix typo on the netdata-functions.md [\#14426](https://github.com/netdata/netdata/pull/14426) ([lokerhp](https://github.com/lokerhp)) -- Update screenshot of timezone selector [\#14425](https://github.com/netdata/netdata/pull/14425) ([cakrit](https://github.com/cakrit)) -- Stop training thread from processing training requests once cancelled. 
[\#14423](https://github.com/netdata/netdata/pull/14423) ([vkalintiris](https://github.com/vkalintiris)) -- Check on parents the microseconds delta sent by agents [\#14422](https://github.com/netdata/netdata/pull/14422) ([ktsaou](https://github.com/ktsaou)) -- better logging of invalid pages detected on dbengine files [\#14420](https://github.com/netdata/netdata/pull/14420) ([ktsaou](https://github.com/ktsaou)) -- fix functions memory leak [\#14419](https://github.com/netdata/netdata/pull/14419) ([ktsaou](https://github.com/ktsaou)) -- Move under Developer in Learn [\#14417](https://github.com/netdata/netdata/pull/14417) ([cakrit](https://github.com/cakrit)) -- Libnetdata readmes learn [\#14416](https://github.com/netdata/netdata/pull/14416) ([cakrit](https://github.com/cakrit)) -- Minor fixes in markdown links [\#14415](https://github.com/netdata/netdata/pull/14415) ([tkatsoulas](https://github.com/tkatsoulas)) -- fix kubelet alarms [\#14414](https://github.com/netdata/netdata/pull/14414) ([ilyam8](https://github.com/ilyam8)) -- DBENGINE v2 - bug fixes [\#14413](https://github.com/netdata/netdata/pull/14413) ([ktsaou](https://github.com/ktsaou)) -- fix\(cgroups.plugin\): fix collecting full pressure stall time [\#14410](https://github.com/netdata/netdata/pull/14410) ([ilyam8](https://github.com/ilyam8)) -- feat\(charts.d\): add load usage \(Watts\) to nuts collector [\#14407](https://github.com/netdata/netdata/pull/14407) ([ilyam8](https://github.com/ilyam8)) -- fix link to ebpf collector [\#14405](https://github.com/netdata/netdata/pull/14405) ([ilyam8](https://github.com/ilyam8)) -- Remove equality when deciding how to use point [\#14402](https://github.com/netdata/netdata/pull/14402) ([MrZammler](https://github.com/MrZammler)) -- add help line to functions response [\#14399](https://github.com/netdata/netdata/pull/14399) ([ktsaou](https://github.com/ktsaou)) -- Fix typo on the page [\#14397](https://github.com/netdata/netdata/pull/14397) 
([iorvd](https://github.com/iorvd)) -- Fix kickstart and updater not working with BusyBox wget [\#14392](https://github.com/netdata/netdata/pull/14392) ([Dim-P](https://github.com/Dim-P)) -- Fix publishing Docker Images to secondary registries. [\#14389](https://github.com/netdata/netdata/pull/14389) ([Ferroin](https://github.com/Ferroin)) ## [v1.37.1](https://github.com/netdata/netdata/tree/v1.37.1) (2022-12-05) diff --git a/aclk/aclk_tx_msgs.c b/aclk/aclk_tx_msgs.c index 532b964ad..86ee818ed 100644 --- a/aclk/aclk_tx_msgs.c +++ b/aclk/aclk_tx_msgs.c @@ -51,13 +51,6 @@ uint16_t aclk_send_bin_message_subtopic_pid(mqtt_wss_client client, char *msg, s return packet_id; } -// json_object_put returns int unfortunately :D -// we need void(*fnc)(void *); -static void json_object_put_wrapper(void *jsonobj) -{ - json_object_put(jsonobj); -} - #define TOPIC_MAX_LEN 512 #define V2_BIN_PAYLOAD_SEPARATOR "\x0D\x0A\x0D\x0A" static int aclk_send_message_with_bin_payload(mqtt_wss_client client, json_object *msg, const char *topic, const void *payload, size_t payload_len) @@ -76,19 +69,21 @@ static int aclk_send_message_with_bin_payload(mqtt_wss_client client, json_objec str = json_object_to_json_string_ext(msg, JSON_C_TO_STRING_PLAIN); len = strlen(str); - if (payload_len) { - full_msg = mallocz(len + strlen(V2_BIN_PAYLOAD_SEPARATOR) + payload_len); + size_t full_msg_len = len; + if (payload_len) + full_msg_len += strlen(V2_BIN_PAYLOAD_SEPARATOR) + payload_len; - memcpy(full_msg, str, len); - json_object_put(msg); - msg = NULL; + full_msg = mallocz(full_msg_len); + memcpy(full_msg, str, len); + json_object_put(msg); + + if (payload_len) { memcpy(&full_msg[len], V2_BIN_PAYLOAD_SEPARATOR, strlen(V2_BIN_PAYLOAD_SEPARATOR)); len += strlen(V2_BIN_PAYLOAD_SEPARATOR); memcpy(&full_msg[len], payload, payload_len); - len += payload_len; } - mqtt_wss_publish5(client, (char*)topic, NULL, (char*)(payload_len ? full_msg : str), (payload_len ? 
&freez_aclk_publish5b : &json_object_put_wrapper), len, MQTT_WSS_PUB_QOS1, &packet_id); + mqtt_wss_publish5(client, (char*)topic, NULL, full_msg, &freez_aclk_publish5b, full_msg_len, MQTT_WSS_PUB_QOS1, &packet_id); #ifdef NETDATA_INTERNAL_CHECKS aclk_stats_msg_published(packet_id); diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c index 007d4245b..d9049b2fa 100644 --- a/collectors/cgroups.plugin/sys_fs_cgroup.c +++ b/collectors/cgroups.plugin/sys_fs_cgroup.c @@ -1952,7 +1952,7 @@ static void is_cgroup_procs_exist(netdata_ebpf_cgroup_shm_body_t *out, char *id) } static inline void convert_cgroup_to_systemd_service(struct cgroup *cg) { - char buffer[CGROUP_CHARTID_LINE_MAX]; + char buffer[CGROUP_CHARTID_LINE_MAX + 1]; cg->options |= CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE; strncpyz(buffer, cg->id, CGROUP_CHARTID_LINE_MAX); char *s = buffer; @@ -2607,7 +2607,7 @@ static inline void discovery_process_first_time_seen_cgroup(struct cgroup *cg) { } cg->first_time_seen = 0; - char comm[TASK_COMM_LEN]; + char comm[TASK_COMM_LEN + 1]; if (cg->container_orchestrator == CGROUPS_ORCHESTRATOR_UNSET) { if (strstr(cg->id, "kubepods")) { diff --git a/collectors/proc.plugin/proc_diskstats.c b/collectors/proc.plugin/proc_diskstats.c index 2a4fe4f8c..09c6498e3 100644 --- a/collectors/proc.plugin/proc_diskstats.c +++ b/collectors/proc.plugin/proc_diskstats.c @@ -348,7 +348,7 @@ static inline int get_disk_name_from_path(const char *path, char *result, size_t int found = 0, preferred = 0; - char *first_result = mallocz(result_size); + char *first_result = mallocz(result_size + 1); DIR *dir = opendir(path); if (!dir) { @@ -454,7 +454,7 @@ failed: } static inline char *get_disk_name(unsigned long major, unsigned long minor, char *disk) { - char result[FILENAME_MAX + 1] = ""; + char result[FILENAME_MAX + 2] = ""; if(!path_to_device_mapper || !*path_to_device_mapper || !get_disk_name_from_path(path_to_device_mapper, result, FILENAME_MAX + 1, 
major, minor, disk, NULL, 0)) if(!path_to_device_label || !*path_to_device_label || !get_disk_name_from_path(path_to_device_label, result, FILENAME_MAX + 1, major, minor, disk, NULL, 0)) @@ -615,8 +615,8 @@ static struct disk *get_disk(unsigned long major, unsigned long minor, char *dis // read device uuid if it is an LVM volume if (!strncmp(d->device, "dm-", 3)) { char uuid_filename[FILENAME_MAX + 1]; - snprintfz(uuid_filename, FILENAME_MAX, path_to_sys_devices_virtual_block_device, disk); - strncat(uuid_filename, "/dm/uuid", FILENAME_MAX); + int size = snprintfz(uuid_filename, FILENAME_MAX, path_to_sys_devices_virtual_block_device, disk); + strncat(uuid_filename, "/dm/uuid", FILENAME_MAX - size); char device_uuid[RRD_ID_LENGTH_MAX + 1]; if (!read_file(uuid_filename, device_uuid, RRD_ID_LENGTH_MAX) && !strncmp(device_uuid, "LVM-", 4)) { diff --git a/collectors/proc.plugin/sys_class_infiniband.c b/collectors/proc.plugin/sys_class_infiniband.c index f0b7f9a52..d12a34513 100644 --- a/collectors/proc.plugin/sys_class_infiniband.c +++ b/collectors/proc.plugin/sys_class_infiniband.c @@ -469,15 +469,17 @@ int do_sys_class_infiniband(int update_every, usec_t dt) // Sample output: "100 Gb/sec (4X EDR)" snprintfz(buffer, FILENAME_MAX, "%s/%s/%s", ports_dirname, port_dent->d_name, "rate"); char buffer_rate[65]; + p->width = 4; if (read_file(buffer, buffer_rate, 64)) { collector_error("Unable to read '%s'", buffer); - p->width = 1; } else { char *buffer_width = strstr(buffer_rate, "("); - buffer_width++; - // str2ull will stop on first non-decimal value - p->speed = str2ull(buffer_rate, NULL); - p->width = str2ull(buffer_width, NULL); + if (buffer_width) { + buffer_width++; + // str2ull will stop on first non-decimal value + p->speed = str2ull(buffer_rate, NULL); + p->width = str2ull(buffer_width, NULL); + } } if (!p->discovered) @@ -541,7 +543,7 @@ int do_sys_class_infiniband(int update_every, usec_t dt) // On this chart, we want to have a KB/s so the dashboard will 
autoscale it // The reported values are also per-lane, so we must multiply it by the width // x4 lanes multiplier as per Documentation/ABI/stable/sysfs-class-infiniband - FOREACH_COUNTER_BYTES(GEN_RRD_DIM_ADD_CUSTOM, port, 4 * 8 * port->width, 1024, RRD_ALGORITHM_INCREMENTAL) + FOREACH_COUNTER_BYTES(GEN_RRD_DIM_ADD_CUSTOM, port, port->width * 8, 1000, RRD_ALGORITHM_INCREMENTAL) port->stv_speed = rrdsetvar_custom_chart_variable_add_and_acquire(port->st_bytes, "link_speed"); } diff --git a/database/rrdset.c b/database/rrdset.c index 2843bb330..3177f43ff 100644 --- a/database/rrdset.c +++ b/database/rrdset.c @@ -2207,7 +2207,7 @@ bool rrdset_memory_load_or_create_map_save(RRDSET *st, RRD_MEMORY_MODE memory_mo memset(st_on_file, 0, size); // set the values we need - strncpyz(st_on_file->id, rrdset_id(st), RRD_ID_LENGTH_MAX_V019 + 1); + strncpyz(st_on_file->id, rrdset_id(st), RRD_ID_LENGTH_MAX_V019); strcpy(st_on_file->cache_filename, fullfilename); strcpy(st_on_file->magic, RRDSET_MAGIC_V019); st_on_file->memsize = size; diff --git a/database/sqlite/sqlite_aclk_alert.c b/database/sqlite/sqlite_aclk_alert.c index 62f1df29d..1e5bd0b74 100644 --- a/database/sqlite/sqlite_aclk_alert.c +++ b/database/sqlite/sqlite_aclk_alert.c @@ -1114,6 +1114,7 @@ void aclk_push_alarm_checkpoint(RRDHOST *host __maybe_unused) buffer_strcat(alarms_to_hash, ""); len = 0; } + freez(active_alerts); char hash[SHA256_DIGEST_LENGTH + 1]; if (hash256_string((const unsigned char *)buffer_tostring(alarms_to_hash), len, hash)) { @@ -1126,6 +1127,7 @@ void aclk_push_alarm_checkpoint(RRDHOST *host __maybe_unused) alarm_checkpoint.checksum = (char *)hash; aclk_send_provide_alarm_checkpoint(&alarm_checkpoint); + freez(claim_id); log_access("ACLK RES [%s (%s)]: ALERTS CHECKPOINT SENT", wc->node_id, rrdhost_hostname(host)); } else { log_access("ACLK RES [%s (%s)]: FAILED TO CREATE ALERTS CHECKPOINT HASH", wc->node_id, rrdhost_hostname(host)); diff --git a/health/notifications/health_alarm_notify.conf 
b/health/notifications/health_alarm_notify.conf index b7fa6e796..ddbb8ab59 100755 --- a/health/notifications/health_alarm_notify.conf +++ b/health/notifications/health_alarm_notify.conf @@ -956,365 +956,365 @@ custom_sender() { # generic system alarms # CPU, disks, network interfaces, entropy, etc -role_recipients_email[sysadmin]="${DEFAULT_RECIPIENT_EMAIL}" +# role_recipients_email[sysadmin]="${DEFAULT_RECIPIENT_EMAIL}" -role_recipients_hangouts[sysadmin]="${DEFAULT_RECIPIENT_HANGOUTS}" +# role_recipients_hangouts[sysadmin]="${DEFAULT_RECIPIENT_HANGOUTS}" -role_recipients_pushover[sysadmin]="${DEFAULT_RECIPIENT_PUSHOVER}" +# role_recipients_pushover[sysadmin]="${DEFAULT_RECIPIENT_PUSHOVER}" -role_recipients_pushbullet[sysadmin]="${DEFAULT_RECIPIENT_PUSHBULLET}" +# role_recipients_pushbullet[sysadmin]="${DEFAULT_RECIPIENT_PUSHBULLET}" -role_recipients_telegram[sysadmin]="${DEFAULT_RECIPIENT_TELEGRAM}" +# role_recipients_telegram[sysadmin]="${DEFAULT_RECIPIENT_TELEGRAM}" -role_recipients_slack[sysadmin]="${DEFAULT_RECIPIENT_SLACK}" +# role_recipients_slack[sysadmin]="${DEFAULT_RECIPIENT_SLACK}" -role_recipients_alerta[sysadmin]="${DEFAULT_RECIPIENT_ALERTA}" +# role_recipients_alerta[sysadmin]="${DEFAULT_RECIPIENT_ALERTA}" -role_recipients_flock[sysadmin]="${DEFAULT_RECIPIENT_FLOCK}" +# role_recipients_flock[sysadmin]="${DEFAULT_RECIPIENT_FLOCK}" -role_recipients_discord[sysadmin]="${DEFAULT_RECIPIENT_DISCORD}" +# role_recipients_discord[sysadmin]="${DEFAULT_RECIPIENT_DISCORD}" -role_recipients_hipchat[sysadmin]="${DEFAULT_RECIPIENT_HIPCHAT}" +# role_recipients_hipchat[sysadmin]="${DEFAULT_RECIPIENT_HIPCHAT}" -role_recipients_twilio[sysadmin]="${DEFAULT_RECIPIENT_TWILIO}" +# role_recipients_twilio[sysadmin]="${DEFAULT_RECIPIENT_TWILIO}" -role_recipients_messagebird[sysadmin]="${DEFAULT_RECIPIENT_MESSAGEBIRD}" +# role_recipients_messagebird[sysadmin]="${DEFAULT_RECIPIENT_MESSAGEBIRD}" -role_recipients_kavenegar[sysadmin]="${DEFAULT_RECIPIENT_KAVENEGAR}" +# 
role_recipients_kavenegar[sysadmin]="${DEFAULT_RECIPIENT_KAVENEGAR}" -role_recipients_pd[sysadmin]="${DEFAULT_RECIPIENT_PD}" +# role_recipients_pd[sysadmin]="${DEFAULT_RECIPIENT_PD}" -role_recipients_fleep[sysadmin]="${DEFAULT_RECIPIENT_FLEEP}" +# role_recipients_fleep[sysadmin]="${DEFAULT_RECIPIENT_FLEEP}" -role_recipients_irc[sysadmin]="${DEFAULT_RECIPIENT_IRC}" +# role_recipients_irc[sysadmin]="${DEFAULT_RECIPIENT_IRC}" -role_recipients_syslog[sysadmin]="${DEFAULT_RECIPIENT_SYSLOG}" +# role_recipients_syslog[sysadmin]="${DEFAULT_RECIPIENT_SYSLOG}" -role_recipients_prowl[sysadmin]="${DEFAULT_RECIPIENT_PROWL}" +# role_recipients_prowl[sysadmin]="${DEFAULT_RECIPIENT_PROWL}" -role_recipients_awssns[sysadmin]="${DEFAULT_RECIPIENT_AWSSNS}" +# role_recipients_awssns[sysadmin]="${DEFAULT_RECIPIENT_AWSSNS}" -role_recipients_custom[sysadmin]="${DEFAULT_RECIPIENT_CUSTOM}" +# role_recipients_custom[sysadmin]="${DEFAULT_RECIPIENT_CUSTOM}" -role_recipients_msteams[sysadmin]="${DEFAULT_RECIPIENT_MSTEAMS}" +# role_recipients_msteams[sysadmin]="${DEFAULT_RECIPIENT_MSTEAMS}" -role_recipients_rocketchat[sysadmin]="${DEFAULT_RECIPIENT_ROCKETCHAT}" +# role_recipients_rocketchat[sysadmin]="${DEFAULT_RECIPIENT_ROCKETCHAT}" -role_recipients_dynatrace[sysadmin]="${DEFAULT_RECIPIENT_DYNATRACE}" +# role_recipients_dynatrace[sysadmin]="${DEFAULT_RECIPIENT_DYNATRACE}" -role_recipients_opsgenie[sysadmin]="${DEFAULT_RECIPIENT_OPSGENIE}" +# role_recipients_opsgenie[sysadmin]="${DEFAULT_RECIPIENT_OPSGENIE}" -role_recipients_matrix[sysadmin]="${DEFAULT_RECIPIENT_MATRIX}" +# role_recipients_matrix[sysadmin]="${DEFAULT_RECIPIENT_MATRIX}" -role_recipients_stackpulse[sysadmin]="${DEFAULT_RECIPIENT_STACKPULSE}" +# role_recipients_stackpulse[sysadmin]="${DEFAULT_RECIPIENT_STACKPULSE}" -role_recipients_gotify[sysadmin]="${DEFAULT_RECIPIENT_GOTIFY}" +# role_recipients_gotify[sysadmin]="${DEFAULT_RECIPIENT_GOTIFY}" -role_recipients_ntfy[sysadmin]="${DEFAULT_RECIPIENT_NTFY}" +# 
role_recipients_ntfy[sysadmin]="${DEFAULT_RECIPIENT_NTFY}" # ----------------------------------------------------------------------------- # DNS related alarms -role_recipients_email[domainadmin]="${DEFAULT_RECIPIENT_EMAIL}" +# role_recipients_email[domainadmin]="${DEFAULT_RECIPIENT_EMAIL}" -role_recipients_hangouts[domainadmin]="${DEFAULT_RECIPIENT_HANGOUTS}" +# role_recipients_hangouts[domainadmin]="${DEFAULT_RECIPIENT_HANGOUTS}" -role_recipients_pushover[domainadmin]="${DEFAULT_RECIPIENT_PUSHOVER}" +# role_recipients_pushover[domainadmin]="${DEFAULT_RECIPIENT_PUSHOVER}" -role_recipients_pushbullet[domainadmin]="${DEFAULT_RECIPIENT_PUSHBULLET}" +# role_recipients_pushbullet[domainadmin]="${DEFAULT_RECIPIENT_PUSHBULLET}" -role_recipients_telegram[domainadmin]="${DEFAULT_RECIPIENT_TELEGRAM}" +# role_recipients_telegram[domainadmin]="${DEFAULT_RECIPIENT_TELEGRAM}" -role_recipients_slack[domainadmin]="${DEFAULT_RECIPIENT_SLACK}" +# role_recipients_slack[domainadmin]="${DEFAULT_RECIPIENT_SLACK}" -role_recipients_alerta[domainadmin]="${DEFAULT_RECIPIENT_ALERTA}" +# role_recipients_alerta[domainadmin]="${DEFAULT_RECIPIENT_ALERTA}" -role_recipients_flock[domainadmin]="${DEFAULT_RECIPIENT_FLOCK}" +# role_recipients_flock[domainadmin]="${DEFAULT_RECIPIENT_FLOCK}" -role_recipients_discord[domainadmin]="${DEFAULT_RECIPIENT_DISCORD}" +# role_recipients_discord[domainadmin]="${DEFAULT_RECIPIENT_DISCORD}" -role_recipients_hipchat[domainadmin]="${DEFAULT_RECIPIENT_HIPCHAT}" +# role_recipients_hipchat[domainadmin]="${DEFAULT_RECIPIENT_HIPCHAT}" -role_recipients_twilio[domainadmin]="${DEFAULT_RECIPIENT_TWILIO}" +# role_recipients_twilio[domainadmin]="${DEFAULT_RECIPIENT_TWILIO}" -role_recipients_messagebird[domainadmin]="${DEFAULT_RECIPIENT_MESSAGEBIRD}" +# role_recipients_messagebird[domainadmin]="${DEFAULT_RECIPIENT_MESSAGEBIRD}" -role_recipients_kavenegar[domainadmin]="${DEFAULT_RECIPIENT_KAVENEGAR}" +# role_recipients_kavenegar[domainadmin]="${DEFAULT_RECIPIENT_KAVENEGAR}" 
-role_recipients_pd[domainadmin]="${DEFAULT_RECIPIENT_PD}" +# role_recipients_pd[domainadmin]="${DEFAULT_RECIPIENT_PD}" -role_recipients_fleep[domainadmin]="${DEFAULT_RECIPIENT_FLEEP}" +# role_recipients_fleep[domainadmin]="${DEFAULT_RECIPIENT_FLEEP}" -role_recipients_irc[domainadmin]="${DEFAULT_RECIPIENT_IRC}" +# role_recipients_irc[domainadmin]="${DEFAULT_RECIPIENT_IRC}" -role_recipients_syslog[domainadmin]="${DEFAULT_RECIPIENT_SYSLOG}" +# role_recipients_syslog[domainadmin]="${DEFAULT_RECIPIENT_SYSLOG}" -role_recipients_prowl[domainadmin]="${DEFAULT_RECIPIENT_PROWL}" +# role_recipients_prowl[domainadmin]="${DEFAULT_RECIPIENT_PROWL}" -role_recipients_awssns[domainadmin]="${DEFAULT_RECIPIENT_AWSSNS}" +# role_recipients_awssns[domainadmin]="${DEFAULT_RECIPIENT_AWSSNS}" -role_recipients_custom[domainadmin]="${DEFAULT_RECIPIENT_CUSTOM}" +# role_recipients_custom[domainadmin]="${DEFAULT_RECIPIENT_CUSTOM}" -role_recipients_msteams[domainadmin]="${DEFAULT_RECIPIENT_MSTEAMS}" +# role_recipients_msteams[domainadmin]="${DEFAULT_RECIPIENT_MSTEAMS}" -role_recipients_rocketchat[domainadmin]="${DEFAULT_RECIPIENT_ROCKETCHAT}" +# role_recipients_rocketchat[domainadmin]="${DEFAULT_RECIPIENT_ROCKETCHAT}" -role_recipients_sms[domainadmin]="${DEFAULT_RECIPIENT_SMS}" +# role_recipients_sms[domainadmin]="${DEFAULT_RECIPIENT_SMS}" -role_recipients_dynatrace[domainadmin]="${DEFAULT_RECIPIENT_DYNATRACE}" +# role_recipients_dynatrace[domainadmin]="${DEFAULT_RECIPIENT_DYNATRACE}" -role_recipients_opsgenie[domainadmin]="${DEFAULT_RECIPIENT_OPSGENIE}" +# role_recipients_opsgenie[domainadmin]="${DEFAULT_RECIPIENT_OPSGENIE}" -role_recipients_matrix[domainadmin]="${DEFAULT_RECIPIENT_MATRIX}" +# role_recipients_matrix[domainadmin]="${DEFAULT_RECIPIENT_MATRIX}" -role_recipients_stackpulse[domainadmin]="${DEFAULT_RECIPIENT_STACKPULSE}" +# role_recipients_stackpulse[domainadmin]="${DEFAULT_RECIPIENT_STACKPULSE}" -role_recipients_gotify[domainadmin]="${DEFAULT_RECIPIENT_GOTIFY}" +# 
role_recipients_gotify[domainadmin]="${DEFAULT_RECIPIENT_GOTIFY}" -role_recipients_ntfy[domainadmin]="${DEFAULT_RECIPIENT_NTFY}" +# role_recipients_ntfy[domainadmin]="${DEFAULT_RECIPIENT_NTFY}" # ----------------------------------------------------------------------------- # database servers alarms # mysql, redis, memcached, postgres, etc -role_recipients_email[dba]="${DEFAULT_RECIPIENT_EMAIL}" +# role_recipients_email[dba]="${DEFAULT_RECIPIENT_EMAIL}" -role_recipients_hangouts[dba]="${DEFAULT_RECIPIENT_HANGOUTS}" +# role_recipients_hangouts[dba]="${DEFAULT_RECIPIENT_HANGOUTS}" -role_recipients_pushover[dba]="${DEFAULT_RECIPIENT_PUSHOVER}" +# role_recipients_pushover[dba]="${DEFAULT_RECIPIENT_PUSHOVER}" -role_recipients_pushbullet[dba]="${DEFAULT_RECIPIENT_PUSHBULLET}" +# role_recipients_pushbullet[dba]="${DEFAULT_RECIPIENT_PUSHBULLET}" -role_recipients_telegram[dba]="${DEFAULT_RECIPIENT_TELEGRAM}" +# role_recipients_telegram[dba]="${DEFAULT_RECIPIENT_TELEGRAM}" -role_recipients_slack[dba]="${DEFAULT_RECIPIENT_SLACK}" +# role_recipients_slack[dba]="${DEFAULT_RECIPIENT_SLACK}" -role_recipients_alerta[dba]="${DEFAULT_RECIPIENT_ALERTA}" +# role_recipients_alerta[dba]="${DEFAULT_RECIPIENT_ALERTA}" -role_recipients_flock[dba]="${DEFAULT_RECIPIENT_FLOCK}" +# role_recipients_flock[dba]="${DEFAULT_RECIPIENT_FLOCK}" -role_recipients_discord[dba]="${DEFAULT_RECIPIENT_DISCORD}" +# role_recipients_discord[dba]="${DEFAULT_RECIPIENT_DISCORD}" -role_recipients_hipchat[dba]="${DEFAULT_RECIPIENT_HIPCHAT}" +# role_recipients_hipchat[dba]="${DEFAULT_RECIPIENT_HIPCHAT}" -role_recipients_twilio[dba]="${DEFAULT_RECIPIENT_TWILIO}" +# role_recipients_twilio[dba]="${DEFAULT_RECIPIENT_TWILIO}" -role_recipients_messagebird[dba]="${DEFAULT_RECIPIENT_MESSAGEBIRD}" +# role_recipients_messagebird[dba]="${DEFAULT_RECIPIENT_MESSAGEBIRD}" -role_recipients_kavenegar[dba]="${DEFAULT_RECIPIENT_KAVENEGAR}" +# role_recipients_kavenegar[dba]="${DEFAULT_RECIPIENT_KAVENEGAR}" 
-role_recipients_pd[dba]="${DEFAULT_RECIPIENT_PD}" +# role_recipients_pd[dba]="${DEFAULT_RECIPIENT_PD}" -role_recipients_fleep[dba]="${DEFAULT_RECIPIENT_FLEEP}" +# role_recipients_fleep[dba]="${DEFAULT_RECIPIENT_FLEEP}" -role_recipients_irc[dba]="${DEFAULT_RECIPIENT_IRC}" +# role_recipients_irc[dba]="${DEFAULT_RECIPIENT_IRC}" -role_recipients_syslog[dba]="${DEFAULT_RECIPIENT_SYSLOG}" +# role_recipients_syslog[dba]="${DEFAULT_RECIPIENT_SYSLOG}" -role_recipients_prowl[dba]="${DEFAULT_RECIPIENT_PROWL}" +# role_recipients_prowl[dba]="${DEFAULT_RECIPIENT_PROWL}" -role_recipients_awssns[dba]="${DEFAULT_RECIPIENT_AWSSNS}" +# role_recipients_awssns[dba]="${DEFAULT_RECIPIENT_AWSSNS}" -role_recipients_custom[dba]="${DEFAULT_RECIPIENT_CUSTOM}" +# role_recipients_custom[dba]="${DEFAULT_RECIPIENT_CUSTOM}" -role_recipients_msteams[dba]="${DEFAULT_RECIPIENT_MSTEAMS}" +# role_recipients_msteams[dba]="${DEFAULT_RECIPIENT_MSTEAMS}" -role_recipients_rocketchat[dba]="${DEFAULT_RECIPIENT_ROCKETCHAT}" +# role_recipients_rocketchat[dba]="${DEFAULT_RECIPIENT_ROCKETCHAT}" -role_recipients_sms[dba]="${DEFAULT_RECIPIENT_SMS}" +# role_recipients_sms[dba]="${DEFAULT_RECIPIENT_SMS}" -role_recipients_dynatrace[dba]="${DEFAULT_RECIPIENT_DYNATRACE}" +# role_recipients_dynatrace[dba]="${DEFAULT_RECIPIENT_DYNATRACE}" -role_recipients_opsgenie[dba]="${DEFAULT_RECIPIENT_OPSGENIE}" +# role_recipients_opsgenie[dba]="${DEFAULT_RECIPIENT_OPSGENIE}" -role_recipients_matrix[dba]="${DEFAULT_RECIPIENT_MATRIX}" +# role_recipients_matrix[dba]="${DEFAULT_RECIPIENT_MATRIX}" -role_recipients_stackpulse[dba]="${DEFAULT_RECIPIENT_STACKPULSE}" +# role_recipients_stackpulse[dba]="${DEFAULT_RECIPIENT_STACKPULSE}" -role_recipients_gotify[dba]="${DEFAULT_RECIPIENT_GOTIFY}" +# role_recipients_gotify[dba]="${DEFAULT_RECIPIENT_GOTIFY}" -role_recipients_ntfy[dba]="${DEFAULT_RECIPIENT_NTFY}" +# role_recipients_ntfy[dba]="${DEFAULT_RECIPIENT_NTFY}" # ----------------------------------------------------------------------------- 
# web servers alarms # apache, nginx, lighttpd, etc -role_recipients_email[webmaster]="${DEFAULT_RECIPIENT_EMAIL}" +# role_recipients_email[webmaster]="${DEFAULT_RECIPIENT_EMAIL}" -role_recipients_hangouts[webmaster]="${DEFAULT_RECIPIENT_HANGOUTS}" +# role_recipients_hangouts[webmaster]="${DEFAULT_RECIPIENT_HANGOUTS}" -role_recipients_pushover[webmaster]="${DEFAULT_RECIPIENT_PUSHOVER}" +# role_recipients_pushover[webmaster]="${DEFAULT_RECIPIENT_PUSHOVER}" -role_recipients_pushbullet[webmaster]="${DEFAULT_RECIPIENT_PUSHBULLET}" +# role_recipients_pushbullet[webmaster]="${DEFAULT_RECIPIENT_PUSHBULLET}" -role_recipients_telegram[webmaster]="${DEFAULT_RECIPIENT_TELEGRAM}" +# role_recipients_telegram[webmaster]="${DEFAULT_RECIPIENT_TELEGRAM}" -role_recipients_slack[webmaster]="${DEFAULT_RECIPIENT_SLACK}" +# role_recipients_slack[webmaster]="${DEFAULT_RECIPIENT_SLACK}" -role_recipients_alerta[webmaster]="${DEFAULT_RECIPIENT_ALERTA}" +# role_recipients_alerta[webmaster]="${DEFAULT_RECIPIENT_ALERTA}" -role_recipients_flock[webmaster]="${DEFAULT_RECIPIENT_FLOCK}" +# role_recipients_flock[webmaster]="${DEFAULT_RECIPIENT_FLOCK}" -role_recipients_discord[webmaster]="${DEFAULT_RECIPIENT_DISCORD}" +# role_recipients_discord[webmaster]="${DEFAULT_RECIPIENT_DISCORD}" -role_recipients_hipchat[webmaster]="${DEFAULT_RECIPIENT_HIPCHAT}" +# role_recipients_hipchat[webmaster]="${DEFAULT_RECIPIENT_HIPCHAT}" -role_recipients_twilio[webmaster]="${DEFAULT_RECIPIENT_TWILIO}" +# role_recipients_twilio[webmaster]="${DEFAULT_RECIPIENT_TWILIO}" -role_recipients_messagebird[webmaster]="${DEFAULT_RECIPIENT_MESSAGEBIRD}" +# role_recipients_messagebird[webmaster]="${DEFAULT_RECIPIENT_MESSAGEBIRD}" -role_recipients_kavenegar[webmaster]="${DEFAULT_RECIPIENT_KAVENEGAR}" +# role_recipients_kavenegar[webmaster]="${DEFAULT_RECIPIENT_KAVENEGAR}" -role_recipients_pd[webmaster]="${DEFAULT_RECIPIENT_PD}" +# role_recipients_pd[webmaster]="${DEFAULT_RECIPIENT_PD}" 
-role_recipients_fleep[webmaster]="${DEFAULT_RECIPIENT_FLEEP}" +# role_recipients_fleep[webmaster]="${DEFAULT_RECIPIENT_FLEEP}" -role_recipients_irc[webmaster]="${DEFAULT_RECIPIENT_IRC}" +# role_recipients_irc[webmaster]="${DEFAULT_RECIPIENT_IRC}" -role_recipients_syslog[webmaster]="${DEFAULT_RECIPIENT_SYSLOG}" +# role_recipients_syslog[webmaster]="${DEFAULT_RECIPIENT_SYSLOG}" -role_recipients_prowl[webmaster]="${DEFAULT_RECIPIENT_PROWL}" +# role_recipients_prowl[webmaster]="${DEFAULT_RECIPIENT_PROWL}" -role_recipients_awssns[webmaster]="${DEFAULT_RECIPIENT_AWSSNS}" +# role_recipients_awssns[webmaster]="${DEFAULT_RECIPIENT_AWSSNS}" -role_recipients_custom[webmaster]="${DEFAULT_RECIPIENT_CUSTOM}" +# role_recipients_custom[webmaster]="${DEFAULT_RECIPIENT_CUSTOM}" -role_recipients_msteams[webmaster]="${DEFAULT_RECIPIENT_MSTEAMS}" +# role_recipients_msteams[webmaster]="${DEFAULT_RECIPIENT_MSTEAMS}" -role_recipients_rocketchat[webmaster]="${DEFAULT_RECIPIENT_ROCKETCHAT}" +# role_recipients_rocketchat[webmaster]="${DEFAULT_RECIPIENT_ROCKETCHAT}" -role_recipients_sms[webmaster]="${DEFAULT_RECIPIENT_SMS}" +# role_recipients_sms[webmaster]="${DEFAULT_RECIPIENT_SMS}" -role_recipients_dynatrace[webmaster]="${DEFAULT_RECIPIENT_DYNATRACE}" +# role_recipients_dynatrace[webmaster]="${DEFAULT_RECIPIENT_DYNATRACE}" -role_recipients_opsgenie[webmaster]="${DEFAULT_RECIPIENT_OPSGENIE}" +# role_recipients_opsgenie[webmaster]="${DEFAULT_RECIPIENT_OPSGENIE}" -role_recipients_matrix[webmaster]="${DEFAULT_RECIPIENT_MATRIX}" +# role_recipients_matrix[webmaster]="${DEFAULT_RECIPIENT_MATRIX}" -role_recipients_stackpulse[webmaster]="${DEFAULT_RECIPIENT_STACKPULSE}" +# role_recipients_stackpulse[webmaster]="${DEFAULT_RECIPIENT_STACKPULSE}" -role_recipients_gotify[webmaster]="${DEFAULT_RECIPIENT_GOTIFY}" +# role_recipients_gotify[webmaster]="${DEFAULT_RECIPIENT_GOTIFY}" -role_recipients_ntfy[webmaster]="${DEFAULT_RECIPIENT_NTFY}" +# role_recipients_ntfy[webmaster]="${DEFAULT_RECIPIENT_NTFY}" # 
----------------------------------------------------------------------------- # proxy servers alarms # squid, etc -role_recipients_email[proxyadmin]="${DEFAULT_RECIPIENT_EMAIL}" +# role_recipients_email[proxyadmin]="${DEFAULT_RECIPIENT_EMAIL}" -role_recipients_hangouts[proxyadmin]="${DEFAULT_RECIPIENT_HANGOUTS}" +# role_recipients_hangouts[proxyadmin]="${DEFAULT_RECIPIENT_HANGOUTS}" -role_recipients_pushover[proxyadmin]="${DEFAULT_RECIPIENT_PUSHOVER}" +# role_recipients_pushover[proxyadmin]="${DEFAULT_RECIPIENT_PUSHOVER}" -role_recipients_pushbullet[proxyadmin]="${DEFAULT_RECIPIENT_PUSHBULLET}" +# role_recipients_pushbullet[proxyadmin]="${DEFAULT_RECIPIENT_PUSHBULLET}" -role_recipients_telegram[proxyadmin]="${DEFAULT_RECIPIENT_TELEGRAM}" +# role_recipients_telegram[proxyadmin]="${DEFAULT_RECIPIENT_TELEGRAM}" -role_recipients_slack[proxyadmin]="${DEFAULT_RECIPIENT_SLACK}" +# role_recipients_slack[proxyadmin]="${DEFAULT_RECIPIENT_SLACK}" -role_recipients_alerta[proxyadmin]="${DEFAULT_RECIPIENT_ALERTA}" +# role_recipients_alerta[proxyadmin]="${DEFAULT_RECIPIENT_ALERTA}" -role_recipients_flock[proxyadmin]="${DEFAULT_RECIPIENT_FLOCK}" +# role_recipients_flock[proxyadmin]="${DEFAULT_RECIPIENT_FLOCK}" -role_recipients_discord[proxyadmin]="${DEFAULT_RECIPIENT_DISCORD}" +# role_recipients_discord[proxyadmin]="${DEFAULT_RECIPIENT_DISCORD}" -role_recipients_hipchat[proxyadmin]="${DEFAULT_RECIPIENT_HIPCHAT}" +# role_recipients_hipchat[proxyadmin]="${DEFAULT_RECIPIENT_HIPCHAT}" -role_recipients_twilio[proxyadmin]="${DEFAULT_RECIPIENT_TWILIO}" +# role_recipients_twilio[proxyadmin]="${DEFAULT_RECIPIENT_TWILIO}" -role_recipients_messagebird[proxyadmin]="${DEFAULT_RECIPIENT_MESSAGEBIRD}" +# role_recipients_messagebird[proxyadmin]="${DEFAULT_RECIPIENT_MESSAGEBIRD}" -role_recipients_kavenegar[proxyadmin]="${DEFAULT_RECIPIENT_KAVENEGAR}" +# role_recipients_kavenegar[proxyadmin]="${DEFAULT_RECIPIENT_KAVENEGAR}" -role_recipients_pd[proxyadmin]="${DEFAULT_RECIPIENT_PD}" +# 
role_recipients_pd[proxyadmin]="${DEFAULT_RECIPIENT_PD}" -role_recipients_fleep[proxyadmin]="${DEFAULT_RECIPIENT_FLEEP}" +# role_recipients_fleep[proxyadmin]="${DEFAULT_RECIPIENT_FLEEP}" -role_recipients_irc[proxyadmin]="${DEFAULT_RECIPIENT_IRC}" +# role_recipients_irc[proxyadmin]="${DEFAULT_RECIPIENT_IRC}" -role_recipients_syslog[proxyadmin]="${DEFAULT_RECIPIENT_SYSLOG}" +# role_recipients_syslog[proxyadmin]="${DEFAULT_RECIPIENT_SYSLOG}" -role_recipients_prowl[proxyadmin]="${DEFAULT_RECIPIENT_PROWL}" +# role_recipients_prowl[proxyadmin]="${DEFAULT_RECIPIENT_PROWL}" -role_recipients_awssns[proxyadmin]="${DEFAULT_RECIPIENT_AWSSNS}" +# role_recipients_awssns[proxyadmin]="${DEFAULT_RECIPIENT_AWSSNS}" -role_recipients_custom[proxyadmin]="${DEFAULT_RECIPIENT_CUSTOM}" +# role_recipients_custom[proxyadmin]="${DEFAULT_RECIPIENT_CUSTOM}" -role_recipients_msteams[proxyadmin]="${DEFAULT_RECIPIENT_MSTEAMS}" +# role_recipients_msteams[proxyadmin]="${DEFAULT_RECIPIENT_MSTEAMS}" -role_recipients_rocketchat[proxyadmin]="${DEFAULT_RECIPIENT_ROCKETCHAT}" +# role_recipients_rocketchat[proxyadmin]="${DEFAULT_RECIPIENT_ROCKETCHAT}" -role_recipients_sms[proxyadmin]="${DEFAULT_RECIPIENT_SMS}" +# role_recipients_sms[proxyadmin]="${DEFAULT_RECIPIENT_SMS}" -role_recipients_dynatrace[proxyadmin]="${DEFAULT_RECIPIENT_DYNATRACE}" +# role_recipients_dynatrace[proxyadmin]="${DEFAULT_RECIPIENT_DYNATRACE}" -role_recipients_opsgenie[proxyadmin]="${DEFAULT_RECIPIENT_OPSGENIE}" +# role_recipients_opsgenie[proxyadmin]="${DEFAULT_RECIPIENT_OPSGENIE}" -role_recipients_matrix[proxyadmin]="${DEFAULT_RECIPIENT_MATRIX}" +# role_recipients_matrix[proxyadmin]="${DEFAULT_RECIPIENT_MATRIX}" -role_recipients_stackpulse[proxyadmin]="${DEFAULT_RECIPIENT_STACKPULSE}" +# role_recipients_stackpulse[proxyadmin]="${DEFAULT_RECIPIENT_STACKPULSE}" -role_recipients_gotify[proxyadmin]="${DEFAULT_RECIPIENT_GOTIFY}" +# role_recipients_gotify[proxyadmin]="${DEFAULT_RECIPIENT_GOTIFY}" 
-role_recipients_ntfy[proxyadmin]="${DEFAULT_RECIPIENT_NTFY}" +# role_recipients_ntfy[proxyadmin]="${DEFAULT_RECIPIENT_NTFY}" # ----------------------------------------------------------------------------- # peripheral devices # UPS, photovoltaics, etc -role_recipients_email[sitemgr]="${DEFAULT_RECIPIENT_EMAIL}" +# role_recipients_email[sitemgr]="${DEFAULT_RECIPIENT_EMAIL}" -role_recipients_hangouts[sitemgr]="${DEFAULT_RECIPIENT_HANGOUTS}" +# role_recipients_hangouts[sitemgr]="${DEFAULT_RECIPIENT_HANGOUTS}" -role_recipients_pushover[sitemgr]="${DEFAULT_RECIPIENT_PUSHOVER}" +# role_recipients_pushover[sitemgr]="${DEFAULT_RECIPIENT_PUSHOVER}" -role_recipients_pushbullet[sitemgr]="${DEFAULT_RECIPIENT_PUSHBULLET}" +# role_recipients_pushbullet[sitemgr]="${DEFAULT_RECIPIENT_PUSHBULLET}" -role_recipients_telegram[sitemgr]="${DEFAULT_RECIPIENT_TELEGRAM}" +# role_recipients_telegram[sitemgr]="${DEFAULT_RECIPIENT_TELEGRAM}" -role_recipients_slack[sitemgr]="${DEFAULT_RECIPIENT_SLACK}" +# role_recipients_slack[sitemgr]="${DEFAULT_RECIPIENT_SLACK}" -role_recipients_alerta[sitemgr]="${DEFAULT_RECIPIENT_ALERTA}" +# role_recipients_alerta[sitemgr]="${DEFAULT_RECIPIENT_ALERTA}" -role_recipients_flock[sitemgr]="${DEFAULT_RECIPIENT_FLOCK}" +# role_recipients_flock[sitemgr]="${DEFAULT_RECIPIENT_FLOCK}" -role_recipients_discord[sitemgr]="${DEFAULT_RECIPIENT_DISCORD}" +# role_recipients_discord[sitemgr]="${DEFAULT_RECIPIENT_DISCORD}" -role_recipients_hipchat[sitemgr]="${DEFAULT_RECIPIENT_HIPCHAT}" +# role_recipients_hipchat[sitemgr]="${DEFAULT_RECIPIENT_HIPCHAT}" -role_recipients_twilio[sitemgr]="${DEFAULT_RECIPIENT_TWILIO}" +# role_recipients_twilio[sitemgr]="${DEFAULT_RECIPIENT_TWILIO}" -role_recipients_messagebird[sitemgr]="${DEFAULT_RECIPIENT_MESSAGEBIRD}" +# role_recipients_messagebird[sitemgr]="${DEFAULT_RECIPIENT_MESSAGEBIRD}" -role_recipients_kavenegar[sitemgr]="${DEFAULT_RECIPIENT_KAVENEGAR}" +# role_recipients_kavenegar[sitemgr]="${DEFAULT_RECIPIENT_KAVENEGAR}" 
-role_recipients_pd[sitemgr]="${DEFAULT_RECIPIENT_PD}" +# role_recipients_pd[sitemgr]="${DEFAULT_RECIPIENT_PD}" -role_recipients_fleep[sitemgr]="${DEFAULT_RECIPIENT_FLEEP}" +# role_recipients_fleep[sitemgr]="${DEFAULT_RECIPIENT_FLEEP}" -role_recipients_syslog[sitemgr]="${DEFAULT_RECIPIENT_SYSLOG}" +# role_recipients_syslog[sitemgr]="${DEFAULT_RECIPIENT_SYSLOG}" -role_recipients_prowl[sitemgr]="${DEFAULT_RECIPIENT_PROWL}" +# role_recipients_prowl[sitemgr]="${DEFAULT_RECIPIENT_PROWL}" -role_recipients_awssns[sitemgr]="${DEFAULT_RECIPIENT_AWSSNS}" +# role_recipients_awssns[sitemgr]="${DEFAULT_RECIPIENT_AWSSNS}" -role_recipients_custom[sitemgr]="${DEFAULT_RECIPIENT_CUSTOM}" +# role_recipients_custom[sitemgr]="${DEFAULT_RECIPIENT_CUSTOM}" -role_recipients_msteams[sitemgr]="${DEFAULT_RECIPIENT_MSTEAMS}" +# role_recipients_msteams[sitemgr]="${DEFAULT_RECIPIENT_MSTEAMS}" -role_recipients_rocketchat[sitemgr]="${DEFAULT_RECIPIENT_ROCKETCHAT}" +# role_recipients_rocketchat[sitemgr]="${DEFAULT_RECIPIENT_ROCKETCHAT}" -role_recipients_sms[sitemgr]="${DEFAULT_RECIPIENT_SMS}" +# role_recipients_sms[sitemgr]="${DEFAULT_RECIPIENT_SMS}" -role_recipients_dynatrace[sitemgr]="${DEFAULT_RECIPIENT_DYNATRACE}" +# role_recipients_dynatrace[sitemgr]="${DEFAULT_RECIPIENT_DYNATRACE}" -role_recipients_opsgenie[sitemgr]="${DEFAULT_RECIPIENT_OPSGENIE}" +# role_recipients_opsgenie[sitemgr]="${DEFAULT_RECIPIENT_OPSGENIE}" -role_recipients_matrix[sitemgr]="${DEFAULT_RECIPIENT_MATRIX}" +# role_recipients_matrix[sitemgr]="${DEFAULT_RECIPIENT_MATRIX}" -role_recipients_stackpulse[sitemgr]="${DEFAULT_RECIPIENT_STACKPULSE}" +# role_recipients_stackpulse[sitemgr]="${DEFAULT_RECIPIENT_STACKPULSE}" -role_recipients_gotify[sitemgr]="${DEFAULT_RECIPIENT_GOTIFY}" +# role_recipients_gotify[sitemgr]="${DEFAULT_RECIPIENT_GOTIFY}" -role_recipients_ntfy[sitemgr]="${DEFAULT_RECIPIENT_NTFY}" +# role_recipients_ntfy[sitemgr]="${DEFAULT_RECIPIENT_NTFY}" diff --git a/libnetdata/libnetdata.c b/libnetdata/libnetdata.c 
index a8f26c33b..19b861e39 100644 --- a/libnetdata/libnetdata.c +++ b/libnetdata/libnetdata.c @@ -2031,6 +2031,6 @@ int hash256_string(const unsigned char *string, size_t size, char *hash) { EVP_MD_CTX_destroy(ctx); return 0; } - + EVP_MD_CTX_destroy(ctx); return 1; } diff --git a/ml/Config.cc b/ml/Config.cc index d451c602c..c5129c49d 100644 --- a/ml/Config.cc +++ b/ml/Config.cc @@ -28,7 +28,7 @@ void ml_config_load(ml_config_t *cfg) { unsigned max_train_samples = config_get_number(config_section_ml, "maximum num samples to train", 4 * 3600); unsigned min_train_samples = config_get_number(config_section_ml, "minimum num samples to train", 1 * 900); unsigned train_every = config_get_number(config_section_ml, "train every", 1 * 3600); - unsigned num_models_to_use = config_get_number(config_section_ml, "number of models per dimension", 1); + unsigned num_models_to_use = config_get_number(config_section_ml, "number of models per dimension", 2); unsigned diff_n = config_get_number(config_section_ml, "num samples to diff", 1); unsigned smooth_n = config_get_number(config_section_ml, "num samples to smooth", 3); @@ -46,6 +46,9 @@ void ml_config_load(ml_config_t *cfg) { size_t num_training_threads = config_get_number(config_section_ml, "num training threads", 4); size_t flush_models_batch_size = config_get_number(config_section_ml, "flush models batch size", 128); + size_t suppression_window = config_get_number(config_section_ml, "dimension anomaly rate suppression window", 900); + size_t suppression_threshold = config_get_number(config_section_ml, "dimension anomaly rate suppression threshold", suppression_window / 2); + bool enable_statistics_charts = config_get_boolean(config_section_ml, "enable statistics charts", true); /* @@ -72,7 +75,10 @@ void ml_config_load(ml_config_t *cfg) { num_training_threads = clamp<size_t>(num_training_threads, 1, 128); flush_models_batch_size = clamp<size_t>(flush_models_batch_size, 8, 512); - /* + suppression_window = 
clamp<size_t>(suppression_window, 1, max_train_samples); + suppression_threshold = clamp<size_t>(suppression_threshold, 1, suppression_window); + + /* * Validate */ @@ -121,5 +127,8 @@ void ml_config_load(ml_config_t *cfg) { cfg->num_training_threads = num_training_threads; cfg->flush_models_batch_size = flush_models_batch_size; + cfg->suppression_window = suppression_window; + cfg->suppression_threshold = suppression_threshold; + cfg->enable_statistics_charts = enable_statistics_charts; } diff --git a/ml/README.md b/ml/README.md index ac7c7c013..60f38f22e 100644 --- a/ml/README.md +++ b/ml/README.md @@ -130,7 +130,7 @@ Below is a list of all the available configuration params and their default valu # maximum num samples to train = 14400 # minimum num samples to train = 3600 # train every = 3600 - # number of models per dimension = 1 + # number of models per dimension = 2 # dbengine anomaly rate every = 30 # num samples to diff = 1 # num samples to smooth = 3 @@ -143,6 +143,8 @@ Below is a list of all the available configuration params and their default valu # anomaly detection grouping duration = 300 # hosts to skip from training = !* # charts to skip from training = netdata.* + # dimension anomaly rate suppression window = 900 + # dimension anomaly rate suppression threshold = 450 ``` ### Configuration Examples @@ -187,7 +189,7 @@ This example assumes 3 child nodes [streaming](https://github.com/netdata/netdat - `maximum num samples to train`: (`3600`/`86400`) This is the maximum amount of time you would like to train each model on. For example, the default of `14400` trains on the preceding 4 hours of data, assuming an `update every` of 1 second. - `minimum num samples to train`: (`900`/`21600`) This is the minimum amount of data required to be able to train a model. For example, the default of `900` implies that once at least 15 minutes of data is available for training, a model is trained, otherwise it is skipped and checked again at the next training run. 
- `train every`: (`1800`/`21600`) This is how often each model will be retrained. For example, the default of `3600` means that each model is retrained every hour. Note: The training of all models is spread out across the `train every` period for efficiency, so in reality, it means that each model will be trained in a staggered manner within each `train every` period. -- `number of models per dimension`: (`1`/`168`) This is the number of trained models that will be used for scoring. For example the default `number of models per dimension = 1` means that just the most recently trained model (covering up to the most recent `maximum num samples to train` of training data) for the dimension will be used to determine the corresponding anomaly bit. Alternatively, if you have `train every = 3600` and `number of models per dimension = 24` this means that netdata will store and use the last 24 trained models for each dimension when determining the anomaly bit, this means that for the latest feature vector in this configuration to be considered anomalous it would need to look anomalous across _all_ the models trained for that dimension in the last 24 hours. As such, increasing `number of models per dimension` may reduce some false positives since it will result in more models (covering a wider time frame of training) being used during scoring. +- `number of models per dimension`: (`1`/`168`) This is the number of trained models that will be used for scoring. For example the default `number of models per dimension = 2` means that the two most recently trained models (covering up to the most recent `maximum num samples to train` of training data) for the dimension will be used to determine the corresponding anomaly bit. 
Alternatively, if you have `train every = 3600` and `number of models per dimension = 24`, netdata will store and use the last 24 trained models for each dimension when determining the anomaly bit. This means that for the latest feature vector in this configuration to be considered anomalous, it would need to look anomalous across _all_ the models trained for that dimension in the last 24 hours. As such, increasing `number of models per dimension` may reduce some false positives since it will result in more models (covering a wider time frame of training) being used during scoring. - `dbengine anomaly rate every`: (`30`/`900`) This is how often netdata will aggregate all the anomaly bits into a single chart (`anomaly_detection.anomaly_rates`). The aggregation into a single chart allows enabling anomaly rate ranking over _all_ metrics with one API call as opposed to a call per chart. - `num samples to diff`: (`0`/`1`) This is a `0` or `1` to determine if you want the model to operate on differences of the raw data or just the raw data. For example, the default of `1` means that we take differences of the raw values. Using differences is more general and works on dimensions that naturally tend to have trends or cycles that are normal behavior and to which we don't want to be too sensitive. - `num samples to smooth`: (`0`/`5`) This is a small integer that controls the amount of smoothing applied as part of the feature processing used by the model. For example, the default of `3` means that the rolling average of the last 3 values is used. Smoothing like this helps the model be a little more robust to spiky types of dimensions that naturally "jump" up or down as part of their normal behavior. 
diff --git a/ml/ad_charts.cc b/ml/ad_charts.cc index 086cd5aa0..bd065cfcc 100644 --- a/ml/ad_charts.cc +++ b/ml/ad_charts.cc @@ -124,6 +124,8 @@ void ml_update_dimensions_chart(ml_host_t *host, const ml_machine_learning_stats rrddim_add(host->training_status_rs, "trained", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); host->training_status_pending_with_model_rd = rrddim_add(host->training_status_rs, "pending-with-model", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + host->training_status_silenced_rd = + rrddim_add(host->training_status_rs, "silenced", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); } rrddim_set_by_pointer(host->training_status_rs, @@ -134,6 +136,8 @@ void ml_update_dimensions_chart(ml_host_t *host, const ml_machine_learning_stats host->training_status_trained_rd, mls.num_training_status_trained); rrddim_set_by_pointer(host->training_status_rs, host->training_status_pending_with_model_rd, mls.num_training_status_pending_with_model); + rrddim_set_by_pointer(host->training_status_rs, + host->training_status_silenced_rd, mls.num_training_status_silenced); rrdset_done(host->training_status_rs); } diff --git a/ml/ml-private.h b/ml/ml-private.h index 327cc59a2..2ed70d1ca 100644 --- a/ml/ml-private.h +++ b/ml/ml-private.h @@ -55,6 +55,7 @@ typedef struct machine_learning_stats_t { size_t num_training_status_pending_without_model; size_t num_training_status_trained; size_t num_training_status_pending_with_model; + size_t num_training_status_silenced; size_t num_anomalous_dimensions; size_t num_normal_dimensions; @@ -103,6 +104,9 @@ enum ml_training_status { // Have a valid, up-to-date model TRAINING_STATUS_TRAINED, + + // Have a valid, up-to-date model that is silenced because its too noisy + TRAINING_STATUS_SILENCED, }; enum ml_training_result { @@ -194,6 +198,9 @@ typedef struct { netdata_mutex_t mutex; ml_kmeans_t kmeans; std::vector<DSample> feature; + + uint32_t suppression_window_counter; + uint32_t suppression_anomaly_counter; } ml_dimension_t; typedef struct { @@ -233,6 
+240,7 @@ typedef struct { RRDDIM *training_status_pending_without_model_rd; RRDDIM *training_status_trained_rd; RRDDIM *training_status_pending_with_model_rd; + RRDDIM *training_status_silenced_rd; RRDSET *dimensions_rs; RRDDIM *dimensions_anomalous_rd; @@ -325,6 +333,9 @@ typedef struct { std::vector<ml_training_thread_t> training_threads; std::atomic<bool> training_stop; + size_t suppression_window; + size_t suppression_threshold; + bool enable_statistics_charts; } ml_config_t; @@ -63,6 +63,8 @@ ml_training_status_to_string(enum ml_training_status ts) return "trained"; case TRAINING_STATUS_UNTRAINED: return "untrained"; + case TRAINING_STATUS_SILENCED: + return "silenced"; default: return "unknown"; } @@ -490,12 +492,16 @@ ml_dimension_add_model(const uuid_t *metric_uuid, const ml_kmeans_t *km) } rc = execute_insert(res); - if (unlikely(rc != SQLITE_DONE)) + if (unlikely(rc != SQLITE_DONE)) { error_report("Failed to store model, rc = %d", rc); + return rc; + } rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) + if (unlikely(rc != SQLITE_OK)) { error_report("Failed to reset statement when storing model, rc = %d", rc); + return rc; + } return 0; @@ -504,7 +510,7 @@ bind_fail: rc = sqlite3_reset(res); if (unlikely(rc != SQLITE_OK)) error_report("Failed to reset statement to store model, rc = %d", rc); - return 1; + return rc; } static int @@ -523,7 +529,7 @@ ml_dimension_delete_models(const uuid_t *metric_uuid, time_t before) rc = prepare_statement(db, db_models_delete, &res); if (unlikely(rc != SQLITE_OK)) { error_report("Failed to prepare statement to delete models, rc = %d", rc); - return 1; + return rc; } } @@ -536,12 +542,16 @@ ml_dimension_delete_models(const uuid_t *metric_uuid, time_t before) goto bind_fail; rc = execute_insert(res); - if (unlikely(rc != SQLITE_DONE)) + if (unlikely(rc != SQLITE_DONE)) { error_report("Failed to delete models, rc = %d", rc); + return rc; + } rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) + if (unlikely(rc 
!= SQLITE_OK)) { error_report("Failed to reset statement when deleting models, rc = %d", rc); + return rc; + } return 0; @@ -550,7 +560,7 @@ bind_fail: rc = sqlite3_reset(res); if (unlikely(rc != SQLITE_OK)) error_report("Failed to reset statement to delete models, rc = %d", rc); - return 1; + return rc; } int ml_dimension_load_models(RRDDIM *rd) { @@ -671,6 +681,8 @@ ml_dimension_train_model(ml_training_thread_t *training_thread, ml_dimension_t * break; } + dim->suppression_anomaly_counter = 0; + dim->suppression_window_counter = 0; dim->tr = training_response; dim->last_training_time = training_response.last_entry_on_response; @@ -727,6 +739,10 @@ ml_dimension_train_model(ml_training_thread_t *training_thread, ml_dimension_t * dim->mt = METRIC_TYPE_CONSTANT; dim->ts = TRAINING_STATUS_TRAINED; + + dim->suppression_anomaly_counter = 0; + dim->suppression_window_counter = 0; + dim->tr = training_response; dim->last_training_time = rrddim_last_entry_s(dim->rd); @@ -763,6 +779,7 @@ ml_dimension_schedule_for_training(ml_dimension_t *dim, time_t curr_time) schedule_for_training = true; dim->ts = TRAINING_STATUS_PENDING_WITHOUT_MODEL; break; + case TRAINING_STATUS_SILENCED: case TRAINING_STATUS_TRAINED: if ((dim->last_training_time + (Cfg.train_every * dim->rd->update_every)) < curr_time) { schedule_for_training = true; @@ -848,6 +865,7 @@ ml_dimension_predict(ml_dimension_t *dim, time_t curr_time, calculated_number_t switch (dim->ts) { case TRAINING_STATUS_UNTRAINED: case TRAINING_STATUS_PENDING_WITHOUT_MODEL: { + case TRAINING_STATUS_SILENCED: netdata_mutex_unlock(&dim->mutex); return false; } @@ -855,6 +873,8 @@ ml_dimension_predict(ml_dimension_t *dim, time_t curr_time, calculated_number_t break; } + dim->suppression_window_counter++; + /* * Use the KMeans models to check if the value is anomalous */ @@ -878,6 +898,13 @@ ml_dimension_predict(ml_dimension_t *dim, time_t curr_time, calculated_number_t sum += 1; } + dim->suppression_anomaly_counter += sum ? 
1 : 0; + + if ((dim->suppression_anomaly_counter >= Cfg.suppression_threshold) && + (dim->suppression_window_counter >= Cfg.suppression_window)) { + dim->ts = TRAINING_STATUS_SILENCED; + } + netdata_mutex_unlock(&dim->mutex); global_statistics_ml_models_consulted(models_consulted); @@ -934,6 +961,13 @@ ml_chart_update_dimension(ml_chart_t *chart, ml_dimension_t *dim, bool is_anomal chart->mls.num_anomalous_dimensions += is_anomalous; chart->mls.num_normal_dimensions += !is_anomalous; return; + case TRAINING_STATUS_SILENCED: + chart->mls.num_training_status_silenced++; + chart->mls.num_training_status_trained++; + + chart->mls.num_anomalous_dimensions += is_anomalous; + chart->mls.num_normal_dimensions += !is_anomalous; + return; } return; @@ -987,6 +1021,7 @@ ml_host_detect_once(ml_host_t *host) host->mls.num_training_status_pending_without_model += chart_mls.num_training_status_pending_without_model; host->mls.num_training_status_trained += chart_mls.num_training_status_trained; host->mls.num_training_status_pending_with_model += chart_mls.num_training_status_pending_with_model; + host->mls.num_training_status_silenced += chart_mls.num_training_status_silenced; host->mls.num_anomalous_dimensions += chart_mls.num_anomalous_dimensions; host->mls.num_normal_dimensions += chart_mls.num_normal_dimensions; @@ -1370,23 +1405,37 @@ bool ml_dimension_is_anomalous(RRDDIM *rd, time_t curr_time, double value, bool return is_anomalous; } -static int ml_flush_pending_models(ml_training_thread_t *training_thread) { - (void) db_execute(db, "BEGIN TRANSACTION;"); +static void ml_flush_pending_models(ml_training_thread_t *training_thread) { + int rc = db_execute(db, "BEGIN TRANSACTION;"); + int op_no = 1; - for (const auto &pending_model: training_thread->pending_model_info) { - int rc = ml_dimension_add_model(&pending_model.metric_uuid, &pending_model.kmeans); - if (rc) - return rc; + if (!rc) { + op_no++; - rc = ml_dimension_delete_models(&pending_model.metric_uuid, 
pending_model.kmeans.before - (Cfg.num_models_to_use * Cfg.train_every)); - if (rc) - return rc; + for (const auto &pending_model: training_thread->pending_model_info) { + if (!rc) + rc = ml_dimension_add_model(&pending_model.metric_uuid, &pending_model.kmeans); + + if (!rc) + rc = ml_dimension_delete_models(&pending_model.metric_uuid, pending_model.kmeans.before - (Cfg.num_models_to_use * Cfg.train_every)); + } + } + + if (!rc) { + op_no++; + rc = db_execute(db, "COMMIT TRANSACTION;"); } - (void) db_execute(db, "COMMIT TRANSACTION;"); + // try to rollback transaction if we got any failures + if (rc) { + error("Trying to rollback ML transaction because it failed with rc=%d, op_no=%d", rc, op_no); + op_no++; + rc = db_execute(db, "ROLLBACK;"); + if (rc) + error("ML transaction rollback failed with rc=%d", rc); + } training_thread->pending_model_info.clear(); - return 0; } static void *ml_train_main(void *arg) { diff --git a/packaging/makeself/install-or-update.sh b/packaging/makeself/install-or-update.sh index 03f7c2c7c..1eabde83c 100755 --- a/packaging/makeself/install-or-update.sh +++ b/packaging/makeself/install-or-update.sh @@ -204,7 +204,8 @@ fi progress "fix permissions" run chmod g+rx,o+rx /opt -run chown -R ${NETDATA_USER}:${NETDATA_GROUP} /opt/netdata +run find /opt/netdata -type d -exec chmod go+rx '{}' \+ +run chown -R ${NETDATA_USER}:${NETDATA_GROUP} /opt/netdata/var # ----------------------------------------------------------------------------- diff --git a/packaging/version b/packaging/version index 5b813750d..f90a7b3c1 100644 --- a/packaging/version +++ b/packaging/version @@ -1 +1 @@ -v1.39.0 +v1.39.1 |