Diffstat (limited to 'daemon')
 daemon/README.md                  |   4
 daemon/analytics.c                |  10
 daemon/analytics.h                |   3
 daemon/anonymous-statistics.sh.in |   4
 daemon/config/README.md           | 131
 daemon/event_loop.c               |   1
 daemon/event_loop.h               |   1
 daemon/global_statistics.c        |  31
 daemon/main.c                     |  26
 daemon/service.c                  |  17
10 files changed, 128 insertions, 100 deletions
diff --git a/daemon/README.md b/daemon/README.md
index 3fb33e5c7..0707a406c 100644
--- a/daemon/README.md
+++ b/daemon/README.md
@@ -38,7 +38,7 @@ The command line options of the Netdata 1.10.0 version are the following:
Support : https://github.com/netdata/netdata/issues
License : https://github.com/netdata/netdata/blob/master/LICENSE.md
- Twitter : https://twitter.com/linuxnetdata
+ Twitter : https://twitter.com/netdatahq
LinkedIn : https://linkedin.com/company/netdata-cloud/
Facebook : https://facebook.com/linuxnetdata/
@@ -143,6 +143,8 @@ For most Netdata programs (including standard external plugins shipped by netdat
| `ERROR` | Something that might disable a part of netdata.<br/>The log line includes `errno` (if it is not zero). |
| `FATAL` | Something prevented a program from running.<br/>The log line includes `errno` (if it is not zero) and the program exited. |
+The `FATAL` and `ERROR` messages will always appear in the logs, and `INFO` can be filtered using the [severity level](https://github.com/netdata/netdata/tree/master/daemon/config#logs-section-options) option.
+
So, when auto-detection of data collection fail, `ERROR` lines are logged and the relevant modules are disabled, but the
program continues to run.
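
The new paragraph above documents three severity levels, with `FATAL` and `ERROR` always emitted and `INFO` filterable through the `severity level` option. Below is a minimal sketch of such a threshold filter, assuming only the two configurable levels named in the docs; the names `log_level_t` and `log_should_emit` are illustrative, not Netdata's actual API.

```c
#include <stdbool.h>
#include <stdio.h>

/* ERROR is the most important configurable level; FATAL is always emitted and not modeled here */
typedef enum { LOG_LEVEL_ERROR = 0, LOG_LEVEL_INFO = 1 } log_level_t;

/* configured via "[logs] severity level"; defaults to info */
static log_level_t configured_level = LOG_LEVEL_INFO;

static bool log_should_emit(log_level_t msg_level) {
    return msg_level <= configured_level;
}

int main(void) {
    configured_level = LOG_LEVEL_ERROR;                                /* severity level = error */
    printf("INFO emitted?  %d\n", log_should_emit(LOG_LEVEL_INFO));    /* 0: filtered out */
    printf("ERROR emitted? %d\n", log_should_emit(LOG_LEVEL_ERROR));   /* 1: always kept */
    return 0;
}
```
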
diff --git a/daemon/analytics.c b/daemon/analytics.c
index 9323c8e8a..c149e2583 100644
--- a/daemon/analytics.c
+++ b/daemon/analytics.c
@@ -109,6 +109,7 @@ void analytics_free_data(void)
freez(analytics_data.netdata_config_use_private_registry);
freez(analytics_data.netdata_config_oom_score);
freez(analytics_data.netdata_prebuilt_distro);
+ freez(analytics_data.netdata_fail_reason);
}
/*
@@ -127,7 +128,7 @@ void analytics_set_data(char **name, char *value)
/*
* Set a string data with a value
*/
-void analytics_set_data_str(char **name, char *value)
+void analytics_set_data_str(char **name, const char *value)
{
size_t value_string_len;
if (*name) {
@@ -899,6 +900,7 @@ void set_global_environment()
analytics_set_data(&analytics_data.netdata_config_use_private_registry, "null");
analytics_set_data(&analytics_data.netdata_config_oom_score, "null");
analytics_set_data(&analytics_data.netdata_prebuilt_distro, "null");
+ analytics_set_data(&analytics_data.netdata_fail_reason, "null");
analytics_data.prometheus_hits = 0;
analytics_data.shell_hits = 0;
@@ -974,6 +976,7 @@ void send_statistics(const char *action, const char *action_result, const char *
action_result = "";
if (!action_data)
action_data = "";
+
char *command_to_run = mallocz(
sizeof(char) * (strlen(action) + strlen(action_result) + strlen(action_data) + strlen(as_script) +
analytics_data.data_length + (ANALYTICS_NO_OF_ITEMS * 3) + 15));
@@ -981,7 +984,7 @@ void send_statistics(const char *action, const char *action_result, const char *
sprintf(
command_to_run,
- "%s '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' ",
+ "%s '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' ",
as_script,
action,
action_result,
@@ -1024,7 +1027,8 @@ void send_statistics(const char *action, const char *action_result, const char *
analytics_data.netdata_config_is_private_registry,
analytics_data.netdata_config_use_private_registry,
analytics_data.netdata_config_oom_score,
- analytics_data.netdata_prebuilt_distro);
+ analytics_data.netdata_prebuilt_distro,
+ analytics_data.netdata_fail_reason);
netdata_log_info("%s '%s' '%s' '%s'", as_script, action, action_result, action_data);
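
The analytics changes above add a `netdata_fail_reason` field, free it along with the other fields, and switch `analytics_set_data_str()` to a `const char *` parameter. Below is a hedged sketch of the set/replace pattern such a setter typically follows (free the previous heap copy, store a duplicate of the new value); it uses plain `strdup`/`free` rather than Netdata's `strdupz`/`freez` and is not the actual implementation.

```c
#include <stdlib.h>
#include <string.h>

/* *slot must be NULL or a previously heap-allocated string */
static void set_data_str_sketch(char **slot, const char *value) {
    char *copy = strdup(value ? value : "null");   /* keep the "null" placeholder convention */
    if (!copy)
        return;                                    /* keep the old value on allocation failure */
    free(*slot);
    *slot = copy;
}

int main(void) {
    char *fail_reason = NULL;
    set_data_str_sketch(&fail_reason, "null");            /* seeded, as set_global_environment() does */
    set_data_str_sketch(&fail_reason, "example reason");  /* later overwritten with a real value */
    free(fail_reason);
    return 0;
}
```
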
diff --git a/daemon/analytics.h b/daemon/analytics.h
index 34418316f..0a5cc458d 100644
--- a/daemon/analytics.h
+++ b/daemon/analytics.h
@@ -18,7 +18,7 @@
#define ANALYTICS_MAX_DASHBOARD_HITS 255
/* Needed to calculate the space needed for parameters */
-#define ANALYTICS_NO_OF_ITEMS 39
+#define ANALYTICS_NO_OF_ITEMS 40
struct analytics_data {
char *netdata_config_stream_enabled;
@@ -60,6 +60,7 @@ struct analytics_data {
char *netdata_config_use_private_registry;
char *netdata_config_oom_score;
char *netdata_prebuilt_distro;
+ char *netdata_fail_reason;
size_t data_length;
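
`ANALYTICS_NO_OF_ITEMS` moves from 39 to 40 because `struct analytics_data` gains one more field; the constant feeds the buffer-size estimate in `send_statistics()`, the format string gains one more `'%s'`, and the shell script reads the new value as positional `${43}` — all three must change in lockstep. Below is a rough, hedged illustration of that sizing arithmetic; the real `mallocz()` call also adds the lengths of the action strings, and the script path used here is only a placeholder.

```c
#include <stdio.h>
#include <string.h>

#define ITEMS 40   /* mirrors ANALYTICS_NO_OF_ITEMS: one entry per analytics field */

/* Each field is appended as a space plus a single-quoted value, i.e. its string
 * length plus 3 bytes of overhead; a small fixed slack covers the terminator. */
static size_t command_buffer_size(const char *script, size_t total_values_len) {
    return strlen(script) + total_values_len + (size_t)(ITEMS * 3) + 15;
}

int main(void) {
    /* "./anonymous-statistics.sh" is an illustrative path, not the install location */
    printf("buffer for an empty payload: %zu bytes\n",
           command_buffer_size("./anonymous-statistics.sh", 0));
    return 0;
}
```
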
diff --git a/daemon/anonymous-statistics.sh.in b/daemon/anonymous-statistics.sh.in
index 6b27dfea4..d12e7e32a 100755
--- a/daemon/anonymous-statistics.sh.in
+++ b/daemon/anonymous-statistics.sh.in
@@ -68,6 +68,7 @@ NETDATA_IS_PRIVATE_REGISTRY="${39}"
NETDATA_USE_PRIVATE_REGISTRY="${40}"
NETDATA_CONFIG_OOM_SCORE="${41}"
NETDATA_PREBUILT_DISTRO="${42}"
+NETDATA_FAIL_REASON="${43}"
[ -z "$NETDATA_REGISTRY_UNIQUE_ID" ] && NETDATA_REGISTRY_UNIQUE_ID="00000000-0000-0000-0000-000000000000"
@@ -175,7 +176,8 @@ REQ_BODY="$(cat << EOF
"mirrored_host_count": ${NETDATA_MIRRORED_HOST_COUNT},
"mirrored_hosts_reachable": ${NETDATA_MIRRORED_HOSTS_REACHABLE},
"mirrored_hosts_unreachable": ${NETDATA_MIRRORED_HOSTS_UNREACHABLE},
- "exporting_connectors": ${NETDATA_EXPORTING_CONNECTORS}
+ "exporting_connectors": ${NETDATA_EXPORTING_CONNECTORS},
+ "netdata_fail_reason": ${NETDATA_FAIL_REASON}
}
}
EOF
diff --git a/daemon/config/README.md b/daemon/config/README.md
index bc5a5885c..11ba2a1bc 100644
--- a/daemon/config/README.md
+++ b/daemon/config/README.md
@@ -72,40 +72,40 @@ Please note that your data history will be lost if you have modified `history` p
### [global] section options
-| setting | default | info |
-|:-------------------------------------:|:-------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| process scheduling policy | `keep` | See [Netdata process scheduling policy](https://github.com/netdata/netdata/blob/master/daemon/README.md#netdata-process-scheduling-policy) |
-| OOM score | `0` | |
-| glibc malloc arena max for plugins | `1` | See [Virtual memory](https://github.com/netdata/netdata/blob/master/daemon/README.md#virtual-memory). |
-| glibc malloc arena max for Netdata | `1` | See [Virtual memory](https://github.com/netdata/netdata/blob/master/daemon/README.md#virtual-memory). |
-| hostname | auto-detected | The hostname of the computer running Netdata. |
-| host access prefix | empty | This is used in docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set SYS_PTRACE capability on the docker for this work. Check [issue 43](https://github.com/netdata/netdata/issues/43). |
-| timezone | auto-detected | The timezone retrieved from the environment variable |
-| run as user | `netdata` | The user Netdata will run as. |
-| pthread stack size | auto-detected | |
+| setting | default | info |
+|:----------------------------------:|:-------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| process scheduling policy | `keep` | See [Netdata process scheduling policy](https://github.com/netdata/netdata/blob/master/daemon/README.md#netdata-process-scheduling-policy) |
+| OOM score | `0` | |
+| glibc malloc arena max for plugins | `1` | See [Virtual memory](https://github.com/netdata/netdata/blob/master/daemon/README.md#virtual-memory). |
+| glibc malloc arena max for Netdata | `1` | See [Virtual memory](https://github.com/netdata/netdata/blob/master/daemon/README.md#virtual-memory). |
+| hostname | auto-detected | The hostname of the computer running Netdata. |
+| host access prefix                 | empty         | This is used in docker environments where /proc, /sys, etc. have to be accessed via another path. You may also have to set the SYS_PTRACE capability on the docker container for this to work. Check [issue 43](https://github.com/netdata/netdata/issues/43). |
+| timezone | auto-detected | The timezone retrieved from the environment variable |
+| run as user | `netdata` | The user Netdata will run as. |
+| pthread stack size | auto-detected | |
### [db] section options
-| setting | default | info |
-|:---------------------------------------------:|:----------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| setting | default | info |
+|:---------------------------------------------:|:----------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| mode | `dbengine` | `dbengine`: The default for long-term metrics storage with efficient RAM and disk usage. Can be extended with `dbengine page cache size MB` and `dbengine disk space MB`. <br />`save`: Netdata will save its round robin database on exit and load it on startup. <br />`map`: Cache files will be updated in real-time. Not ideal for systems with high load or slow disks (check `man mmap`). <br />`ram`: The round-robin database will be temporary and it will be lost when Netdata exits. <br />`alloc`: Similar to `ram`, but can significantly reduce memory usage, when combined with a low retention and does not support KSM. <br />`none`: Disables the database at this host, and disables health monitoring entirely, as that requires a database of metrics. Not to be used together with streaming. |
-| retention | `3600` | Used with `mode = save/map/ram/alloc`, not the default `mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. Check [Memory Requirements](https://github.com/netdata/netdata/blob/master/database/README.md) for more information. |
-| storage tiers | `1` | The number of storage tiers you want to have in your dbengine. Check the tiering mechanism in the [dbengine's reference](https://github.com/netdata/netdata/blob/master/database/engine/README.md#tiering). You can have up to 5 tiers of data (including the _Tier 0_). This number ranges between 1 and 5. |
-| dbengine page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated to caching for _Tier 0_ Netdata metric values. |
-| dbengine tier **`N`** page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated for caching Netdata metric values of the **`N`** tier. <br /> `N belongs to [1..4]` ||
- | dbengine disk space MB | `256` | Determines the amount of disk space in MiB that is dedicated to storing _Tier 0_ Netdata metric values and all related metadata describing them. This option is available **only for legacy configuration** (`Agent v1.23.2 and prior`). |
-| dbengine multihost disk space MB | `256` | Same functionality as `dbengine disk space MB`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. This setting is only for _Tier 0_ metrics. |
-| dbengine tier **`N`** multihost disk space MB | `256` | Same functionality as `dbengine multihost disk space MB`, but stores metrics of the **`N`** tier (both parent node and its children). Can be used in single-node environments as well. <br /> `N belongs to [1..4]` |
-| update every | `1` | The frequency in seconds, for data collection. For more information see the [performance guide](https://github.com/netdata/netdata/blob/master/docs/guides/configure/performance.md). These metrics stored as _Tier 0_ data. Explore the tiering mechanism in the [dbengine's reference](https://github.com/netdata/netdata/blob/master/database/engine/README.md#tiering). |
-| dbengine tier **`N`** update every iterations | `60` | The down sampling value of each tier from the previous one. For each Tier, the greater by one Tier has N (equal to 60 by default) less data points of any metric it collects. This setting can take values from `2` up to `255`. <br /> `N belongs to [1..4]` |
-| dbengine tier **`N`** back fill | `New` | Specifies the strategy of recreating missing data on each Tier from the exact lower Tier. <br /> `New`: Sees the latest point on each Tier and save new points to it only if the exact lower Tier has available points for it's observation window (`dbengine tier N update every iterations` window). <br /> `none`: No back filling is applied. <br /> `N belongs to [1..4]` |
-| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database and the dbengine page cache to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](https://github.com/netdata/netdata/blob/master/database/README.md#ksm) |
-| cleanup obsolete charts after secs | `3600` | See [monitoring ephemeral containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions |
-| gap when lost iterations above | `1` | |
-| cleanup orphan hosts after secs | `3600` | How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data. |
-| delete obsolete charts files | `yes` | See [monitoring ephemeral containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also affects the deletion of files for obsolete dimensions |
-| delete orphan hosts files | `yes` | Set to `no` to disable non-responsive host removal. |
-| enable zero metrics | `no` | Set to `yes` to show charts when all their metrics are zero. |
+| retention | `3600` | Used with `mode = save/map/ram/alloc`, not the default `mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. Check [Memory Requirements](https://github.com/netdata/netdata/blob/master/database/README.md) for more information. |
+| storage tiers | `1` | The number of storage tiers you want to have in your dbengine. Check the tiering mechanism in the [dbengine's reference](https://github.com/netdata/netdata/blob/master/database/engine/README.md#tiering). You can have up to 5 tiers of data (including the _Tier 0_). This number ranges between 1 and 5. |
+| dbengine page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated to caching for _Tier 0_ Netdata metric values. |
+| dbengine tier **`N`** page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated for caching Netdata metric values of the **`N`** tier. <br /> `N belongs to [1..4]` |
+| dbengine disk space MB | `256` | Determines the amount of disk space in MiB that is dedicated to storing _Tier 0_ Netdata metric values and all related metadata describing them. This option is available **only for legacy configuration** (`Agent v1.23.2 and prior`). |
+| dbengine multihost disk space MB | `256` | Same functionality as `dbengine disk space MB`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. This setting is only for _Tier 0_ metrics. |
+| dbengine tier **`N`** multihost disk space MB | `256` | Same functionality as `dbengine multihost disk space MB`, but stores metrics of the **`N`** tier (both parent node and its children). Can be used in single-node environments as well. <br /> `N belongs to [1..4]` |
+| update every                                  | `1`        | The frequency, in seconds, of data collection. For more information, see the [performance guide](https://github.com/netdata/netdata/blob/master/docs/guides/configure/performance.md). These metrics are stored as _Tier 0_ data. Explore the tiering mechanism in the [dbengine's reference](https://github.com/netdata/netdata/blob/master/database/engine/README.md#tiering). |
+| dbengine tier **`N`** update every iterations | `60`       | The downsampling factor of each tier relative to the previous one: each higher tier keeps one data point for every `60` (by default) points of the tier below it. This setting can take values from `2` up to `255` (see the worked example below). <br /> `N belongs to [1..4]` |
+| dbengine tier **`N`** back fill               | `New`      | Specifies the strategy of recreating missing data on each Tier from the exact lower Tier. <br /> `New`: Checks the latest point on each Tier and saves new points to it only if the exact lower Tier has available points for its observation window (`dbengine tier N update every iterations` window). <br /> `none`: No back filling is applied. <br /> `N belongs to [1..4]` |
+| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database and the dbengine page cache to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](https://github.com/netdata/netdata/blob/master/database/README.md#ksm) |
+| cleanup obsolete charts after secs | `3600` | See [monitoring ephemeral containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions |
+| gap when lost iterations above | `1` | |
+| cleanup orphan hosts after secs | `3600` | How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data. |
+| delete obsolete charts files | `yes` | See [monitoring ephemeral containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also affects the deletion of files for obsolete dimensions |
+| delete orphan hosts files | `yes` | Set to `no` to disable non-responsive host removal. |
+| enable zero metrics | `no` | Set to `yes` to show charts when all their metrics are zero. |
> ### Info
>
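
As a worked example of how the tier settings in the table above compose (assuming every tier uses the default of 60 `update every iterations`): tier 0 keeps one point per `update every` seconds, and each higher tier keeps one point for every 60 points of the tier below it.

```c
#include <stdio.h>

int main(void) {
    int update_every   = 1;    /* [db] update every: tier 0 resolution in seconds */
    int iterations     = 60;   /* dbengine tier N update every iterations (default) */
    int storage_tiers  = 3;    /* example; the default "storage tiers" is 1, the maximum is 5 */

    int interval = update_every;
    for (int tier = 0; tier < storage_tiers; tier++) {
        printf("tier %d: one point every %d seconds\n", tier, interval);
        interval *= iterations;   /* each tier downsamples the one below it */
    }
    return 0;
}
```

With `update every = 1`, this yields resolutions of 1 s, 60 s and 3600 s for tiers 0, 1 and 2 respectively.
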
@@ -113,32 +113,33 @@ Please note that your data history will be lost if you have modified `history` p
### [directories] section options
-| setting | default | info |
-|:-------------------:|:------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| config | `/etc/netdata` | The directory configuration files are kept. |
-| stock config | `/usr/lib/netdata/conf.d` | |
-| log | `/var/log/netdata` | The directory in which the [log files](https://github.com/netdata/netdata/blob/master/daemon/README.md#log-files) are kept. |
-| web | `/usr/share/netdata/web` | The directory the web static files are kept. |
-| cache | `/var/cache/netdata` | The directory the memory database will be stored if and when Netdata exits. Netdata will re-read the database when it will start again, to continue from the same point. |
-| lib | `/var/lib/netdata` | Contains the alarm log and the Netdata instance GUID. |
-| home | `/var/cache/netdata` | Contains the db files for the collected metrics. |
-| lock | `/var/lib/netdata/lock` | Contains the data collectors lock files. |
-| plugins | `"/usr/libexec/netdata/plugins.d" "/etc/netdata/custom-plugins.d"` | The directory plugin programs are kept. This setting supports multiple directories, space separated. If any directory path contains spaces, enclose it in single or double quotes. |
-| health config | `/etc/netdata/health.d` | The directory containing the user alarm configuration files, to override the stock configurations |
-| stock health config | `/usr/lib/netdata/conf.d/health.d` | Contains the stock alarm configuration files for each collector |
-| registry | `/opt/netdata/var/lib/netdata/registry` | Contains the [registry](https://github.com/netdata/netdata/blob/master/registry/README.md) database and GUID that uniquely identifies each Netdata Agent |
+| setting | default | info |
+|:-------------------:|:------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| config | `/etc/netdata` | The directory configuration files are kept. |
+| stock config | `/usr/lib/netdata/conf.d` | |
+| log | `/var/log/netdata` | The directory in which the [log files](https://github.com/netdata/netdata/blob/master/daemon/README.md#log-files) are kept. |
+| web | `/usr/share/netdata/web` | The directory the web static files are kept. |
+| cache               | `/var/cache/netdata`                                                 | The directory where the memory database is stored if and when Netdata exits. Netdata re-reads the database when it starts again, to continue from the same point.                    |
+| lib | `/var/lib/netdata` | Contains the alert log and the Netdata instance GUID. |
+| home | `/var/cache/netdata` | Contains the db files for the collected metrics. |
+| lock | `/var/lib/netdata/lock` | Contains the data collectors lock files. |
+| plugins | `"/usr/libexec/netdata/plugins.d" "/etc/netdata/custom-plugins.d"` | The directory plugin programs are kept. This setting supports multiple directories, space separated. If any directory path contains spaces, enclose it in single or double quotes. |
+| health config | `/etc/netdata/health.d` | The directory containing the user alert configuration files, to override the stock configurations |
+| stock health config | `/usr/lib/netdata/conf.d/health.d` | Contains the stock alert configuration files for each collector |
+| registry | `/opt/netdata/var/lib/netdata/registry` | Contains the [registry](https://github.com/netdata/netdata/blob/master/registry/README.md) database and GUID that uniquely identifies each Netdata Agent |
### [logs] section options
-| setting | default | info |
-|:----------------------------------:|:-----------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| setting | default | info |
+|:----------------------------------:|:-----------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| debug flags | `0x0000000000000000` | Bitmap of debug options to enable. For more information check [Tracing Options](https://github.com/netdata/netdata/blob/master/daemon/README.md#debugging). |
| debug | `/var/log/netdata/debug.log` | The filename to save debug information. This file will not be created if debugging is not enabled. You can also set it to `syslog` to send the debug messages to syslog, or `none` to disable this log. For more information check [Tracing Options](https://github.com/netdata/netdata/blob/master/daemon/README.md#debugging). |
-| error | `/var/log/netdata/error.log` | The filename to save error messages for Netdata daemon and all plugins (`stderr` is sent here for all Netdata programs, including the plugins). You can also set it to `syslog` to send the errors to syslog, or `none` to disable this log. |
-| access | `/var/log/netdata/access.log` | The filename to save the log of web clients accessing Netdata charts. You can also set it to `syslog` to send the access log to syslog, or `none` to disable this log. |
-| facility | `daemon` | A facility keyword is used to specify the type of system that is logging the message. |
-| errors flood protection period | `1200` | Length of period (in sec) during which the number of errors should not exceed the `errors to trigger flood protection`. |
-| errors to trigger flood protection | `200` | Number of errors written to the log in `errors flood protection period` sec before flood protection is activated. |
+| error | `/var/log/netdata/error.log` | The filename to save error messages for Netdata daemon and all plugins (`stderr` is sent here for all Netdata programs, including the plugins). You can also set it to `syslog` to send the errors to syslog, or `none` to disable this log. |
+| access | `/var/log/netdata/access.log` | The filename to save the log of web clients accessing Netdata charts. You can also set it to `syslog` to send the access log to syslog, or `none` to disable this log. |
+| facility | `daemon` | A facility keyword is used to specify the type of system that is logging the message. |
+| errors flood protection period | `1200` | Length of period (in sec) during which the number of errors should not exceed the `errors to trigger flood protection`. |
+| errors to trigger flood protection | `200` | Number of errors written to the log in `errors flood protection period` sec before flood protection is activated. |
+| severity level | `info` | Controls which log messages are logged, with error being the most important. Supported values: `info` and `error`. |
### [environment variables] section options
@@ -163,20 +164,20 @@ Please note that your data history will be lost if you have modified `history` p
This section controls the general behavior of the health monitoring capabilities of Netdata.
-Specific alarms are configured in per-collector config files under the `health.d` directory. For more info, see [health
+Specific alerts are configured in per-collector config files under the `health.d` directory. For more info, see [health
monitoring](https://github.com/netdata/netdata/blob/master/health/README.md).
-[Alarm notifications](https://github.com/netdata/netdata/blob/master/health/notifications/README.md) are configured in `health_alarm_notify.conf`.
+[Alert notifications](https://github.com/netdata/netdata/blob/master/health/notifications/README.md) are configured in `health_alarm_notify.conf`.
-| setting | default | info |
-|:----------------------------------------------:|:------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| enabled | `yes` | Set to `no` to disable all alarms and notifications |
-| in memory max health log entries | 1000 | Size of the alarm history held in RAM |
-| script to execute on alarm | `/usr/libexec/netdata/plugins.d/alarm-notify.sh` | The script that sends alarm notifications. Note that in versions before 1.16, the plugins.d directory may be installed in a different location in certain OSs (e.g. under `/usr/lib/netdata`). |
-| run at least every seconds | `10` | Controls how often all alarm conditions should be evaluated. |
-| postpone alarms during hibernation for seconds | `60` | Prevents false alarms. May need to be increased if you get alarms during hibernation. |
-| health log history | `432000` | Specifies the history of alarm events (in seconds) kept in the agent's sqlite database. |
-| enabled alarms | * | Defines which alarms to load from both user and stock directories. This is a [simple pattern](https://github.com/netdata/netdata/blob/master/libnetdata/simple_pattern/README.md) list of alarm or template names. Can be used to disable specific alarms. For example, `enabled alarms = !oom_kill *` will load all alarms except `oom_kill`. |
+| setting | default | info |
+|:----------------------------------------------:|:------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| enabled | `yes` | Set to `no` to disable all alerts and notifications |
+| in memory max health log entries | 1000 | Size of the alert history held in RAM |
+| script to execute on alarm | `/usr/libexec/netdata/plugins.d/alarm-notify.sh` | The script that sends alert notifications. Note that in versions before 1.16, the plugins.d directory may be installed in a different location in certain OSs (e.g. under `/usr/lib/netdata`). |
+| run at least every seconds | `10` | Controls how often all alert conditions should be evaluated. |
+| postpone alarms during hibernation for seconds | `60` | Prevents false alerts. May need to be increased if you get alerts during hibernation. |
+| health log history | `432000` | Specifies the history of alert events (in seconds) kept in the agent's sqlite database. |
+| enabled alarms | * | Defines which alerts to load from both user and stock directories. This is a [simple pattern](https://github.com/netdata/netdata/blob/master/libnetdata/simple_pattern/README.md) list of alert or template names. Can be used to disable specific alerts. For example, `enabled alarms = !oom_kill *` will load all alerts except `oom_kill`. |
### [web] section options
@@ -222,10 +223,10 @@ for all internal Netdata plugins.
External plugins will have only 2 options at `netdata.conf`:
-| setting | default | info |
-|:---------------:|:--------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------|
+| setting | default | info |
+|:---------------:|:--------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| update every | the value of `[global].update every` setting | The frequency in seconds the plugin should collect values. For more information check the [performance guide](https://github.com/netdata/netdata/blob/master/docs/guides/configure/performance.md). |
-| command options | - | Additional command line options to pass to the plugin. | |
+| command options | - | Additional command line options to pass to the plugin. |
External plugins that need additional configuration may support a dedicated file in `/etc/netdata`. Check their
documentation.
diff --git a/daemon/event_loop.c b/daemon/event_loop.c
index fb3879154..93bac97d0 100644
--- a/daemon/event_loop.c
+++ b/daemon/event_loop.c
@@ -52,6 +52,7 @@ void register_libuv_worker_jobs() {
worker_register_job_name(UV_EVENT_HOST_CONTEXT_LOAD, "metadata load host context");
worker_register_job_name(UV_EVENT_METADATA_STORE, "metadata store host");
worker_register_job_name(UV_EVENT_METADATA_CLEANUP, "metadata cleanup");
+ worker_register_job_name(UV_EVENT_METADATA_ML_LOAD, "metadata load ml models");
// netdatacli
worker_register_job_name(UV_EVENT_SCHEDULE_CMD, "schedule command");
diff --git a/daemon/event_loop.h b/daemon/event_loop.h
index 1ff1c2c1c..c1821c646 100644
--- a/daemon/event_loop.h
+++ b/daemon/event_loop.h
@@ -44,6 +44,7 @@ enum event_loop_job {
UV_EVENT_HOST_CONTEXT_LOAD,
UV_EVENT_METADATA_STORE,
UV_EVENT_METADATA_CLEANUP,
+ UV_EVENT_METADATA_ML_LOAD,
// netdatacli
UV_EVENT_SCHEDULE_CMD,
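
The event-loop change registers a human-readable name for the new `UV_EVENT_METADATA_ML_LOAD` job, keeping the enum in `event_loop.h` and the name registration in `event_loop.c` in step. Below is a minimal sketch of that enum-to-name registration pattern; the enum values and helper are illustrative only, not the real definitions.

```c
#include <stdio.h>

enum loop_job {                       /* illustrative subset, not the real enum */
    JOB_METADATA_STORE,
    JOB_METADATA_CLEANUP,
    JOB_METADATA_ML_LOAD,             /* the job added by this change */
    JOB_MAX
};

static const char *job_names[JOB_MAX];

static void register_job_name(enum loop_job job, const char *name) {
    job_names[job] = name;            /* the real worker_register_job_name() also feeds the workers charts */
}

int main(void) {
    register_job_name(JOB_METADATA_STORE,   "metadata store host");
    register_job_name(JOB_METADATA_CLEANUP, "metadata cleanup");
    register_job_name(JOB_METADATA_ML_LOAD, "metadata load ml models");

    for (int i = 0; i < JOB_MAX; i++)
        printf("%d -> %s\n", i, job_names[i]);
    return 0;
}
```
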
diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c
index ce8d41402..ab910e189 100644
--- a/daemon/global_statistics.c
+++ b/daemon/global_statistics.c
@@ -2681,9 +2681,12 @@ static void dbengine2_statistics_charts(void) {
static void update_strings_charts() {
static RRDSET *st_ops = NULL, *st_entries = NULL, *st_mem = NULL;
- static RRDDIM *rd_ops_inserts = NULL, *rd_ops_deletes = NULL, *rd_ops_searches = NULL, *rd_ops_duplications = NULL, *rd_ops_releases = NULL;
- static RRDDIM *rd_entries_entries = NULL, *rd_entries_refs = NULL;
+ static RRDDIM *rd_ops_inserts = NULL, *rd_ops_deletes = NULL;
+ static RRDDIM *rd_entries_entries = NULL;
static RRDDIM *rd_mem = NULL;
+#ifdef NETDATA_INTERNAL_CHECKS
+ static RRDDIM *rd_entries_refs = NULL, *rd_ops_releases = NULL, *rd_ops_duplications = NULL, *rd_ops_searches = NULL;
+#endif
size_t inserts, deletes, searches, entries, references, memory, duplications, releases;
@@ -2706,16 +2709,20 @@ static void update_strings_charts() {
rd_ops_inserts = rrddim_add(st_ops, "inserts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
rd_ops_deletes = rrddim_add(st_ops, "deletes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+#ifdef NETDATA_INTERNAL_CHECKS
rd_ops_searches = rrddim_add(st_ops, "searches", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
rd_ops_duplications = rrddim_add(st_ops, "duplications", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
rd_ops_releases = rrddim_add(st_ops, "releases", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+#endif
}
rrddim_set_by_pointer(st_ops, rd_ops_inserts, (collected_number)inserts);
rrddim_set_by_pointer(st_ops, rd_ops_deletes, (collected_number)deletes);
+#ifdef NETDATA_INTERNAL_CHECKS
rrddim_set_by_pointer(st_ops, rd_ops_searches, (collected_number)searches);
rrddim_set_by_pointer(st_ops, rd_ops_duplications, (collected_number)duplications);
rrddim_set_by_pointer(st_ops, rd_ops_releases, (collected_number)releases);
+#endif
rrdset_done(st_ops);
if (unlikely(!st_entries)) {
@@ -2734,11 +2741,15 @@ static void update_strings_charts() {
, RRDSET_TYPE_AREA);
rd_entries_entries = rrddim_add(st_entries, "entries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+#ifdef NETDATA_INTERNAL_CHECKS
rd_entries_refs = rrddim_add(st_entries, "references", NULL, 1, -1, RRD_ALGORITHM_ABSOLUTE);
+#endif
}
rrddim_set_by_pointer(st_entries, rd_entries_entries, (collected_number)entries);
+#ifdef NETDATA_INTERNAL_CHECKS
rrddim_set_by_pointer(st_entries, rd_entries_refs, (collected_number)references);
+#endif
rrdset_done(st_entries);
if (unlikely(!st_mem)) {
@@ -2813,6 +2824,7 @@ struct dictionary_stats dictionary_stats_category_rrdhealth = { .name = "health"
struct dictionary_stats dictionary_stats_category_functions = { .name = "functions" };
struct dictionary_stats dictionary_stats_category_replication = { .name = "replication" };
+#ifdef DICT_WITH_STATS
struct dictionary_categories {
struct dictionary_stats *stats;
const char *family;
@@ -3165,6 +3177,13 @@ static void update_dictionary_category_charts(struct dictionary_categories *c) {
}
}
+static void dictionary_statistics(void) {
+ for(int i = 0; dictionary_categories[i].stats ;i++) {
+ update_dictionary_category_charts(&dictionary_categories[i]);
+ }
+}
+#endif // DICT_WITH_STATS
+
#ifdef NETDATA_TRACE_ALLOCATIONS
struct memory_trace_data {
@@ -3304,12 +3323,6 @@ static void malloc_trace_statistics(void) {
}
#endif
-static void dictionary_statistics(void) {
- for(int i = 0; dictionary_categories[i].stats ;i++) {
- update_dictionary_category_charts(&dictionary_categories[i]);
- }
-}
-
// ---------------------------------------------------------------------------------------------------------------------
// worker utilization
@@ -4171,8 +4184,10 @@ void *global_statistics_main(void *ptr)
worker_is_busy(WORKER_JOB_STRINGS);
update_strings_charts();
+#ifdef DICT_WITH_STATS
worker_is_busy(WORKER_JOB_DICTIONARIES);
dictionary_statistics();
+#endif
#ifdef NETDATA_TRACE_ALLOCATIONS
worker_is_busy(WORKER_JOB_MALLOC_TRACE);
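
The global-statistics change compiles the search/duplication/release dimensions of the strings charts only under `NETDATA_INTERNAL_CHECKS`, and the dictionary charts only under `DICT_WITH_STATS`, so release builds skip them entirely. Below is a small sketch of that conditional-dimension pattern, with hypothetical names.

```c
#include <stdio.h>

/* Dimensions that only matter for debugging are compiled in only when
 * NETDATA_INTERNAL_CHECKS is defined, so release builds neither allocate nor
 * chart them. */
static void update_example_chart(size_t inserts, size_t searches) {
    printf("inserts: %zu\n", inserts);        /* always charted */
#ifdef NETDATA_INTERNAL_CHECKS
    printf("searches: %zu\n", searches);      /* debug builds only */
#else
    (void)searches;                           /* avoid an unused-parameter warning */
#endif
}

int main(void) {
    update_example_chart(10, 3);
    return 0;
}
```
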
diff --git a/daemon/main.c b/daemon/main.c
index 6ddf57aa1..ab7997969 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -761,7 +761,7 @@ int help(int exitcode) {
" Support : https://github.com/netdata/netdata/issues\n"
" License : https://github.com/netdata/netdata/blob/master/LICENSE.md\n"
"\n"
- " Twitter : https://twitter.com/linuxnetdata\n"
+ " Twitter : https://twitter.com/netdatahq\n"
" LinkedIn : https://linkedin.com/company/netdata-cloud/\n"
" Facebook : https://facebook.com/linuxnetdata/\n"
"\n"
@@ -787,8 +787,7 @@ int help(int exitcode) {
" -W stacksize=N Set the stacksize (in bytes).\n\n"
" -W debug_flags=N Set runtime tracing to debug.log.\n\n"
" -W unittest Run internal unittests and exit.\n\n"
- " -W sqlite-check Check metadata database integrity and exit.\n\n"
- " -W sqlite-fix Check metadata database integrity, fix if needed and exit.\n\n"
+ " -W sqlite-meta-recover Run recovery on the metadata database and exit.\n\n"
" -W sqlite-compact Reclaim metadata database unused space and exit.\n\n"
#ifdef ENABLE_DBENGINE
" -W createdataset=N Create a DB engine dataset of N seconds and exit.\n\n"
@@ -875,6 +874,10 @@ static void log_init(void) {
setenv("NETDATA_ERRORS_THROTTLE_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors flood protection period" , ""), 1);
setenv("NETDATA_ERRORS_PER_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors to trigger flood protection", ""), 1);
+
+ char *selected_level = config_get(CONFIG_SECTION_LOGS, "severity level", NETDATA_LOG_LEVEL_INFO_STR);
+ global_log_severity_level = log_severity_string_to_severity_level(selected_level);
+ setenv("NETDATA_LOG_SEVERITY_LEVEL", selected_level , 1);
}
char *initialize_lock_directory_path(char *prefix)
@@ -1436,13 +1439,9 @@ int main(int argc, char **argv) {
char* createdataset_string = "createdataset=";
char* stresstest_string = "stresstest=";
#endif
- if(strcmp(optarg, "sqlite-check") == 0) {
- sql_init_database(DB_CHECK_INTEGRITY, 0);
- return 0;
- }
- if(strcmp(optarg, "sqlite-fix") == 0) {
- sql_init_database(DB_CHECK_FIX_DB, 0);
+ if(strcmp(optarg, "sqlite-meta-recover") == 0) {
+ sql_init_database(DB_CHECK_RECOVER, 0);
return 0;
}
@@ -1509,7 +1508,7 @@ int main(int argc, char **argv) {
unittest_running = true;
return aral_unittest(10000);
}
- else if(strcmp(optarg, "stringtest") == 0) {
+ else if(strcmp(optarg, "stringtest") == 0) {
unittest_running = true;
return string_unittest(10000);
}
@@ -1898,6 +1897,7 @@ int main(int argc, char **argv) {
// initialize the log files
open_all_log_files();
+ netdata_log_info("Netdata agent version \""VERSION"\" is starting");
ieee754_doubles = is_system_ieee754_double();
@@ -1909,6 +1909,8 @@ int main(int argc, char **argv) {
replication_initialize();
+ rrd_functions_inflight_init();
+
// --------------------------------------------------------------------
// get the certificate and start security
@@ -1938,8 +1940,6 @@ int main(int argc, char **argv) {
signals_block();
signals_init(); // setup the signals we want to use
- dyn_conf_init();
-
// --------------------------------------------------------------------
// check which threads are enabled and initialize them
@@ -2005,6 +2005,8 @@ int main(int argc, char **argv) {
if(become_daemon(dont_fork, user) == -1)
fatal("Cannot daemonize myself.");
+ dyn_conf_init();
+
netdata_log_info("netdata started on pid %d.", getpid());
delta_startup_time("initialize threads after fork");
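
`log_init()` now reads `[logs] severity level`, maps it through `log_severity_string_to_severity_level()`, and exports the chosen string to plugins via the `NETDATA_LOG_SEVERITY_LEVEL` environment variable. Below is a hedged guess at the shape of such a lookup; the real signature and level constants in libnetdata may differ.

```c
#include <stdio.h>
#include <string.h>

typedef enum { SEVERITY_ERROR = 0, SEVERITY_INFO = 1 } severity_level_t;

static severity_level_t severity_from_string(const char *s) {
    if (s && strcmp(s, "error") == 0)
        return SEVERITY_ERROR;
    return SEVERITY_INFO;                 /* "info" is the documented default */
}

int main(void) {
    printf("%d\n", severity_from_string("error"));   /* 0 */
    printf("%d\n", severity_from_string("info"));    /* 1 */
    return 0;
}
```
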
diff --git a/daemon/service.c b/daemon/service.c
index a25e2a26b..f7fe86e04 100644
--- a/daemon/service.c
+++ b/daemon/service.c
@@ -105,14 +105,11 @@ static bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensio
return done_all_dimensions;
}
-static void svc_rrdset_obsolete_to_archive(RRDSET *st) {
- worker_is_busy(WORKER_JOB_ARCHIVE_CHART);
-
+static void svc_rrdset_obsolete_to_free(RRDSET *st) {
if(!svc_rrdset_archive_obsolete_dimensions(st, true))
return;
- rrdset_flag_set(st, RRDSET_FLAG_ARCHIVED);
- rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE);
+ worker_is_busy(WORKER_JOB_FREE_CHART);
rrdcalc_unlink_all_rrdset_alerts(st);
@@ -130,10 +127,9 @@ static void svc_rrdset_obsolete_to_archive(RRDSET *st) {
worker_is_busy(WORKER_JOB_SAVE_CHART);
rrdset_save(st);
}
-
- worker_is_busy(WORKER_JOB_FREE_CHART);
- rrdset_free(st);
}
+
+ rrdset_free(st);
}
static void svc_rrdhost_cleanup_obsolete_charts(RRDHOST *host) {
@@ -150,12 +146,15 @@ static void svc_rrdhost_cleanup_obsolete_charts(RRDHOST *host) {
&& st->last_updated.tv_sec + rrdset_free_obsolete_time_s < now
&& st->last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now
)) {
- svc_rrdset_obsolete_to_archive(st);
+ svc_rrdset_obsolete_to_free(st);
}
else if(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS)) {
rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS);
svc_rrdset_archive_obsolete_dimensions(st, false);
}
+ else if (unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE))) {
+ rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS);
+ }
}
rrdset_foreach_done(st);
}
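
The service change renames `svc_rrdset_obsolete_to_archive()` to `svc_rrdset_obsolete_to_free()`: obsolete charts that have aged past the free timeout are now freed outright instead of being flagged `RRDSET_FLAG_ARCHIVED`, and charts that are obsolete but not yet old enough keep the host flagged for a later pass. Below is a simplified, hedged restatement of that decision logic; the real code also checks collection state and both the last-updated and last-collected timestamps.

```c
#include <stdbool.h>
#include <time.h>

typedef enum {
    ACTION_NONE,
    ACTION_FREE_CHART,        /* svc_rrdset_obsolete_to_free() */
    ACTION_CLEAN_DIMENSIONS,  /* svc_rrdset_archive_obsolete_dimensions(st, false) */
    ACTION_RECHECK_LATER      /* keep RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS set */
} cleanup_action_t;

static cleanup_action_t decide(bool chart_obsolete, bool dimensions_obsolete,
                               time_t last_update, time_t now, time_t free_after_s) {
    if (chart_obsolete && last_update + free_after_s < now)
        return ACTION_FREE_CHART;         /* aged past the free timeout: free it now */
    if (dimensions_obsolete)
        return ACTION_CLEAN_DIMENSIONS;   /* only some dimensions are obsolete */
    if (chart_obsolete)
        return ACTION_RECHECK_LATER;      /* obsolete but too recent: try again next pass */
    return ACTION_NONE;
}

int main(void) {
    time_t now = time(NULL);
    /* an obsolete chart last updated two hours ago, with a one-hour free timeout, gets freed */
    return decide(true, false, now - 7200, now, 3600) == ACTION_FREE_CHART ? 0 : 1;
}
```
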