Diffstat (limited to 'daemon')

 -rw-r--r--  daemon/README.md                   |   4
 -rw-r--r--  daemon/analytics.c                 |  10
 -rw-r--r--  daemon/analytics.h                 |   3
 -rwxr-xr-x  daemon/anonymous-statistics.sh.in  |   4
 -rw-r--r--  daemon/config/README.md            | 131
 -rw-r--r--  daemon/event_loop.c                |   1
 -rw-r--r--  daemon/event_loop.h                |   1
 -rw-r--r--  daemon/global_statistics.c         |  31
 -rw-r--r--  daemon/main.c                      |  26
 -rw-r--r--  daemon/service.c                   |  17

 10 files changed, 128 insertions, 100 deletions
diff --git a/daemon/README.md b/daemon/README.md index 3fb33e5c7..0707a406c 100644 --- a/daemon/README.md +++ b/daemon/README.md @@ -38,7 +38,7 @@ The command line options of the Netdata 1.10.0 version are the following: Support : https://github.com/netdata/netdata/issues License : https://github.com/netdata/netdata/blob/master/LICENSE.md - Twitter : https://twitter.com/linuxnetdata + Twitter : https://twitter.com/netdatahq LinkedIn : https://linkedin.com/company/netdata-cloud/ Facebook : https://facebook.com/linuxnetdata/ @@ -143,6 +143,8 @@ For most Netdata programs (including standard external plugins shipped by netdat | `ERROR` | Something that might disable a part of netdata.<br/>The log line includes `errno` (if it is not zero). | | `FATAL` | Something prevented a program from running.<br/>The log line includes `errno` (if it is not zero) and the program exited. | +The `FATAL` and `ERROR` messages will always appear in the logs, and `INFO`can be filtered using [severity level](https://github.com/netdata/netdata/tree/master/daemon/config#logs-section-options) option. + So, when auto-detection of data collection fail, `ERROR` lines are logged and the relevant modules are disabled, but the program continues to run. diff --git a/daemon/analytics.c b/daemon/analytics.c index 9323c8e8a..c149e2583 100644 --- a/daemon/analytics.c +++ b/daemon/analytics.c @@ -109,6 +109,7 @@ void analytics_free_data(void) freez(analytics_data.netdata_config_use_private_registry); freez(analytics_data.netdata_config_oom_score); freez(analytics_data.netdata_prebuilt_distro); + freez(analytics_data.netdata_fail_reason); } /* @@ -127,7 +128,7 @@ void analytics_set_data(char **name, char *value) /* * Set a string data with a value */ -void analytics_set_data_str(char **name, char *value) +void analytics_set_data_str(char **name, const char *value) { size_t value_string_len; if (*name) { @@ -899,6 +900,7 @@ void set_global_environment() analytics_set_data(&analytics_data.netdata_config_use_private_registry, "null"); analytics_set_data(&analytics_data.netdata_config_oom_score, "null"); analytics_set_data(&analytics_data.netdata_prebuilt_distro, "null"); + analytics_set_data(&analytics_data.netdata_fail_reason, "null"); analytics_data.prometheus_hits = 0; analytics_data.shell_hits = 0; @@ -974,6 +976,7 @@ void send_statistics(const char *action, const char *action_result, const char * action_result = ""; if (!action_data) action_data = ""; + char *command_to_run = mallocz( sizeof(char) * (strlen(action) + strlen(action_result) + strlen(action_data) + strlen(as_script) + analytics_data.data_length + (ANALYTICS_NO_OF_ITEMS * 3) + 15)); @@ -981,7 +984,7 @@ void send_statistics(const char *action, const char *action_result, const char * sprintf( command_to_run, - "%s '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' ", + "%s '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' ", as_script, action, action_result, @@ -1024,7 +1027,8 @@ void send_statistics(const char *action, const char *action_result, const char * analytics_data.netdata_config_is_private_registry, analytics_data.netdata_config_use_private_registry, analytics_data.netdata_config_oom_score, - analytics_data.netdata_prebuilt_distro); + 
analytics_data.netdata_prebuilt_distro, + analytics_data.netdata_fail_reason); netdata_log_info("%s '%s' '%s' '%s'", as_script, action, action_result, action_data); diff --git a/daemon/analytics.h b/daemon/analytics.h index 34418316f..0a5cc458d 100644 --- a/daemon/analytics.h +++ b/daemon/analytics.h @@ -18,7 +18,7 @@ #define ANALYTICS_MAX_DASHBOARD_HITS 255 /* Needed to calculate the space needed for parameters */ -#define ANALYTICS_NO_OF_ITEMS 39 +#define ANALYTICS_NO_OF_ITEMS 40 struct analytics_data { char *netdata_config_stream_enabled; @@ -60,6 +60,7 @@ struct analytics_data { char *netdata_config_use_private_registry; char *netdata_config_oom_score; char *netdata_prebuilt_distro; + char *netdata_fail_reason; size_t data_length; diff --git a/daemon/anonymous-statistics.sh.in b/daemon/anonymous-statistics.sh.in index 6b27dfea4..d12e7e32a 100755 --- a/daemon/anonymous-statistics.sh.in +++ b/daemon/anonymous-statistics.sh.in @@ -68,6 +68,7 @@ NETDATA_IS_PRIVATE_REGISTRY="${39}" NETDATA_USE_PRIVATE_REGISTRY="${40}" NETDATA_CONFIG_OOM_SCORE="${41}" NETDATA_PREBUILT_DISTRO="${42}" +NETDATA_FAIL_REASON="${43}" [ -z "$NETDATA_REGISTRY_UNIQUE_ID" ] && NETDATA_REGISTRY_UNIQUE_ID="00000000-0000-0000-0000-000000000000" @@ -175,7 +176,8 @@ REQ_BODY="$(cat << EOF "mirrored_host_count": ${NETDATA_MIRRORED_HOST_COUNT}, "mirrored_hosts_reachable": ${NETDATA_MIRRORED_HOSTS_REACHABLE}, "mirrored_hosts_unreachable": ${NETDATA_MIRRORED_HOSTS_UNREACHABLE}, - "exporting_connectors": ${NETDATA_EXPORTING_CONNECTORS} + "exporting_connectors": ${NETDATA_EXPORTING_CONNECTORS}, + "netdata_fail_reason": ${NETDATA_FAIL_REASON} } } EOF diff --git a/daemon/config/README.md b/daemon/config/README.md index bc5a5885c..11ba2a1bc 100644 --- a/daemon/config/README.md +++ b/daemon/config/README.md @@ -72,40 +72,40 @@ Please note that your data history will be lost if you have modified `history` p ### [global] section options -| setting | default | info | -|:-------------------------------------:|:-------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| process scheduling policy | `keep` | See [Netdata process scheduling policy](https://github.com/netdata/netdata/blob/master/daemon/README.md#netdata-process-scheduling-policy) | -| OOM score | `0` | | -| glibc malloc arena max for plugins | `1` | See [Virtual memory](https://github.com/netdata/netdata/blob/master/daemon/README.md#virtual-memory). | -| glibc malloc arena max for Netdata | `1` | See [Virtual memory](https://github.com/netdata/netdata/blob/master/daemon/README.md#virtual-memory). | -| hostname | auto-detected | The hostname of the computer running Netdata. | -| host access prefix | empty | This is used in docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set SYS_PTRACE capability on the docker for this work. Check [issue 43](https://github.com/netdata/netdata/issues/43). 
| -| timezone | auto-detected | The timezone retrieved from the environment variable | -| run as user | `netdata` | The user Netdata will run as. | -| pthread stack size | auto-detected | | +| setting | default | info | +|:----------------------------------:|:-------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| process scheduling policy | `keep` | See [Netdata process scheduling policy](https://github.com/netdata/netdata/blob/master/daemon/README.md#netdata-process-scheduling-policy) | +| OOM score | `0` | | +| glibc malloc arena max for plugins | `1` | See [Virtual memory](https://github.com/netdata/netdata/blob/master/daemon/README.md#virtual-memory). | +| glibc malloc arena max for Netdata | `1` | See [Virtual memory](https://github.com/netdata/netdata/blob/master/daemon/README.md#virtual-memory). | +| hostname | auto-detected | The hostname of the computer running Netdata. | +| host access prefix | empty | This is used in docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set SYS_PTRACE capability on the docker for this work. Check [issue 43](https://github.com/netdata/netdata/issues/43). | +| timezone | auto-detected | The timezone retrieved from the environment variable | +| run as user | `netdata` | The user Netdata will run as. | +| pthread stack size | auto-detected | | ### [db] section options -| setting | default | info | -|:---------------------------------------------:|:----------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| setting | default | info | +|:---------------------------------------------:|:----------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | mode | `dbengine` | `dbengine`: The default for long-term metrics storage with efficient RAM and disk usage. Can be extended with `dbengine page cache size MB` and `dbengine disk space MB`. <br />`save`: Netdata will save its round robin database on exit and load it on startup. <br />`map`: Cache files will be updated in real-time. 
Not ideal for systems with high load or slow disks (check `man mmap`). <br />`ram`: The round-robin database will be temporary and it will be lost when Netdata exits. <br />`alloc`: Similar to `ram`, but can significantly reduce memory usage, when combined with a low retention and does not support KSM. <br />`none`: Disables the database at this host, and disables health monitoring entirely, as that requires a database of metrics. Not to be used together with streaming. | -| retention | `3600` | Used with `mode = save/map/ram/alloc`, not the default `mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. Check [Memory Requirements](https://github.com/netdata/netdata/blob/master/database/README.md) for more information. | -| storage tiers | `1` | The number of storage tiers you want to have in your dbengine. Check the tiering mechanism in the [dbengine's reference](https://github.com/netdata/netdata/blob/master/database/engine/README.md#tiering). You can have up to 5 tiers of data (including the _Tier 0_). This number ranges between 1 and 5. | -| dbengine page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated to caching for _Tier 0_ Netdata metric values. | -| dbengine tier **`N`** page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated for caching Netdata metric values of the **`N`** tier. <br /> `N belongs to [1..4]` || - | dbengine disk space MB | `256` | Determines the amount of disk space in MiB that is dedicated to storing _Tier 0_ Netdata metric values and all related metadata describing them. This option is available **only for legacy configuration** (`Agent v1.23.2 and prior`). | -| dbengine multihost disk space MB | `256` | Same functionality as `dbengine disk space MB`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. This setting is only for _Tier 0_ metrics. | -| dbengine tier **`N`** multihost disk space MB | `256` | Same functionality as `dbengine multihost disk space MB`, but stores metrics of the **`N`** tier (both parent node and its children). Can be used in single-node environments as well. <br /> `N belongs to [1..4]` | -| update every | `1` | The frequency in seconds, for data collection. For more information see the [performance guide](https://github.com/netdata/netdata/blob/master/docs/guides/configure/performance.md). These metrics stored as _Tier 0_ data. Explore the tiering mechanism in the [dbengine's reference](https://github.com/netdata/netdata/blob/master/database/engine/README.md#tiering). | -| dbengine tier **`N`** update every iterations | `60` | The down sampling value of each tier from the previous one. For each Tier, the greater by one Tier has N (equal to 60 by default) less data points of any metric it collects. This setting can take values from `2` up to `255`. <br /> `N belongs to [1..4]` | -| dbengine tier **`N`** back fill | `New` | Specifies the strategy of recreating missing data on each Tier from the exact lower Tier. <br /> `New`: Sees the latest point on each Tier and save new points to it only if the exact lower Tier has available points for it's observation window (`dbengine tier N update every iterations` window). <br /> `none`: No back filling is applied. 
<br /> `N belongs to [1..4]` | -| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database and the dbengine page cache to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](https://github.com/netdata/netdata/blob/master/database/README.md#ksm) | -| cleanup obsolete charts after secs | `3600` | See [monitoring ephemeral containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions | -| gap when lost iterations above | `1` | | -| cleanup orphan hosts after secs | `3600` | How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data. | -| delete obsolete charts files | `yes` | See [monitoring ephemeral containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also affects the deletion of files for obsolete dimensions | -| delete orphan hosts files | `yes` | Set to `no` to disable non-responsive host removal. | -| enable zero metrics | `no` | Set to `yes` to show charts when all their metrics are zero. | +| retention | `3600` | Used with `mode = save/map/ram/alloc`, not the default `mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. Check [Memory Requirements](https://github.com/netdata/netdata/blob/master/database/README.md) for more information. | +| storage tiers | `1` | The number of storage tiers you want to have in your dbengine. Check the tiering mechanism in the [dbengine's reference](https://github.com/netdata/netdata/blob/master/database/engine/README.md#tiering). You can have up to 5 tiers of data (including the _Tier 0_). This number ranges between 1 and 5. | +| dbengine page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated to caching for _Tier 0_ Netdata metric values. | +| dbengine tier **`N`** page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated for caching Netdata metric values of the **`N`** tier. <br /> `N belongs to [1..4]` | +| dbengine disk space MB | `256` | Determines the amount of disk space in MiB that is dedicated to storing _Tier 0_ Netdata metric values and all related metadata describing them. This option is available **only for legacy configuration** (`Agent v1.23.2 and prior`). | +| dbengine multihost disk space MB | `256` | Same functionality as `dbengine disk space MB`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. This setting is only for _Tier 0_ metrics. | +| dbengine tier **`N`** multihost disk space MB | `256` | Same functionality as `dbengine multihost disk space MB`, but stores metrics of the **`N`** tier (both parent node and its children). Can be used in single-node environments as well. <br /> `N belongs to [1..4]` | +| update every | `1` | The frequency in seconds, for data collection. For more information see the [performance guide](https://github.com/netdata/netdata/blob/master/docs/guides/configure/performance.md). These metrics stored as _Tier 0_ data. Explore the tiering mechanism in the [dbengine's reference](https://github.com/netdata/netdata/blob/master/database/engine/README.md#tiering). 
| +| dbengine tier **`N`** update every iterations | `60` | The down sampling value of each tier from the previous one. For each Tier, the greater by one Tier has N (equal to 60 by default) less data points of any metric it collects. This setting can take values from `2` up to `255`. <br /> `N belongs to [1..4]` | +| dbengine tier **`N`** back fill | `New` | Specifies the strategy of recreating missing data on each Tier from the exact lower Tier. <br /> `New`: Sees the latest point on each Tier and save new points to it only if the exact lower Tier has available points for it's observation window (`dbengine tier N update every iterations` window). <br /> `none`: No back filling is applied. <br /> `N belongs to [1..4]` | +| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database and the dbengine page cache to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](https://github.com/netdata/netdata/blob/master/database/README.md#ksm) | +| cleanup obsolete charts after secs | `3600` | See [monitoring ephemeral containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions | +| gap when lost iterations above | `1` | | +| cleanup orphan hosts after secs | `3600` | How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data. | +| delete obsolete charts files | `yes` | See [monitoring ephemeral containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also affects the deletion of files for obsolete dimensions | +| delete orphan hosts files | `yes` | Set to `no` to disable non-responsive host removal. | +| enable zero metrics | `no` | Set to `yes` to show charts when all their metrics are zero. | > ### Info > @@ -113,32 +113,33 @@ Please note that your data history will be lost if you have modified `history` p ### [directories] section options -| setting | default | info | -|:-------------------:|:------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| config | `/etc/netdata` | The directory configuration files are kept. | -| stock config | `/usr/lib/netdata/conf.d` | | -| log | `/var/log/netdata` | The directory in which the [log files](https://github.com/netdata/netdata/blob/master/daemon/README.md#log-files) are kept. | -| web | `/usr/share/netdata/web` | The directory the web static files are kept. | -| cache | `/var/cache/netdata` | The directory the memory database will be stored if and when Netdata exits. Netdata will re-read the database when it will start again, to continue from the same point. | -| lib | `/var/lib/netdata` | Contains the alarm log and the Netdata instance GUID. | -| home | `/var/cache/netdata` | Contains the db files for the collected metrics. | -| lock | `/var/lib/netdata/lock` | Contains the data collectors lock files. | -| plugins | `"/usr/libexec/netdata/plugins.d" "/etc/netdata/custom-plugins.d"` | The directory plugin programs are kept. This setting supports multiple directories, space separated. 
If any directory path contains spaces, enclose it in single or double quotes. | -| health config | `/etc/netdata/health.d` | The directory containing the user alarm configuration files, to override the stock configurations | -| stock health config | `/usr/lib/netdata/conf.d/health.d` | Contains the stock alarm configuration files for each collector | -| registry | `/opt/netdata/var/lib/netdata/registry` | Contains the [registry](https://github.com/netdata/netdata/blob/master/registry/README.md) database and GUID that uniquely identifies each Netdata Agent | +| setting | default | info | +|:-------------------:|:------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| config | `/etc/netdata` | The directory configuration files are kept. | +| stock config | `/usr/lib/netdata/conf.d` | | +| log | `/var/log/netdata` | The directory in which the [log files](https://github.com/netdata/netdata/blob/master/daemon/README.md#log-files) are kept. | +| web | `/usr/share/netdata/web` | The directory the web static files are kept. | +| cache | `/var/cache/netdata` | The directory the memory database will be stored if and when Netdata exits. Netdata will re-read the database when it will start again, to continue from the same point. | +| lib | `/var/lib/netdata` | Contains the alert log and the Netdata instance GUID. | +| home | `/var/cache/netdata` | Contains the db files for the collected metrics. | +| lock | `/var/lib/netdata/lock` | Contains the data collectors lock files. | +| plugins | `"/usr/libexec/netdata/plugins.d" "/etc/netdata/custom-plugins.d"` | The directory plugin programs are kept. This setting supports multiple directories, space separated. If any directory path contains spaces, enclose it in single or double quotes. | +| health config | `/etc/netdata/health.d` | The directory containing the user alert configuration files, to override the stock configurations | +| stock health config | `/usr/lib/netdata/conf.d/health.d` | Contains the stock alert configuration files for each collector | +| registry | `/opt/netdata/var/lib/netdata/registry` | Contains the [registry](https://github.com/netdata/netdata/blob/master/registry/README.md) database and GUID that uniquely identifies each Netdata Agent | ### [logs] section options -| setting | default | info | -|:----------------------------------:|:-----------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| setting | default | info | +|:----------------------------------:|:-----------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | debug flags | `0x0000000000000000` | Bitmap of debug options to enable. For more information check [Tracing Options](https://github.com/netdata/netdata/blob/master/daemon/README.md#debugging). | | debug | `/var/log/netdata/debug.log` | The filename to save debug information. 
This file will not be created if debugging is not enabled. You can also set it to `syslog` to send the debug messages to syslog, or `none` to disable this log. For more information check [Tracing Options](https://github.com/netdata/netdata/blob/master/daemon/README.md#debugging). | -| error | `/var/log/netdata/error.log` | The filename to save error messages for Netdata daemon and all plugins (`stderr` is sent here for all Netdata programs, including the plugins). You can also set it to `syslog` to send the errors to syslog, or `none` to disable this log. | -| access | `/var/log/netdata/access.log` | The filename to save the log of web clients accessing Netdata charts. You can also set it to `syslog` to send the access log to syslog, or `none` to disable this log. | -| facility | `daemon` | A facility keyword is used to specify the type of system that is logging the message. | -| errors flood protection period | `1200` | Length of period (in sec) during which the number of errors should not exceed the `errors to trigger flood protection`. | -| errors to trigger flood protection | `200` | Number of errors written to the log in `errors flood protection period` sec before flood protection is activated. | +| error | `/var/log/netdata/error.log` | The filename to save error messages for Netdata daemon and all plugins (`stderr` is sent here for all Netdata programs, including the plugins). You can also set it to `syslog` to send the errors to syslog, or `none` to disable this log. | +| access | `/var/log/netdata/access.log` | The filename to save the log of web clients accessing Netdata charts. You can also set it to `syslog` to send the access log to syslog, or `none` to disable this log. | +| facility | `daemon` | A facility keyword is used to specify the type of system that is logging the message. | +| errors flood protection period | `1200` | Length of period (in sec) during which the number of errors should not exceed the `errors to trigger flood protection`. | +| errors to trigger flood protection | `200` | Number of errors written to the log in `errors flood protection period` sec before flood protection is activated. | +| severity level | `info` | Controls which log messages are logged, with error being the most important. Supported values: `info` and `error`. | ### [environment variables] section options @@ -163,20 +164,20 @@ Please note that your data history will be lost if you have modified `history` p This section controls the general behavior of the health monitoring capabilities of Netdata. -Specific alarms are configured in per-collector config files under the `health.d` directory. For more info, see [health +Specific alerts are configured in per-collector config files under the `health.d` directory. For more info, see [health monitoring](https://github.com/netdata/netdata/blob/master/health/README.md). -[Alarm notifications](https://github.com/netdata/netdata/blob/master/health/notifications/README.md) are configured in `health_alarm_notify.conf`. +[Alert notifications](https://github.com/netdata/netdata/blob/master/health/notifications/README.md) are configured in `health_alarm_notify.conf`. 
-| setting | default | info | -|:----------------------------------------------:|:------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| enabled | `yes` | Set to `no` to disable all alarms and notifications | -| in memory max health log entries | 1000 | Size of the alarm history held in RAM | -| script to execute on alarm | `/usr/libexec/netdata/plugins.d/alarm-notify.sh` | The script that sends alarm notifications. Note that in versions before 1.16, the plugins.d directory may be installed in a different location in certain OSs (e.g. under `/usr/lib/netdata`). | -| run at least every seconds | `10` | Controls how often all alarm conditions should be evaluated. | -| postpone alarms during hibernation for seconds | `60` | Prevents false alarms. May need to be increased if you get alarms during hibernation. | -| health log history | `432000` | Specifies the history of alarm events (in seconds) kept in the agent's sqlite database. | -| enabled alarms | * | Defines which alarms to load from both user and stock directories. This is a [simple pattern](https://github.com/netdata/netdata/blob/master/libnetdata/simple_pattern/README.md) list of alarm or template names. Can be used to disable specific alarms. For example, `enabled alarms = !oom_kill *` will load all alarms except `oom_kill`. | +| setting | default | info | +|:----------------------------------------------:|:------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| enabled | `yes` | Set to `no` to disable all alerts and notifications | +| in memory max health log entries | 1000 | Size of the alert history held in RAM | +| script to execute on alarm | `/usr/libexec/netdata/plugins.d/alarm-notify.sh` | The script that sends alert notifications. Note that in versions before 1.16, the plugins.d directory may be installed in a different location in certain OSs (e.g. under `/usr/lib/netdata`). | +| run at least every seconds | `10` | Controls how often all alert conditions should be evaluated. | +| postpone alarms during hibernation for seconds | `60` | Prevents false alerts. May need to be increased if you get alerts during hibernation. | +| health log history | `432000` | Specifies the history of alert events (in seconds) kept in the agent's sqlite database. | +| enabled alarms | * | Defines which alerts to load from both user and stock directories. This is a [simple pattern](https://github.com/netdata/netdata/blob/master/libnetdata/simple_pattern/README.md) list of alert or template names. Can be used to disable specific alerts. For example, `enabled alarms = !oom_kill *` will load all alerts except `oom_kill`. | ### [web] section options @@ -222,10 +223,10 @@ for all internal Netdata plugins. 
External plugins will have only 2 options at `netdata.conf`: -| setting | default | info | -|:---------------:|:--------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------| +| setting | default | info | +|:---------------:|:--------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | update every | the value of `[global].update every` setting | The frequency in seconds the plugin should collect values. For more information check the [performance guide](https://github.com/netdata/netdata/blob/master/docs/guides/configure/performance.md). | -| command options | - | Additional command line options to pass to the plugin. | | +| command options | - | Additional command line options to pass to the plugin. | External plugins that need additional configuration may support a dedicated file in `/etc/netdata`. Check their documentation. diff --git a/daemon/event_loop.c b/daemon/event_loop.c index fb3879154..93bac97d0 100644 --- a/daemon/event_loop.c +++ b/daemon/event_loop.c @@ -52,6 +52,7 @@ void register_libuv_worker_jobs() { worker_register_job_name(UV_EVENT_HOST_CONTEXT_LOAD, "metadata load host context"); worker_register_job_name(UV_EVENT_METADATA_STORE, "metadata store host"); worker_register_job_name(UV_EVENT_METADATA_CLEANUP, "metadata cleanup"); + worker_register_job_name(UV_EVENT_METADATA_ML_LOAD, "metadata load ml models"); // netdatacli worker_register_job_name(UV_EVENT_SCHEDULE_CMD, "schedule command"); diff --git a/daemon/event_loop.h b/daemon/event_loop.h index 1ff1c2c1c..c1821c646 100644 --- a/daemon/event_loop.h +++ b/daemon/event_loop.h @@ -44,6 +44,7 @@ enum event_loop_job { UV_EVENT_HOST_CONTEXT_LOAD, UV_EVENT_METADATA_STORE, UV_EVENT_METADATA_CLEANUP, + UV_EVENT_METADATA_ML_LOAD, // netdatacli UV_EVENT_SCHEDULE_CMD, diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c index ce8d41402..ab910e189 100644 --- a/daemon/global_statistics.c +++ b/daemon/global_statistics.c @@ -2681,9 +2681,12 @@ static void dbengine2_statistics_charts(void) { static void update_strings_charts() { static RRDSET *st_ops = NULL, *st_entries = NULL, *st_mem = NULL; - static RRDDIM *rd_ops_inserts = NULL, *rd_ops_deletes = NULL, *rd_ops_searches = NULL, *rd_ops_duplications = NULL, *rd_ops_releases = NULL; - static RRDDIM *rd_entries_entries = NULL, *rd_entries_refs = NULL; + static RRDDIM *rd_ops_inserts = NULL, *rd_ops_deletes = NULL; + static RRDDIM *rd_entries_entries = NULL; static RRDDIM *rd_mem = NULL; +#ifdef NETDATA_INTERNAL_CHECKS + static RRDDIM *rd_entries_refs = NULL, *rd_ops_releases = NULL, *rd_ops_duplications = NULL, *rd_ops_searches = NULL; +#endif size_t inserts, deletes, searches, entries, references, memory, duplications, releases; @@ -2706,16 +2709,20 @@ static void update_strings_charts() { rd_ops_inserts = rrddim_add(st_ops, "inserts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); rd_ops_deletes = rrddim_add(st_ops, "deletes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); +#ifdef NETDATA_INTERNAL_CHECKS rd_ops_searches = rrddim_add(st_ops, "searches", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); rd_ops_duplications = rrddim_add(st_ops, "duplications", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); rd_ops_releases = rrddim_add(st_ops, "releases", NULL, -1, 1, 
RRD_ALGORITHM_INCREMENTAL); +#endif } rrddim_set_by_pointer(st_ops, rd_ops_inserts, (collected_number)inserts); rrddim_set_by_pointer(st_ops, rd_ops_deletes, (collected_number)deletes); +#ifdef NETDATA_INTERNAL_CHECKS rrddim_set_by_pointer(st_ops, rd_ops_searches, (collected_number)searches); rrddim_set_by_pointer(st_ops, rd_ops_duplications, (collected_number)duplications); rrddim_set_by_pointer(st_ops, rd_ops_releases, (collected_number)releases); +#endif rrdset_done(st_ops); if (unlikely(!st_entries)) { @@ -2734,11 +2741,15 @@ static void update_strings_charts() { , RRDSET_TYPE_AREA); rd_entries_entries = rrddim_add(st_entries, "entries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); +#ifdef NETDATA_INTERNAL_CHECKS rd_entries_refs = rrddim_add(st_entries, "references", NULL, 1, -1, RRD_ALGORITHM_ABSOLUTE); +#endif } rrddim_set_by_pointer(st_entries, rd_entries_entries, (collected_number)entries); +#ifdef NETDATA_INTERNAL_CHECKS rrddim_set_by_pointer(st_entries, rd_entries_refs, (collected_number)references); +#endif rrdset_done(st_entries); if (unlikely(!st_mem)) { @@ -2813,6 +2824,7 @@ struct dictionary_stats dictionary_stats_category_rrdhealth = { .name = "health" struct dictionary_stats dictionary_stats_category_functions = { .name = "functions" }; struct dictionary_stats dictionary_stats_category_replication = { .name = "replication" }; +#ifdef DICT_WITH_STATS struct dictionary_categories { struct dictionary_stats *stats; const char *family; @@ -3165,6 +3177,13 @@ static void update_dictionary_category_charts(struct dictionary_categories *c) { } } +static void dictionary_statistics(void) { + for(int i = 0; dictionary_categories[i].stats ;i++) { + update_dictionary_category_charts(&dictionary_categories[i]); + } +} +#endif // DICT_WITH_STATS + #ifdef NETDATA_TRACE_ALLOCATIONS struct memory_trace_data { @@ -3304,12 +3323,6 @@ static void malloc_trace_statistics(void) { } #endif -static void dictionary_statistics(void) { - for(int i = 0; dictionary_categories[i].stats ;i++) { - update_dictionary_category_charts(&dictionary_categories[i]); - } -} - // --------------------------------------------------------------------------------------------------------------------- // worker utilization @@ -4171,8 +4184,10 @@ void *global_statistics_main(void *ptr) worker_is_busy(WORKER_JOB_STRINGS); update_strings_charts(); +#ifdef DICT_WITH_STATS worker_is_busy(WORKER_JOB_DICTIONARIES); dictionary_statistics(); +#endif #ifdef NETDATA_TRACE_ALLOCATIONS worker_is_busy(WORKER_JOB_MALLOC_TRACE); diff --git a/daemon/main.c b/daemon/main.c index 6ddf57aa1..ab7997969 100644 --- a/daemon/main.c +++ b/daemon/main.c @@ -761,7 +761,7 @@ int help(int exitcode) { " Support : https://github.com/netdata/netdata/issues\n" " License : https://github.com/netdata/netdata/blob/master/LICENSE.md\n" "\n" - " Twitter : https://twitter.com/linuxnetdata\n" + " Twitter : https://twitter.com/netdatahq\n" " LinkedIn : https://linkedin.com/company/netdata-cloud/\n" " Facebook : https://facebook.com/linuxnetdata/\n" "\n" @@ -787,8 +787,7 @@ int help(int exitcode) { " -W stacksize=N Set the stacksize (in bytes).\n\n" " -W debug_flags=N Set runtime tracing to debug.log.\n\n" " -W unittest Run internal unittests and exit.\n\n" - " -W sqlite-check Check metadata database integrity and exit.\n\n" - " -W sqlite-fix Check metadata database integrity, fix if needed and exit.\n\n" + " -W sqlite-meta-recover Run recovery on the metadata database and exit.\n\n" " -W sqlite-compact Reclaim metadata database unused space and exit.\n\n" #ifdef 
ENABLE_DBENGINE " -W createdataset=N Create a DB engine dataset of N seconds and exit.\n\n" @@ -875,6 +874,10 @@ static void log_init(void) { setenv("NETDATA_ERRORS_THROTTLE_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors flood protection period" , ""), 1); setenv("NETDATA_ERRORS_PER_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors to trigger flood protection", ""), 1); + + char *selected_level = config_get(CONFIG_SECTION_LOGS, "severity level", NETDATA_LOG_LEVEL_INFO_STR); + global_log_severity_level = log_severity_string_to_severity_level(selected_level); + setenv("NETDATA_LOG_SEVERITY_LEVEL", selected_level , 1); } char *initialize_lock_directory_path(char *prefix) @@ -1436,13 +1439,9 @@ int main(int argc, char **argv) { char* createdataset_string = "createdataset="; char* stresstest_string = "stresstest="; #endif - if(strcmp(optarg, "sqlite-check") == 0) { - sql_init_database(DB_CHECK_INTEGRITY, 0); - return 0; - } - if(strcmp(optarg, "sqlite-fix") == 0) { - sql_init_database(DB_CHECK_FIX_DB, 0); + if(strcmp(optarg, "sqlite-meta-recover") == 0) { + sql_init_database(DB_CHECK_RECOVER, 0); return 0; } @@ -1509,7 +1508,7 @@ int main(int argc, char **argv) { unittest_running = true; return aral_unittest(10000); } - else if(strcmp(optarg, "stringtest") == 0) { + else if(strcmp(optarg, "stringtest") == 0) { unittest_running = true; return string_unittest(10000); } @@ -1898,6 +1897,7 @@ int main(int argc, char **argv) { // initialize the log files open_all_log_files(); + netdata_log_info("Netdata agent version \""VERSION"\" is starting"); ieee754_doubles = is_system_ieee754_double(); @@ -1909,6 +1909,8 @@ int main(int argc, char **argv) { replication_initialize(); + rrd_functions_inflight_init(); + // -------------------------------------------------------------------- // get the certificate and start security @@ -1938,8 +1940,6 @@ int main(int argc, char **argv) { signals_block(); signals_init(); // setup the signals we want to use - dyn_conf_init(); - // -------------------------------------------------------------------- // check which threads are enabled and initialize them @@ -2005,6 +2005,8 @@ int main(int argc, char **argv) { if(become_daemon(dont_fork, user) == -1) fatal("Cannot daemonize myself."); + dyn_conf_init(); + netdata_log_info("netdata started on pid %d.", getpid()); delta_startup_time("initialize threads after fork"); diff --git a/daemon/service.c b/daemon/service.c index a25e2a26b..f7fe86e04 100644 --- a/daemon/service.c +++ b/daemon/service.c @@ -105,14 +105,11 @@ static bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensio return done_all_dimensions; } -static void svc_rrdset_obsolete_to_archive(RRDSET *st) { - worker_is_busy(WORKER_JOB_ARCHIVE_CHART); - +static void svc_rrdset_obsolete_to_free(RRDSET *st) { if(!svc_rrdset_archive_obsolete_dimensions(st, true)) return; - rrdset_flag_set(st, RRDSET_FLAG_ARCHIVED); - rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE); + worker_is_busy(WORKER_JOB_FREE_CHART); rrdcalc_unlink_all_rrdset_alerts(st); @@ -130,10 +127,9 @@ static void svc_rrdset_obsolete_to_archive(RRDSET *st) { worker_is_busy(WORKER_JOB_SAVE_CHART); rrdset_save(st); } - - worker_is_busy(WORKER_JOB_FREE_CHART); - rrdset_free(st); } + + rrdset_free(st); } static void svc_rrdhost_cleanup_obsolete_charts(RRDHOST *host) { @@ -150,12 +146,15 @@ static void svc_rrdhost_cleanup_obsolete_charts(RRDHOST *host) { && st->last_updated.tv_sec + rrdset_free_obsolete_time_s < now && st->last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now )) { - 
svc_rrdset_obsolete_to_archive(st); + svc_rrdset_obsolete_to_free(st); } else if(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS)) { rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS); svc_rrdset_archive_obsolete_dimensions(st, false); } + else if (unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE))) { + rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS); + } } rrdset_foreach_done(st); } |
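
The analytics.c, analytics.h and anonymous-statistics.sh.in hunks wire a new `netdata_fail_reason` item through the telemetry pipeline: the struct gains a field, `ANALYTICS_NO_OF_ITEMS` goes from 39 to 40, the value defaults to `"null"`, is freed in `analytics_free_data()`, and is passed as one more quoted positional argument (read as `${43}` by the script). The sketch below only illustrates that pattern; `demo_analytics`, `set_str` and the two-item command line are made-up stand-ins, not Netdata's actual code.

```c
/*
 * Minimal sketch (illustrative, not Netdata's code) of the analytics-item
 * pattern the patch extends: every item is a heap-allocated string that
 * defaults to "null", is quoted as one positional argument for
 * anonymous-statistics.sh, and is freed on shutdown.  Adding an item means
 * bumping the item count, giving it a default, appending one more '%s'
 * to the command line, and freeing it at the end.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ITEMS 2   /* stands in for ANALYTICS_NO_OF_ITEMS (39 -> 40 in the patch) */

struct demo_analytics {
    char *prebuilt_distro;
    char *fail_reason;           /* stands in for the new netdata_fail_reason item */
};

/* mirrors analytics_set_data_str(), which the patch changes to take a const char * */
static void set_str(char **slot, const char *value) {
    free(*slot);
    *slot = strdup(value ? value : "null");
}

int main(void) {
    struct demo_analytics a = { 0 };
    set_str(&a.prebuilt_distro, "debian12");
    set_str(&a.fail_reason, NULL);                  /* stays "null" until a failure is recorded */

    /* each item contributes its length plus 3 bytes: two quotes and a space */
    size_t len = strlen(a.prebuilt_distro) + strlen(a.fail_reason) + ITEMS * 3 + 32;
    char *cmd = malloc(len);
    if (!cmd) return 1;
    snprintf(cmd, len, "anonymous-statistics.sh '%s' '%s'",
             a.prebuilt_distro, a.fail_reason);
    puts(cmd);                                      /* the script reads the items positionally */

    free(cmd);
    free(a.prebuilt_distro);
    free(a.fail_reason);                            /* mirrors the new freez() in analytics_free_data() */
    return 0;
}
```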
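
The daemon/README.md, daemon/config/README.md and main.c changes introduce a `severity level` option in the `[logs]` section (default `info`, also accepting `error`): `log_init()` reads it with `config_get()` and exports it as `NETDATA_LOG_SEVERITY_LEVEL`, so `INFO` lines can be filtered while `ERROR` and `FATAL` always appear. The sketch below only illustrates that filtering semantics; `severity_from_string` and `log_line` are invented names, and in the patch the mapping is done by `log_severity_string_to_severity_level()`.

```c
/*
 * Illustrative sketch of the new "severity level" option in [logs]
 * (default "info", also accepting "error"): ERROR and FATAL lines always
 * appear, INFO lines can be filtered out.
 */
#include <stdio.h>
#include <string.h>

enum severity { SEV_ERROR = 0, SEV_INFO = 1 };

static enum severity severity_from_string(const char *s) {
    return (s && strcmp(s, "error") == 0) ? SEV_ERROR : SEV_INFO;  /* supported: "info", "error" */
}

static void log_line(enum severity configured, enum severity line, const char *msg) {
    if (line <= configured)          /* ERROR always passes; INFO only when the level allows it */
        fprintf(stderr, "%s\n", msg);
}

int main(void) {
    /* as if netdata.conf contained:  [logs]  severity level = error  */
    enum severity configured = severity_from_string("error");

    log_line(configured, SEV_ERROR, "ERROR: something that might disable a part of netdata");  /* printed */
    log_line(configured, SEV_INFO,  "INFO: routine startup detail");                           /* filtered out */
    return 0;
}
```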
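
The reworked `[db]` table in daemon/config/README.md documents the tiering arithmetic: with the defaults `update every = 1` and `dbengine tier N update every iterations = 60`, each higher tier keeps one point for every 60 points of the tier below it. A small worked example of that arithmetic follows; the program itself is illustrative, the values are the documented defaults.

```c
/*
 * Worked example of the tier resolution math from the [db] section:
 * every tier down-samples the tier below it by `iterations` (60 by default).
 */
#include <stdio.h>

int main(void) {
    int update_every  = 1;   /* [db] update every: seconds per Tier 0 point */
    int iterations    = 60;  /* [db] dbengine tier N update every iterations */
    int storage_tiers = 3;   /* [db] storage tiers: between 1 and 5, Tier 0 included */

    int seconds_per_point = update_every;
    for (int tier = 0; tier < storage_tiers; tier++) {
        printf("tier %d: one point every %d seconds\n", tier, seconds_per_point);
        seconds_per_point *= iterations;
    }
    return 0;   /* prints 1s for tier 0, 60s for tier 1, 3600s for tier 2 */
}
```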
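
In global_statistics.c the searches/duplications/releases dimensions, the references dimension, and the dictionary-statistics charts are now compiled only when `NETDATA_INTERNAL_CHECKS` (for the string charts) or `DICT_WITH_STATS` (for the dictionary charts) is defined. The sketch below shows the general pattern of gating debug-only chart dimensions behind a compile-time macro; `INTERNAL_CHECKS`, `string_stats` and `report()` are illustrative names, not the Netdata symbols.

```c
/*
 * General pattern of the change: dimensions that are only interesting to
 * developers are declared, registered and updated exclusively when a
 * compile-time flag is defined, so release builds skip them entirely.
 */
#include <stdio.h>

/* #define INTERNAL_CHECKS 1 */   /* uncomment to build the debug-only dimensions in */

struct string_stats {
    long inserts;
    long deletes;
#ifdef INTERNAL_CHECKS
    long searches;                /* extra dimension, debug builds only */
#endif
};

static void report(const struct string_stats *s) {
    printf("inserts=%ld deletes=%ld", s->inserts, s->deletes);
#ifdef INTERNAL_CHECKS
    printf(" searches=%ld", s->searches);
#endif
    printf("\n");
}

int main(void) {
    struct string_stats s = { .inserts = 10, .deletes = 2 };
    report(&s);
    return 0;
}
```

The same gating removes the dictionary-statistics charts and their worker step from builds without `DICT_WITH_STATS`, so the statistics thread does less work in release builds.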
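
The service.c hunk replaces `svc_rrdset_obsolete_to_archive()` with `svc_rrdset_obsolete_to_free()`: an obsolete chart that has passed the `rrdset_free_obsolete_time_s` window is freed outright instead of being flagged `RRDSET_FLAG_ARCHIVED`, and a chart that is obsolete but not yet past the window now marks its host with `RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS` so the cleanup pass runs again. The decision flow, reduced to a standalone sketch with simplified types (`CHART` and `cleanup_action()` are illustrative, not the real structures):

```c
/* Sketch of the per-chart decision made by the obsolete-chart cleanup pass
 * after the service.c change; field and flag names are simplified. */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

typedef struct chart {
    bool obsolete;
    bool obsolete_dimensions;
    time_t last_updated;
    time_t last_collected;
} CHART;

enum action { DO_NOTHING, FREE_CHART, FREE_OBSOLETE_DIMENSIONS, MARK_HOST_PENDING };

/* Decide what one cleanup pass does for a single chart. */
static enum action cleanup_action(const CHART *st, time_t now, time_t free_after) {
    if (st->obsolete &&
        st->last_updated   + free_after < now &&
        st->last_collected + free_after < now)
        return FREE_CHART;                  /* previously "archived"; now freed outright */

    if (st->obsolete_dimensions)
        return FREE_OBSOLETE_DIMENSIONS;    /* only the flagged dimensions are removed */

    if (st->obsolete)
        return MARK_HOST_PENDING;           /* new branch: revisit the host on a later pass */

    return DO_NOTHING;
}

int main(void) {
    time_t now = time(NULL);
    CHART recently_obsolete = { .obsolete = true, .last_updated = now - 10,   .last_collected = now - 10 };
    CHART long_obsolete     = { .obsolete = true, .last_updated = now - 7200, .last_collected = now - 7200 };

    printf("recently obsolete -> %d (MARK_HOST_PENDING)\n", cleanup_action(&recently_obsolete, now, 3600));
    printf("long obsolete     -> %d (FREE_CHART)\n",        cleanup_action(&long_obsolete,     now, 3600));
    return 0;
}
```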