From b5321aff06d6ea8d730d62aec2ffd8e9271c1ffc Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Thu, 14 Apr 2022 20:12:10 +0200
Subject: Adding upstream version 1.34.0.

Signed-off-by: Daniel Baumann
---
 daemon/README.md                   |  61 ++-----------
 daemon/analytics.c                 |  28 +++---
 daemon/anonymous-statistics.sh.in  |   6 +-
 daemon/buildinfo.c                 | 174 ++++++++++++++++++++++++-------------
 daemon/buildinfo.h                 |   8 +-
 daemon/commands.c                  |   8 +-
 daemon/common.h                    |   4 +-
 daemon/config/README.md            |  89 +++++++++----------
 daemon/daemon.c                    |  30 +++----
 daemon/get-kubernetes-labels.sh.in |  21 ++++-
 daemon/global_statistics.c         |  12 +--
 daemon/main.c                      |  53 +++++------
 daemon/service.c                   |   5 ++
 daemon/static_threads.c            |  19 ++--
 daemon/static_threads_linux.c      |   9 --
 daemon/system-info.sh              |  91 ++++++++++++++++---
 daemon/unit_test.c                 |  44 +++++++++-
 daemon/unit_test.h                 |   1 +
 18 files changed, 395 insertions(+), 268 deletions(-)

(limited to 'daemon')

diff --git a/daemon/README.md b/daemon/README.md
index 1ea865f89..44abfa8e9 100644
--- a/daemon/README.md
+++ b/daemon/README.md
@@ -254,57 +254,14 @@ where:

 See [debugging](#debugging).

-## OOM Score
-
-Netdata runs with `OOMScore = 1000`. This means Netdata will be the first to be killed when your server runs out of
-memory.
-
-You can set Netdata OOMScore in `netdata.conf`, like this:
-
-```conf
-[global]
-    OOM score = 1000
-```
-
-Netdata logs its OOM score when it starts:
-
-```sh
-# grep OOM /var/log/netdata/error.log
-2017-10-15 03:47:31: netdata INFO : Adjusted my Out-Of-Memory (OOM) score from 0 to 1000.
-```
-
-### OOM score and systemd
-
-Netdata will not be able to lower its OOM Score below zero, when it is started as the `netdata` user (systemd case).
-
-To allow Netdata control its OOM Score in such cases, you will need to edit `netdata.service` and set:
-
-```sh
-[Service]
-# The minimum Netdata Out-Of-Memory (OOM) score.
-# Netdata (via [global].OOM score in netdata.conf) can only increase the value set here.
-# To decrease it, set the minimum here and set the same or a higher value in netdata.conf.
-# Valid values: -1000 (never kill netdata) to 1000 (always kill netdata).
-OOMScoreAdjust=-1000
-```
-
-Run `systemctl daemon-reload` to reload these changes.
-
-The above, sets and OOMScore for Netdata to `-1000`, so that Netdata can increase it via `netdata.conf`.
-
-If you want to control it entirely via systemd, you can set in `netdata.conf`:
-
-```conf
-[global]
-    OOM score = keep
-```
-
-Using the above, whatever OOM Score you have set at `netdata.service` will be maintained by netdata.
-
 ## Netdata process scheduling policy

-By default Netdata runs with the `idle` process scheduling policy, so that it uses CPU resources, only when there is
-idle CPU to spare. On very busy servers (or weak servers), this can lead to gaps on the charts.
+Netdata versions prior to 1.34.0 run with the `idle` process scheduling policy by default, so that they use CPU
+resources only when there is idle CPU to spare. On very busy (or underpowered) servers, this can lead to gaps on
+the charts.
+
+Starting with version 1.34.0, Netdata instead uses the `batch` scheduling policy by default. This largely eliminates
+issues with gaps in charts on busy systems while still keeping the impact on the rest of the system low.
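A minimal sketch of what the `batch` policy plus a nice level means at the OS level (illustrative only, not Netdata's actual implementation; the real policy table is in `daemon/daemon.c`, further down in this patch):

```c
/* Illustrative sketch: put the calling process on the Linux SCHED_BATCH
 * policy, then apply a positive nice level, which SCHED_BATCH still honors.
 * This mirrors the "batch" + SCHED_FLAG_USE_NICE combination that
 * daemon.c registers. */
#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/time.h>

int main(void) {
    struct sched_param param = { .sched_priority = 0 }; /* must be 0 for SCHED_BATCH */

    /* pid 0 means "the calling process" */
    if (sched_setscheduler(0, SCHED_BATCH, &param) == -1)
        fprintf(stderr, "sched_setscheduler(SCHED_BATCH): %s\n", strerror(errno));

    /* a positive nice level keeps the process mildly deprioritized */
    if (setpriority(PRIO_PROCESS, 0, 19) == -1)
        fprintf(stderr, "setpriority: %s\n", strerror(errno));

    return 0;
}
```

Unlike `idle`, a `batch` thread is never starved outright under load; the scheduler only applies a small wake-up penalty, which is why the chart gaps largely disappear.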
You can set Netdata scheduling policy in `netdata.conf`, like this: @@ -315,9 +272,9 @@ You can set Netdata scheduling policy in `netdata.conf`, like this: You can use the following: -| policy | description | +| policy | description | | :-----------------------: | :---------- | -| `idle` | use CPU only when there is spare - this is lower than nice 19 - it is the default for Netdata and it is so low that Netdata will run in "slow motion" under extreme system load, resulting in short (1-2 seconds) gaps at the charts. | +| `idle` | use CPU only when there is spare - this is lower than nice 19 - it is the default for Netdata and it is so low that Netdata will run in "slow motion" under extreme system load, resulting in short (1-2 seconds) gaps at the charts. | | `other`
or
`nice` | this is the default policy for all processes under Linux. It provides dynamic priorities based on the `nice` level of each process. Check below for setting this `nice` level for netdata. | | `batch` | This policy is similar to `other` in that it schedules the thread according to its dynamic priority (based on the `nice` value). The difference is that this policy will cause the scheduler to always assume that the thread is CPU-intensive. Consequently, the scheduler will apply a small scheduling penalty with respect to wake-up behavior, so that this thread is mildly disfavored in scheduling decisions. | | `fifo` | `fifo` can be used only with static priorities higher than 0, which means that when a `fifo` threads becomes runnable, it will always immediately preempt any currently running `other`, `batch`, or `idle` thread. `fifo` is a simple scheduling algorithm without time slicing. | @@ -551,4 +508,4 @@ valgrind $(which netdata) -D Netdata will start and it will be a lot slower. Now reproduce the crash and `valgrind` will dump on your console the stack trace. Open a new github issue and post the output. -[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fdaemon%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)](<>) + diff --git a/daemon/analytics.c b/daemon/analytics.c index 4501a46a5..0af41fdda 100644 --- a/daemon/analytics.c +++ b/daemon/analytics.c @@ -362,21 +362,15 @@ void analytics_alarms_notifications(void) void analytics_get_install_type(void) { - struct install_type_info t = get_install_type(); - - if (t.install_type == NULL) { + if (localhost->system_info->install_type == NULL) { analytics_set_data_str(&analytics_data.netdata_install_type, "unknown"); } else { - analytics_set_data_str(&analytics_data.netdata_install_type, t.install_type); + analytics_set_data_str(&analytics_data.netdata_install_type, localhost->system_info->install_type); } - if (t.prebuilt_distro != NULL) { - analytics_set_data_str(&analytics_data.netdata_prebuilt_distro, t.prebuilt_distro); + if (localhost->system_info->prebuilt_dist != NULL) { + analytics_set_data_str(&analytics_data.netdata_prebuilt_distro, localhost->system_info->prebuilt_dist); } - - freez(t.prebuilt_arch); - freez(t.prebuilt_distro); - freez(t.install_type); } /* @@ -422,12 +416,16 @@ void analytics_metrics(void) rrdset_foreach_read(st, localhost) { rrdset_rdlock(st); - rrddim_foreach_read(rd, st) - { - if (rrddim_flag_check(rd, RRDDIM_FLAG_HIDDEN) || rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) - continue; - dimensions++; + + if (rrdset_is_available_for_viewers(st)) { + rrddim_foreach_read(rd, st) + { + if (rrddim_flag_check(rd, RRDDIM_FLAG_HIDDEN) || rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) + continue; + dimensions++; + } } + rrdset_unlock(st); } { diff --git a/daemon/anonymous-statistics.sh.in b/daemon/anonymous-statistics.sh.in index 9167b7062..9f8df188d 100755 --- a/daemon/anonymous-statistics.sh.in +++ b/daemon/anonymous-statistics.sh.in @@ -18,7 +18,11 @@ ACTION_DATA=$(echo "${ACTION_DATA}" | tr '"' "'") # ------------------------------------------------------------------------------------------------- # check opt-out -if [ -f "@configdir_POST@/.opt-out-from-anonymous-statistics" ] || [ ! "${DO_NOT_TRACK:-0}" -eq 0 ] || [ -n "$DO_NOT_TRACK" ]; then +if [ -f "@configdir_POST@/.opt-out-from-anonymous-statistics" ] || + [ ! 
"${DISABLE_TELEMETRY:-0}" -eq 0 ] || + [ -n "$DISABLE_TELEMETRY" ] || + [ ! "${DO_NOT_TRACK:-0}" -eq 0 ] || + [ -n "$DO_NOT_TRACK" ]; then exit 0 fi diff --git a/daemon/buildinfo.c b/daemon/buildinfo.c index b64a78f29..86c586afc 100644 --- a/daemon/buildinfo.c +++ b/daemon/buildinfo.c @@ -60,7 +60,6 @@ // Optional libraries #ifdef HAVE_PROTOBUF -#if defined(ACLK_NG) || defined(ENABLE_PROMETHEUS_REMOTE_WRITE) #define FEAT_PROTOBUF 1 #ifdef BUNDLED_PROTOBUF #define FEAT_PROTOBUF_BUNDLED " (bundled)" @@ -71,10 +70,6 @@ #define FEAT_PROTOBUF 0 #define FEAT_PROTOBUF_BUNDLED "" #endif -#else -#define FEAT_PROTOBUF 0 -#define FEAT_PROTOBUF_BUNDLED "" -#endif #ifdef ENABLE_JSONC #define FEAT_JSONC 1 @@ -222,9 +217,8 @@ char *get_value_from_key(char *buffer, char *key) { return s; } -struct install_type_info get_install_type() { +void get_install_type(char **install_type, char **prebuilt_arch, char **prebuilt_dist) { char *install_type_filename; - struct install_type_info ret = {.install_type = NULL, .prebuilt_arch = NULL, .prebuilt_distro = NULL}; int install_type_filename_len = (strlen(netdata_configured_user_config_dir) + strlen(".install-type") + 3); install_type_filename = mallocz(sizeof(char) * install_type_filename_len); @@ -237,41 +231,42 @@ struct install_type_info get_install_type() { while ((s = fgets_trim_len(buf, 256, fp, &len))) { if (!strncmp(buf, "INSTALL_TYPE='", 14)) - ret.install_type = strdupz((char *)get_value_from_key(buf, "INSTALL_TYPE")); + *install_type = strdupz((char *)get_value_from_key(buf, "INSTALL_TYPE")); else if (!strncmp(buf, "PREBUILT_ARCH='", 15)) - ret.prebuilt_arch = strdupz((char *)get_value_from_key(buf, "PREBUILT_ARCH")); + *prebuilt_arch = strdupz((char *)get_value_from_key(buf, "PREBUILT_ARCH")); else if (!strncmp(buf, "PREBUILT_DISTRO='", 17)) - ret.prebuilt_distro = strdupz((char *)get_value_from_key(buf, "PREBUILT_DISTRO")); + *prebuilt_dist = strdupz((char *)get_value_from_key(buf, "PREBUILT_DISTRO")); } fclose(fp); } freez(install_type_filename); - - return ret; } void print_build_info(void) { - struct install_type_info t = get_install_type(); + char *install_type = NULL; + char *prebuilt_arch = NULL; + char *prebuilt_distro = NULL; + get_install_type(&install_type, &prebuilt_arch, &prebuilt_distro); printf("Configure options: %s\n", CONFIGURE_COMMAND); - if (t.install_type == NULL) { + if (install_type == NULL) { printf("Install type: unknown\n"); } else { - printf("Install type: %s\n", t.install_type); + printf("Install type: %s\n", install_type); } - if (t.prebuilt_arch != NULL) { - printf(" Binary architecture: %s\n", t.prebuilt_arch); + if (prebuilt_arch != NULL) { + printf(" Binary architecture: %s\n", prebuilt_arch); } - if (t.prebuilt_distro != NULL) { - printf(" Packaging distro: %s\n", t.prebuilt_distro); + if (prebuilt_distro != NULL) { + printf(" Packaging distro: %s\n", prebuilt_distro); } - freez(t.install_type); - freez(t.prebuilt_arch); - freez(t.prebuilt_distro); + freez(install_type); + freez(prebuilt_arch); + freez(prebuilt_distro); printf("Features:\n"); printf(" dbengine: %s\n", FEAT_YES_NO(FEAT_DBENGINE)); @@ -372,41 +367,104 @@ void print_build_info_json(void) { printf("}\n"); }; -//return a list of enabled features for use in analytics -//find a way to have proper | +#define add_to_bi(buffer, str) \ + { if(first) { \ + buffer_strcat (b, str); \ + first = 0; \ + } else \ + buffer_strcat (b, "|" str); } + void analytics_build_info(BUFFER *b) { - if(FEAT_DBENGINE) buffer_strcat (b, "dbengine"); - if(FEAT_NATIVE_HTTPS) 
buffer_strcat (b, "|Native HTTPS"); - if(FEAT_CLOUD) buffer_strcat (b, "|Netdata Cloud"); - if(FEAT_CLOUD) buffer_strcat (b, "|ACLK Next Generation"); - if(NEW_CLOUD_PROTO) buffer_strcat (b, "|New Cloud Protocol Support"); - if(FEAT_TLS_HOST_VERIFY) buffer_strcat (b, "|TLS Host Verification"); - if(FEAT_ML) buffer_strcat (b, "|Machine Learning"); - if(FEAT_STREAM_COMPRESSION) buffer_strcat (b, "|Stream Compression"); - - if(FEAT_PROTOBUF) buffer_strcat (b, "|protobuf"); - if(FEAT_JEMALLOC) buffer_strcat (b, "|jemalloc"); - if(FEAT_JSONC) buffer_strcat (b, "|JSON-C"); - if(FEAT_LIBCAP) buffer_strcat (b, "|libcap"); - if(FEAT_CRYPTO) buffer_strcat (b, "|libcrypto"); - if(FEAT_LIBM) buffer_strcat (b, "|libm"); - - if(FEAT_TCMALLOC) buffer_strcat(b, "|tcalloc"); - if(FEAT_ZLIB) buffer_strcat(b, "|zlib"); - - if(FEAT_APPS_PLUGIN) buffer_strcat(b, "|apps"); - if(FEAT_CGROUP_NET) buffer_strcat(b, "|cgroup Network Tracking"); - if(FEAT_CUPS) buffer_strcat(b, "|CUPS"); - if(FEAT_EBPF) buffer_strcat(b, "|EBPF"); - if(FEAT_IPMI) buffer_strcat(b, "|IPMI"); - if(FEAT_NFACCT) buffer_strcat(b, "|NFACCT"); - if(FEAT_PERF) buffer_strcat(b, "|perf"); - if(FEAT_SLABINFO) buffer_strcat(b, "|slabinfo"); - if(FEAT_XEN) buffer_strcat(b, "|Xen"); - if(FEAT_XEN_VBD_ERROR) buffer_strcat(b, "|Xen VBD Error Tracking"); - - if(FEAT_KINESIS) buffer_strcat(b, "|AWS Kinesis"); - if(FEAT_PUBSUB) buffer_strcat(b, "|GCP PubSub"); - if(FEAT_MONGO) buffer_strcat(b, "|MongoDB"); - if(FEAT_REMOTE_WRITE) buffer_strcat(b, "|Prometheus Remote Write"); + int first = 1; +#ifdef ENABLE_DBENGINE + add_to_bi(b, "dbengine"); +#endif +#ifdef ENABLE_HTTPS + add_to_bi(b, "Native HTTPS"); +#endif +#ifdef ENABLE_ACLK + add_to_bi(b, "Netdata Cloud|ACLK Next Generation"); +#endif +#ifdef ENABLE_NEW_CLOUD_PROTOCOL + add_to_bi(b, "New Cloud Protocol Support"); +#endif +#if (FEAT_TLS_HOST_VERIFY!=0) + add_to_bi(b, "TLS Host Verification"); +#endif +#ifdef ENABLE_ML + add_to_bi(b, "Machine Learning"); +#endif +#ifdef ENABLE_COMPRESSION + add_to_bi(b, "Stream Compression"); +#endif + +#ifdef HAVE_PROTOBUF + add_to_bi(b, "protobuf"); +#endif +#ifdef ENABLE_JEMALLOC + add_to_bi(b, "jemalloc"); +#endif +#ifdef ENABLE_JSONC + add_to_bi(b, "JSON-C"); +#endif +#ifdef HAVE_CAPABILITY + add_to_bi(b, "libcap"); +#endif +#ifdef HAVE_CRYPTO + add_to_bi(b, "libcrypto"); +#endif +#ifdef STORAGE_WITH_MATH + add_to_bi(b, "libm"); +#endif + +#ifdef ENABLE_TCMALLOC + add_to_bi(b, "tcalloc"); +#endif +#ifdef NETDATA_WITH_ZLIB + add_to_bi(b, "zlib"); +#endif + +#ifdef ENABLE_APPS_PLUGIN + add_to_bi(b, "apps"); +#endif +#ifdef HAVE_SETNS + add_to_bi(b, "cgroup Network Tracking"); +#endif +#ifdef HAVE_CUPS + add_to_bi(b, "CUPS"); +#endif +#ifdef HAVE_LIBBPF + add_to_bi(b, "EBPF"); +#endif +#ifdef HAVE_FREEIPMI + add_to_bi(b, "IPMI"); +#endif +#ifdef HAVE_NFACCT + add_to_bi(b, "NFACCT"); +#endif +#ifdef ENABLE_PERF_PLUGIN + add_to_bi(b, "perf"); +#endif +#ifdef ENABLE_SLABINFO + add_to_bi(b, "slabinfo"); +#endif +#ifdef HAVE_LIBXENSTAT + add_to_bi(b, "Xen"); +#endif +#ifdef HAVE_XENSTAT_VBD_ERROR + add_to_bi(b, "Xen VBD Error Tracking"); +#endif + +#ifdef HAVE_KINESIS + add_to_bi(b, "AWS Kinesis"); +#endif +#ifdef ENABLE_EXPORTING_PUBSUB + add_to_bi(b, "GCP PubSub"); +#endif +#ifdef HAVE_MONGOC + add_to_bi(b, "MongoDB"); +#endif +#ifdef ENABLE_PROMETHEUS_REMOTE_WRITE + add_to_bi(b, "Prometheus Remote Write"); +#endif } diff --git a/daemon/buildinfo.h b/daemon/buildinfo.h index e2a31c94a..542a0e92e 100644 --- a/daemon/buildinfo.h +++ b/daemon/buildinfo.h @@ -3,18 +3,12 
@@ #ifndef NETDATA_BUILDINFO_H #define NETDATA_BUILDINFO_H 1 -struct install_type_info { - char *install_type; - char *prebuilt_arch; - char *prebuilt_distro; -}; - extern void print_build_info(void); extern void print_build_info_json(void); extern char *get_value_from_key(char *buffer, char *key); -extern struct install_type_info get_install_type(); +extern void get_install_type(char **install_type, char **prebuilt_arch, char **prebuilt_dist); #endif // NETDATA_BUILDINFO_H diff --git a/daemon/commands.c b/daemon/commands.c index ddf20fe0b..6efc37c96 100644 --- a/daemon/commands.c +++ b/daemon/commands.c @@ -387,6 +387,7 @@ static void pipe_write_cb(uv_write_t* req, int status) uv_close((uv_handle_t *)client, pipe_close_cb); --clients; + freez(client->data); info("Command Clients = %u\n", clients); } @@ -400,6 +401,10 @@ static inline void add_string_to_command_reply(char *reply_string, unsigned *rep unsigned len; len = strlen(str); + + if (MAX_COMMAND_LENGTH - 1 < len + *reply_string_size) + len = MAX_COMMAND_LENGTH - *reply_string_size - 1; + strncpyz(reply_string + *reply_string_size, str, len); *reply_string_size += len; } @@ -407,7 +412,7 @@ static inline void add_string_to_command_reply(char *reply_string, unsigned *rep static void send_command_reply(struct command_context *cmd_ctx, cmd_status_t status, char *message) { int ret; - char reply_string[MAX_COMMAND_LENGTH] = {'\0', }; + char *reply_string = mallocz(MAX_COMMAND_LENGTH); char exit_status_string[MAX_EXIT_STATUS_LENGTH + 1] = {'\0', }; unsigned reply_string_size = 0; uv_buf_t write_buf; @@ -424,6 +429,7 @@ static void send_command_reply(struct command_context *cmd_ctx, cmd_status_t sta } cmd_ctx->write_req.data = client; + client->data = reply_string; write_buf.base = reply_string; write_buf.len = reply_string_size; ret = uv_write(&cmd_ctx->write_req, (uv_stream_t *)client, &write_buf, 1, pipe_write_cb); diff --git a/daemon/common.h b/daemon/common.h index 5354e2ae0..e11a6d6b6 100644 --- a/daemon/common.h +++ b/daemon/common.h @@ -51,9 +51,7 @@ // the registry is actually an API feature #include "registry/registry.h" -// backends for archiving the metrics -#include "backends/backends.h" -// the new exporting engine for archiving the metrics +// exporting engine for archiving the metrics #include "exporting/exporting_engine.h" // the netdata API diff --git a/daemon/config/README.md b/daemon/config/README.md index cc755af78..c3c639923 100644 --- a/daemon/config/README.md +++ b/daemon/config/README.md @@ -19,10 +19,9 @@ This config file **is not needed by default**. Netdata works fine out of the box settings. 4. `[health]` to [configure](#health-section-options) general settings for [health monitoring](/health/README.md) 5. `[registry]` for the [Netdata registry](/registry/README.md). -6. `[backend]` to set up [streaming and replication](/streaming/README.md) options. -7. `[statsd]` for the general settings of the [stats.d.plugin](/collectors/statsd.plugin/README.md). -8. `[plugin:NAME]` sections for each collector plugin, under the comment [Per plugin configuration](#per-plugin-configuration). -9. `[CHART_NAME]` sections for each chart defined, under the comment [Per chart configuration](#per-chart-configuration). +6. `[statsd]` for the general settings of the [stats.d.plugin](/collectors/statsd.plugin/README.md). +7. `[plugin:NAME]` sections for each collector plugin, under the comment [Per plugin configuration](#per-plugin-configuration). +8. 
`[CHART_NAME]` sections for each chart defined, under the comment [Per chart configuration](#per-chart-configuration). The configuration file is a `name = value` dictionary. Netdata will not complain if you set options unknown to it. When you check the running configuration by accessing the URL `/netdata.conf` on your Netdata server, Netdata will add a comment on settings it does not currently use. @@ -47,45 +46,45 @@ Please note that your data history will be lost if you have modified `history` p ### [global] section options -| setting|default|info||| -|:-----:|:-----:|:---|---|---| -| process scheduling policy|`keep`|See [Netdata process scheduling policy](/daemon/README.md#netdata-process-scheduling-policy)||| -| OOM score|`1000`|See [OOM score](/daemon/README.md#oom-score)||| -| glibc malloc arena max for plugins|`1`|See [Virtual memory](/daemon/README.md#virtual-memory).||| -| glibc malloc arena max for Netdata|`1`|See [Virtual memory](/daemon/README.md#virtual-memory).||| -| hostname|auto-detected|The hostname of the computer running Netdata.||| -| history|`3996`| Used with `memory mode = save/map/ram/alloc`, not the default `memory mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. This setting can also be configured per chart. Check [Memory Requirements](/database/README.md) for more information. ||| -| update every|`1`|The frequency in seconds, for data collection. For more information see the [performance guide](/docs/guides/configure/performance.md).||| -| config directory|`/etc/netdata`|The directory configuration files are kept.||| -| stock config directory|`/usr/lib/netdata/conf.d`|||| -| log directory|`/var/log/netdata`|The directory in which the [log files](/daemon/README.md#log-files) are kept.||| -| web files directory|`/usr/share/netdata/web`|The directory the web static files are kept.||| -| cache directory|`/var/cache/netdata`|The directory the memory database will be stored if and when Netdata exits. Netdata will re-read the database when it will start again, to continue from the same point.||| -| lib directory|`/var/lib/netdata`|Contains the alarm log and the Netdata instance guid.||| -| home directory|`/var/cache/netdata`|Contains the db files for the collected metrics||| -| plugins directory|`"/usr/libexec/netdata/plugins.d" "/etc/netdata/custom-plugins.d"`|The directory plugin programs are kept. This setting supports multiple directories, space separated. If any directory path contains spaces, enclose it in single or double quotes.||| +| setting|default|info| +|:-----:|:-----:|:---| +| process scheduling policy|`keep`|See [Netdata process scheduling policy](/daemon/README.md#netdata-process-scheduling-policy)| +| OOM score|`0`|| +| glibc malloc arena max for plugins|`1`|See [Virtual memory](/daemon/README.md#virtual-memory).| +| glibc malloc arena max for Netdata|`1`|See [Virtual memory](/daemon/README.md#virtual-memory).| +| hostname|auto-detected|The hostname of the computer running Netdata.| +| history|`3996`| Used with `memory mode = save/map/ram/alloc`, not the default `memory mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. This setting can also be configured per chart. Check [Memory Requirements](/database/README.md) for more information. | +| update every|`1`|The frequency in seconds, for data collection. 
For more information see the [performance guide](/docs/guides/configure/performance.md).|
+| config directory|`/etc/netdata`|The directory where configuration files are kept.|
+| stock config directory|`/usr/lib/netdata/conf.d`||
+| log directory|`/var/log/netdata`|The directory in which the [log files](/daemon/README.md#log-files) are kept.|
+| web files directory|`/usr/share/netdata/web`|The directory where the static web files are kept.|
+| cache directory|`/var/cache/netdata`|The directory where the memory database is stored if and when Netdata exits. Netdata will re-read the database when it starts again, to continue from the same point.|
+| lib directory|`/var/lib/netdata`|Contains the alarm log and the Netdata instance guid.|
+| home directory|`/var/cache/netdata`|Contains the db files for the collected metrics.|
+| plugins directory|`"/usr/libexec/netdata/plugins.d" "/etc/netdata/custom-plugins.d"`|The directory where plugin programs are kept. This setting supports multiple directories, space separated. If any directory path contains spaces, enclose it in single or double quotes.|
| memory mode | `dbengine` | `dbengine`: The default for long-term metrics storage with efficient RAM and disk usage. Can be extended with `page cache size` and `dbengine disk space`.<br/>`save`: Netdata will save its round-robin database on exit and load it on startup.<br/>`map`: Cache files will be updated in real-time. Not ideal for systems with high load or slow disks (check `man mmap`).<br/>`ram`: The round-robin database will be temporary and it will be lost when Netdata exits.<br/>`none`: Disables the database at this host, and disables health monitoring entirely, as that requires a database of metrics. |
-| page cache size | 32 | Determines the amount of RAM in MiB that is dedicated to caching Netdata metric values. |||
-| dbengine disk space | 256 | Determines the amount of disk space in MiB that is dedicated to storing Netdata metric values and all related metadata describing them. |||
-| dbengine multihost disk space | 256 | Same functionality as `dbengine disk space`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. |||
-| host access prefix||This is used in docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set SYS_PTRACE capability on the docker for this work. Check [issue 43](https://github.com/netdata/netdata/issues/43).|
-| memory deduplication (ksm)|`yes`|When set to `yes`, Netdata will offer its in-memory round robin database to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](/database/README.md#ksm)|||
-| TZ environment variable|`:/etc/localtime`|Where to find the timezone|||
-| timezone|auto-detected|The timezone retrieved from the environment variable|||
-| debug flags|`0x0000000000000000`|Bitmap of debug options to enable. For more information check [Tracing Options](/daemon/README.md#debugging).|||
-| debug log|`/var/log/netdata/debug.log`|The filename to save debug information. This file will not be created if debugging is not enabled. You can also set it to `syslog` to send the debug messages to syslog, or `none` to disable this log. For more information check [Tracing Options](/daemon/README.md#debugging).|||
-| error log|`/var/log/netdata/error.log`|The filename to save error messages for Netdata daemon and all plugins (`stderr` is sent here for all Netdata programs, including the plugins). You can also set it to `syslog` to send the errors to syslog, or `none` to disable this log.|||
-| access log|`/var/log/netdata/access.log`|The filename to save the log of web clients accessing Netdata charts. You can also set it to `syslog` to send the access log to syslog, or `none` to disable this log.|||
-| errors flood protection period|`1200`|Length of period (in sec) during which the number of errors should not exceed the `errors to trigger flood protection`.|||
-| errors to trigger flood protection|`200`|Number of errors written to the log in `errors flood protection period` sec before flood protection is activated.|||
-| run as user|`netdata`|The user Netdata will run as.|||
-| pthread stack size|auto-detected||||
-| cleanup obsolete charts after seconds|`3600`|See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions|||
-| gap when lost iterations above|`1`||||
-| cleanup orphan hosts after seconds|`3600`|How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data.|||
-| delete obsolete charts files|`yes`|See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also affects the deletion of files for obsolete dimensions|||
-| delete orphan hosts files|`yes`|Set to `no` to disable non-responsive host removal.|||
-| enable zero metrics|`no`|Set to `yes` to show charts when all their metrics are zero.|||
+| page cache size | 32 | Determines the amount of RAM in MiB that is dedicated to caching Netdata metric values. |
+| dbengine disk space | 256 | Determines the amount of disk space in MiB that is dedicated to storing Netdata metric values and all related metadata describing them. |
+| dbengine multihost disk space | 256 | Same functionality as `dbengine disk space`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. |
+| host access prefix| |This is used in Docker environments where /proc, /sys, etc. have to be accessed via another path. You may also have to set the SYS_PTRACE capability on the container for this to work. Check [issue 43](https://github.com/netdata/netdata/issues/43).|
+| memory deduplication (ksm)|`yes`|When set to `yes`, Netdata will offer its in-memory round robin database to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](/database/README.md#ksm)|
+| TZ environment variable|`:/etc/localtime`|Where to find the timezone|
+| timezone|auto-detected|The timezone retrieved from the environment variable|
+| debug flags|`0x0000000000000000`|Bitmap of debug options to enable. For more information check [Tracing Options](/daemon/README.md#debugging).|
+| debug log|`/var/log/netdata/debug.log`|The filename to save debug information. This file will not be created if debugging is not enabled. You can also set it to `syslog` to send the debug messages to syslog, or `none` to disable this log. For more information check [Tracing Options](/daemon/README.md#debugging).|
+| error log|`/var/log/netdata/error.log`|The filename to save error messages for Netdata daemon and all plugins (`stderr` is sent here for all Netdata programs, including the plugins). You can also set it to `syslog` to send the errors to syslog, or `none` to disable this log.|
+| access log|`/var/log/netdata/access.log`|The filename to save the log of web clients accessing Netdata charts. 
You can also set it to `syslog` to send the access log to syslog, or `none` to disable this log.| +| errors flood protection period|`1200`|Length of period (in sec) during which the number of errors should not exceed the `errors to trigger flood protection`.| +| errors to trigger flood protection|`200`|Number of errors written to the log in `errors flood protection period` sec before flood protection is activated.| +| run as user|`netdata`|The user Netdata will run as.| +| pthread stack size|auto-detected|| +| cleanup obsolete charts after seconds|`3600`|See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions| +| gap when lost iterations above|`1`|| +| cleanup orphan hosts after seconds|`3600`|How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data.| +| delete obsolete charts files|`yes`|See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also affects the deletion of files for obsolete dimensions| +| delete orphan hosts files|`yes`|Set to `no` to disable non-responsive host removal.| +| enable zero metrics|`no`|Set to `yes` to show charts when all their metrics are zero.| ### [web] section options @@ -129,10 +128,6 @@ monitoring](/health/README.md). To understand what this section is and how it should be configured, please refer to the [registry documentation](/registry/README.md). -### [backend] - -Refer to the [streaming and replication](/streaming/README.md) documentation. - ## Per-plugin configuration The configuration options for plugins appear in sections following the pattern `[plugin:NAME]`. @@ -227,4 +222,4 @@ multiplying the value of `out` by -1, Netdata creates the negative values seen i ![The system.io chart on a macOS laptop](https://user-images.githubusercontent.com/1153921/69286708-2cfb3900-0bb1-11ea-9fcd-dd8fbb2adf11.png) -[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fdaemon%2Fconfig%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)](<>) + diff --git a/daemon/daemon.c b/daemon/daemon.c index 68e161a3f..2b8a6552b 100644 --- a/daemon/daemon.c +++ b/daemon/daemon.c @@ -102,7 +102,11 @@ int become_user(const char *username, int pid_fd) { gid_t *supplementary_groups = NULL; if(ngroups > 0) { supplementary_groups = mallocz(sizeof(gid_t) * ngroups); +#ifdef __APPLE__ + if(getgrouplist(username, gid, (int *)supplementary_groups, &ngroups) == -1) { +#else if(getgrouplist(username, gid, supplementary_groups, &ngroups) == -1) { +#endif /* __APPLE__ */ if(am_i_root) error("Cannot get supplementary groups of user '%s'.", username); @@ -173,7 +177,7 @@ int become_user(const char *username, int pid_fd) { static void oom_score_adj(void) { char buf[30 + 1]; - long long int old_score, wanted_score = OOM_SCORE_ADJ_MAX, final_score = 0; + long long int old_score, wanted_score = 0, final_score = 0; // read the existing score if(read_single_signed_number_file("/proc/self/oom_score_adj", &old_score)) { @@ -271,8 +275,8 @@ struct sched_def { // the available members are important too! 
// these are all the possible scheduling policies supported by netdata -#ifdef SCHED_IDLE - { "idle", SCHED_IDLE, 0, SCHED_FLAG_NONE }, +#ifdef SCHED_BATCH + { "batch", SCHED_BATCH, 0, SCHED_FLAG_USE_NICE }, #endif #ifdef SCHED_OTHER @@ -280,6 +284,10 @@ struct sched_def { { "nice", SCHED_OTHER, 0, SCHED_FLAG_USE_NICE }, #endif +#ifdef SCHED_IDLE + { "idle", SCHED_IDLE, 0, SCHED_FLAG_NONE }, +#endif + #ifdef SCHED_RR { "rr", SCHED_RR, 0, SCHED_FLAG_PRIORITY_CONFIGURABLE }, #endif @@ -288,10 +296,6 @@ struct sched_def { { "fifo", SCHED_FIFO, 0, SCHED_FLAG_PRIORITY_CONFIGURABLE }, #endif -#ifdef SCHED_BATCH - { "batch", SCHED_BATCH, 0, SCHED_FLAG_USE_NICE }, -#endif - // do not change the scheduling priority { "keep", 0, 0, SCHED_FLAG_KEEP_AS_IS }, { "none", 0, 0, SCHED_FLAG_KEEP_AS_IS }, @@ -339,13 +343,7 @@ static void sched_getscheduler_report(void) { } } } -#else // !HAVE_SCHED_GETSCHEDULER -static void sched_getscheduler_report(void) { -#ifdef HAVE_GETPRIORITY - info("Running with priority %d", getpriority(PRIO_PROCESS, 0)); -#endif // HAVE_GETPRIORITY -} -#endif // !HAVE_SCHED_GETSCHEDULER +#endif /* HAVE_SCHED_GETSCHEDULER */ #ifdef HAVE_SCHED_SETSCHEDULER @@ -418,11 +416,11 @@ fallback: report: sched_getscheduler_report(); } -#else // !HAVE_SCHED_SETSCHEDULER +#else /* HAVE_SCHED_SETSCHEDULER */ static void sched_setscheduler_set(void) { process_nice_level(); } -#endif // !HAVE_SCHED_SETSCHEDULER +#endif /* HAVE_SCHED_SETSCHEDULER */ int become_daemon(int dont_fork, const char *user) { diff --git a/daemon/get-kubernetes-labels.sh.in b/daemon/get-kubernetes-labels.sh.in index 5aa89ab9d..7e11ba3dd 100644 --- a/daemon/get-kubernetes-labels.sh.in +++ b/daemon/get-kubernetes-labels.sh.in @@ -1,6 +1,8 @@ #!/usr/bin/env bash -# Checks if netdata is running in a kubernetes pod and fetches that pod's labels +# Checks if netdata is running in a kubernetes pod and fetches: +# - pod's labels +# - kubernetes cluster name (GKE only) if [ -z "${KUBERNETES_SERVICE_HOST}" ] || [ -z "${KUBERNETES_PORT_443_TCP_PORT}" ] || [ -z "${MY_POD_NAMESPACE}" ] || [ -z "${MY_POD_NAME}" ]; then exit 0 @@ -37,5 +39,20 @@ if ! 
KUBE_SYSTEM_NS_UID=$(jq -r '.metadata.uid' <<< "$KUBE_SYSTEM_NS_DATA" 2>&1) exit 1 fi -echo -e "$POD_LABELS\nk8s_cluster_id:$KUBE_SYSTEM_NS_UID" +LABELS="$POD_LABELS\nk8s_cluster_id:$KUBE_SYSTEM_NS_UID" + +GCP_META_HEADER="Metadata-Flavor: Google" +GCP_META_URL="http://metadata/computeMetadata/v1" +GKE_CLUSTER_NAME="" + +if id=$(curl --fail -s -m 5 --noproxy "*" -H "$GCP_META_HEADER" "$GCP_META_URL/project/project-id"); then + loc=$(curl --fail -s -m 5 --noproxy "*" -H "$GCP_META_HEADER" "$GCP_META_URL/instance/attributes/cluster-location") + name=$(curl --fail -s -m 5 --noproxy "*" -H "$GCP_META_HEADER" "$GCP_META_URL/instance/attributes/cluster-name") + [ -n "$id" ] && [ -n "$loc" ] && [ -n "$name" ] && GKE_CLUSTER_NAME="gke_${id}_${loc}_${name}" +fi + +[ -n "$GKE_CLUSTER_NAME" ] && LABELS+="\nk8s_cluster_name:$GKE_CLUSTER_NAME" + +echo -e "$LABELS" + exit 0 diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c index f4f3e5762..d5cc03159 100644 --- a/daemon/global_statistics.c +++ b/daemon/global_statistics.c @@ -37,7 +37,7 @@ static struct global_statistics { .rrdr_result_points_generated = 0, }; -#if defined(HAVE_C___ATOMIC) && !defined(NETDATA_NO_ATOMIC_INSTRUCTIONS) +#if defined(HAVE_C___ATOMIC) #else netdata_mutex_t global_statistics_mutex = NETDATA_MUTEX_INITIALIZER; @@ -52,7 +52,7 @@ static inline void global_statistics_unlock(void) { void rrdr_query_completed(uint64_t db_points_read, uint64_t result_points_generated) { -#if defined(HAVE_C___ATOMIC) && !defined(NETDATA_NO_ATOMIC_INSTRUCTIONS) +#if defined(HAVE_C___ATOMIC) __atomic_fetch_add(&global_statistics.rrdr_queries_made, 1, __ATOMIC_SEQ_CST); __atomic_fetch_add(&global_statistics.rrdr_db_points_read, db_points_read, __ATOMIC_SEQ_CST); __atomic_fetch_add(&global_statistics.rrdr_result_points_generated, result_points_generated, __ATOMIC_SEQ_CST); @@ -75,7 +75,7 @@ void finished_web_request_statistics(uint64_t dt, uint64_t bytes_sent, uint64_t content_size, uint64_t compressed_content_size) { -#if defined(HAVE_C___ATOMIC) && !defined(NETDATA_NO_ATOMIC_INSTRUCTIONS) +#if defined(HAVE_C___ATOMIC) uint64_t old_web_usec_max = global_statistics.web_usec_max; while(dt > old_web_usec_max) __atomic_compare_exchange(&global_statistics.web_usec_max, &old_web_usec_max, &dt, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); @@ -107,7 +107,7 @@ void finished_web_request_statistics(uint64_t dt, } uint64_t web_client_connected(void) { -#if defined(HAVE_C___ATOMIC) && !defined(NETDATA_NO_ATOMIC_INSTRUCTIONS) +#if defined(HAVE_C___ATOMIC) __atomic_fetch_add(&global_statistics.connected_clients, 1, __ATOMIC_SEQ_CST); uint64_t id = __atomic_fetch_add(&global_statistics.web_client_count, 1, __ATOMIC_SEQ_CST); #else @@ -125,7 +125,7 @@ uint64_t web_client_connected(void) { } void web_client_disconnected(void) { -#if defined(HAVE_C___ATOMIC) && !defined(NETDATA_NO_ATOMIC_INSTRUCTIONS) +#if defined(HAVE_C___ATOMIC) __atomic_fetch_sub(&global_statistics.connected_clients, 1, __ATOMIC_SEQ_CST); #else if (web_server_is_multithreaded) @@ -140,7 +140,7 @@ void web_client_disconnected(void) { static inline void global_statistics_copy(struct global_statistics *gs, uint8_t options) { -#if defined(HAVE_C___ATOMIC) && !defined(NETDATA_NO_ATOMIC_INSTRUCTIONS) +#if defined(HAVE_C___ATOMIC) gs->connected_clients = __atomic_fetch_add(&global_statistics.connected_clients, 0, __ATOMIC_SEQ_CST); gs->web_requests = __atomic_fetch_add(&global_statistics.web_requests, 0, __ATOMIC_SEQ_CST); gs->web_usec = __atomic_fetch_add(&global_statistics.web_usec, 0, 
__ATOMIC_SEQ_CST); diff --git a/daemon/main.c b/daemon/main.c index 7c8e09a5f..c18778974 100644 --- a/daemon/main.c +++ b/daemon/main.c @@ -250,6 +250,8 @@ void cancel_main_threads() { else info("All threads finished."); + for (i = 0; static_threads[i].name != NULL ; i++) + freez(static_threads[i].thread); free(static_threads); } @@ -445,15 +447,6 @@ static void backwards_compatible_config() { config_move(CONFIG_SECTION_GLOBAL, "web compression level", CONFIG_SECTION_WEB, "gzip compression level"); - - config_move(CONFIG_SECTION_GLOBAL, "web files owner", - CONFIG_SECTION_WEB, "web files owner"); - - config_move(CONFIG_SECTION_GLOBAL, "web files group", - CONFIG_SECTION_WEB, "web files group"); - - config_move(CONFIG_SECTION_BACKEND, "opentsdb host tags", - CONFIG_SECTION_BACKEND, "host tags"); } static void get_netdata_configured_variables() { @@ -690,6 +683,7 @@ int main(int argc, char **argv) { int i; int config_loaded = 0; int dont_fork = 0; + bool close_open_fds = true; size_t default_stacksize; char *user = NULL; @@ -794,8 +788,13 @@ int main(int argc, char **argv) { } if(strcmp(optarg, "unittest") == 0) { - if(unit_test_buffer()) return 1; - if(unit_test_str2ld()) return 1; + if (unit_test_static_threads()) + return 1; + if (unit_test_buffer()) + return 1; + if (unit_test_str2ld()) + return 1; + // No call to load the config file on this code-path post_conf_load(&user); get_netdata_configured_variables(); @@ -1038,7 +1037,13 @@ int main(int argc, char **argv) { print_build_info_json(); return 0; } - else { + else if(strcmp(optarg, "keepopenfds") == 0) { + // Internal dev option to skip closing inherited + // open FDs. Useful, when we want to run the agent + // under profiling tools that open/maintain their + // own FDs. + close_open_fds = false; + } else { fprintf(stderr, "Unknown -W parameter '%s'\n", optarg); return help(1); } @@ -1053,12 +1058,12 @@ int main(int argc, char **argv) { } #ifdef _SC_OPEN_MAX - // close all open file descriptors, except the standard ones - // the caller may have left open files (lxc-attach has this issue) - { - int fd; - for(fd = (int) (sysconf(_SC_OPEN_MAX) - 1); fd > 2; fd--) - if(fd_is_valid(fd)) close(fd); + if (close_open_fds == true) { + // close all open file descriptors, except the standard ones + // the caller may have left open files (lxc-attach has this issue) + for(int fd = (int) (sysconf(_SC_OPEN_MAX) - 1); fd > 2; fd--) + if(fd_is_valid(fd)) + close(fd); } #endif @@ -1205,11 +1210,6 @@ int main(int argc, char **argv) { info("netdata started on pid %d.", getpid()); - // IMPORTANT: these have to run once, while single threaded - // but after we have switched user - web_files_uid(); - web_files_gid(); - netdata_threads_init_after_fork((size_t)config_get_number(CONFIG_SECTION_GLOBAL, "pthread stack size", (long)default_stacksize)); // initialize internal registry @@ -1232,6 +1232,7 @@ int main(int argc, char **argv) { struct rrdhost_system_info *system_info = calloc(1, sizeof(struct rrdhost_system_info)); get_system_info(system_info); system_info->hops = 0; + get_install_type(&system_info->install_type, &system_info->prebuilt_arch, &system_info->prebuilt_dist); if(rrd_init(netdata_configured_hostname, system_info)) fatal("Cannot initialize localhost instance with name '%s'.", netdata_configured_hostname); @@ -1317,12 +1318,6 @@ int main(int argc, char **argv) { snprintfz(filename, FILENAME_MAX, "%s/.aclk_report_sent", netdata_configured_varlib_dir); if (netdata_anonymous_statistics_enabled > 0 && access(filename, F_OK)) { // -1 -> not 
initialized send_statistics("ACLK_DISABLED", "-", "-"); -#ifdef ACLK_NO_LWS - send_statistics("BUILD_FAIL_LWS", "-", "-"); -#endif -#ifdef ACLK_NO_LIBMOSQ - send_statistics("BUILD_FAIL_MOSQ", "-", "-"); -#endif int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 444); if (fd == -1) error("Cannot create file '%s'. Please fix this.", filename); diff --git a/daemon/service.c b/daemon/service.c index d3a4e5500..61cc1281a 100644 --- a/daemon/service.c +++ b/daemon/service.c @@ -31,6 +31,11 @@ void *service_main(void *ptr) heartbeat_next(&hb, step); rrd_cleanup_obsolete_charts(); + + rrd_wrlock(); + rrdhost_cleanup_orphan_hosts_nolock(localhost); + rrd_unlock(); + } netdata_thread_cleanup_pop(1); diff --git a/daemon/static_threads.c b/daemon/static_threads.c index 534b3c3d8..c07473bd6 100644 --- a/daemon/static_threads.c +++ b/daemon/static_threads.c @@ -11,8 +11,18 @@ extern void *health_main(void *ptr); extern void *pluginsd_main(void *ptr); extern void *service_main(void *ptr); extern void *statsd_main(void *ptr); +extern void *timex_main(void *ptr); const struct netdata_static_thread static_threads_common[] = { + { + .name = "PLUGIN[timex]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "timex", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = timex_main + }, { .name = "PLUGIN[check]", .config_section = CONFIG_SECTION_PLUGINS, @@ -85,15 +95,6 @@ const struct netdata_static_thread static_threads_common[] = { .init_routine = NULL, .start_routine = statsd_main }, - { - .name = "BACKENDS", - .config_section = NULL, - .config_name = NULL, - .enabled = 1, - .thread = NULL, - .init_routine = NULL, - .start_routine = backends_main - }, { .name = "EXPORTING", .config_section = NULL, diff --git a/daemon/static_threads_linux.c b/daemon/static_threads_linux.c index e55c0fec7..5f7a67768 100644 --- a/daemon/static_threads_linux.c +++ b/daemon/static_threads_linux.c @@ -9,15 +9,6 @@ extern void *tc_main(void *ptr); extern void *timex_main(void *ptr); const struct netdata_static_thread static_threads_linux[] = { - { - .name = "PLUGIN[timex]", - .config_section = CONFIG_SECTION_PLUGINS, - .config_name = "timex", - .enabled = 1, - .thread = NULL, - .init_routine = NULL, - .start_routine = timex_main - }, { .name = "PLUGIN[tc]", .config_section = CONFIG_SECTION_PLUGINS, diff --git a/daemon/system-info.sh b/daemon/system-info.sh index 728e38091..7fb2f25b5 100755 --- a/daemon/system-info.sh +++ b/daemon/system-info.sh @@ -21,7 +21,7 @@ if [ -z "${VIRTUALIZATION}" ]; then if [ -n "$(command -v systemd-detect-virt 2> /dev/null)" ]; then VIRTUALIZATION="$(systemd-detect-virt -v)" VIRT_DETECTION="systemd-detect-virt" - CONTAINER="$(systemd-detect-virt -c)" + CONTAINER=${CONTAINER:-$(systemd-detect-virt -c)} CONT_DETECTION="systemd-detect-virt" else if grep -q "^flags.*hypervisor" /proc/cpuinfo 2> /dev/null; then @@ -153,7 +153,6 @@ if [ "${CONTAINER}" = "unknown" ] || [ "${CONTAINER}" = "none" ]; then else # Otherwise try and use a user-supplied bind-mount into the container to resolve the host details if [ -e "/host/etc/os-release" ]; then - OS_DETECTION="/etc/os-release" eval "$(grep -E "^(NAME|ID|ID_LIKE|VERSION|VERSION_ID)=" < /host/etc/os-release | sed 's/^/HOST_/')" HOST_OS_DETECTION="/host/etc/os-release" fi @@ -197,10 +196,14 @@ if [ -n "${lscpu}" ] && lscpu > /dev/null 2>&1; then LCPU_COUNT="$(echo "${lscpu_output}" | grep "^CPU(s):" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" CPU_VENDOR="$(echo "${lscpu_output}" | grep "^Vendor ID:" | 
cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" CPU_MODEL="$(echo "${lscpu_output}" | grep "^Model name:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" - possible_cpu_freq="$(echo "${lscpu_output}" | grep -F "CPU max MHz:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -o '^[0-9]*') MHz" - if [ "${possible_cpu_freq}" = " MHz" ]; then - possible_cpu_freq="$(echo "${lscpu_output}" | grep -F "CPU MHz:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -o '^[0-9]*') MHz" + possible_cpu_freq="$(echo "${lscpu_output}" | grep -F "CPU max MHz:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -o '^[0-9]*')" + if [ -z "$possible_cpu_freq" ]; then + possible_cpu_freq="$(echo "${lscpu_output}" | grep -F "CPU MHz:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -o '^[0-9]*')" fi + if [ -z "$possible_cpu_freq" ]; then + possible_cpu_freq="$(echo "${lscpu_output}" | grep "^Model name:" | grep -Eo "[0-9\.]+GHz" | grep -o "^[0-9\.]*" | awk '{print int($0*1000)}')" + fi + [ -n "$possible_cpu_freq" ] && possible_cpu_freq="${possible_cpu_freq} MHz" elif [ -n "${dmidecode}" ] && dmidecode -t processor > /dev/null 2>&1; then dmidecode_output="$(${dmidecode} -t processor 2> /dev/null)" CPU_INFO_SOURCE="dmidecode" @@ -215,6 +218,13 @@ else elif [ "${KERNEL_NAME}" = FreeBSD ]; then CPU_INFO_SOURCE="sysctl" LCPU_COUNT="$(sysctl -n kern.smp.cpus)" + if ! possible_cpu_freq=$(sysctl -n machdep.tsc_freq 2> /dev/null); then + possible_cpu_freq=$(sysctl -n hw.model 2> /dev/null | grep -Eo "[0-9\.]+GHz" | grep -o "^[0-9\.]*" | awk '{print int($0*1000)}') + [ -n "$possible_cpu_freq" ] && possible_cpu_freq="${possible_cpu_freq} MHz" + fi + elif [ "${KERNEL_NAME}" = Darwin ]; then + CPU_INFO_SOURCE="sysctl" + LCPU_COUNT="$(sysctl -n hw.logicalcpu)" elif [ -d /sys/devices/system/cpu ]; then CPU_INFO_SOURCE="sysfs" # This is potentially more accurate than checking `/proc/cpuinfo`. @@ -224,8 +234,15 @@ else LCPU_COUNT="$(grep -c ^processor /proc/cpuinfo)" fi - # If we have GNU uname, we can use that to get CPU info (probably). 
- if uname --version 2> /dev/null | grep -qF 'GNU coreutils'; then + if [ "${KERNEL_NAME}" = Darwin ]; then + CPU_MODEL="$(sysctl -n machdep.cpu.brand_string)" + if [ "${ARCHITECTURE}" = "x86_64" ]; then + CPU_VENDOR="$(sysctl -n machdep.cpu.vendor)" + else + CPU_VENDOR="Apple" + fi + echo "${CPU_INFO_SOURCE}" | grep -qv sysctl && CPU_INFO_SOURCE="${CPU_INFO_SOURCE} sysctl" + elif uname --version 2> /dev/null | grep -qF 'GNU coreutils'; then CPU_INFO_SOURCE="${CPU_INFO_SOURCE} uname" CPU_MODEL="$(uname -p)" CPU_VENDOR="$(uname -i)" @@ -245,12 +262,15 @@ else fi fi -if [ -r /sys/devices/system/cpu/cpu0/cpufreq/base_frequency ]; then +if [ "${KERNEL_NAME}" = Darwin ] && [ "${ARCHITECTURE}" = "x86_64" ]; then + CPU_FREQ="$(sysctl -n hw.cpufrequency)" +elif [ -r /sys/devices/system/cpu/cpu0/cpufreq/base_frequency ]; then if (echo "${CPU_INFO_SOURCE}" | grep -qv sysfs); then CPU_INFO_SOURCE="${CPU_INFO_SOURCE} sysfs" fi - CPU_FREQ="$(cat /sys/devices/system/cpu/cpu0/cpufreq/base_frequency)" + value="$(cat /sys/devices/system/cpu/cpu0/cpufreq/base_frequency)" + CPU_FREQ="$((value * 1000))" elif [ -n "${possible_cpu_freq}" ]; then CPU_FREQ="${possible_cpu_freq}" elif [ -r /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq ]; then @@ -258,7 +278,14 @@ elif [ -r /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq ]; then CPU_INFO_SOURCE="${CPU_INFO_SOURCE} sysfs" fi - CPU_FREQ="$(cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq)" + value="$(cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq)" + CPU_FREQ="$((value * 1000))" +elif [ -r /proc/cpuinfo ]; then + if (echo "${CPU_INFO_SOURCE}" | grep -qv procfs); then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} procfs" + fi + value=$(grep "cpu MHz" /proc/cpuinfo 2>/dev/null | grep -o "[0-9]*" | head -n 1 | awk '{print int($0*1000000)}') + [ -n "$value" ] && CPU_FREQ="$value" fi freq_units="$(echo "${CPU_FREQ}" | cut -f 2 -d ' ')" @@ -376,6 +403,47 @@ elif pgrep "kubelet"; then HOST_IS_K8S_NODE="true" fi +# ------------------------------------------------------------------------------------------------ +# Detect instance metadata for VMs running on cloud providers + +CLOUD_TYPE="unknown" +CLOUD_INSTANCE_TYPE="unknown" +CLOUD_INSTANCE_REGION="unknown" + +if [ "${VIRTUALIZATION}" != "none" ] && command -v curl > /dev/null 2>&1; then + # Try AWS IMDSv2 + if [ "${CLOUD_TYPE}" = "unknown" ]; then + AWS_IMDS_TOKEN="$(curl --fail -s -m 5 --noproxy "*" -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")" + if [ -n "${AWS_IMDS_TOKEN}" ]; then + CLOUD_TYPE="AWS" + CLOUD_INSTANCE_TYPE="$(curl --fail -s -m 5 --noproxy "*" -H "X-aws-ec2-metadata-token: $AWS_IMDS_TOKEN" -v "http://169.254.169.254/latest/meta-data/instance-type" 2> /dev/null)" + CLOUD_INSTANCE_REGION="$(curl --fail -s -m 5 --noproxy "*" -H "X-aws-ec2-metadata-token: $AWS_IMDS_TOKEN" -v "http://169.254.169.254/latest/meta-data/placement/region" 2> /dev/null)" + fi + fi + + # Try GCE computeMetadata v1 + if [ "${CLOUD_TYPE}" = "unknown" ]; then + if [ -n "$(curl --fail -s -m 5 --noproxy "*" -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1")" ]; then + CLOUD_TYPE="GCP" + CLOUD_INSTANCE_TYPE="$(curl --fail -s -m 5 --noproxy "*" -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/machine-type")" + [ -n "$CLOUD_INSTANCE_TYPE" ] && CLOUD_INSTANCE_TYPE=$(basename "$CLOUD_INSTANCE_TYPE") + CLOUD_INSTANCE_REGION="$(curl --fail -s -m 5 --noproxy "*" -H "Metadata-Flavor: Google" 
"http://metadata.google.internal/computeMetadata/v1/instance/zone")" + [ -n "$CLOUD_INSTANCE_REGION" ] && CLOUD_INSTANCE_REGION=$(basename "$CLOUD_INSTANCE_REGION") && CLOUD_INSTANCE_REGION=${CLOUD_INSTANCE_REGION%-*} + fi + fi + + # TODO: needs to be tested in Microsoft Azure + # Try Azure IMDS + # if [ "${CLOUD_TYPE}" = "unknown" ]; then + # AZURE_IMDS_DATA="$(curl --fail -s -m 5 -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance?version=2021-10-01")" + # if [ -n "${AZURE_IMDS_DATA}" ]; then + # CLOUD_TYPE="Azure" + # CLOUD_INSTANCE_TYPE="$(curl --fail -s -m 5 -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance/compute/vmSize?version=2021-10-01&format=text")" + # CLOUD_INSTANCE_REGION="$(curl --fail -s -m 5 -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance/compute/location?version=2021-10-01&format=text")" + # fi + # fi +fi + echo "NETDATA_CONTAINER_OS_NAME=${CONTAINER_NAME}" echo "NETDATA_CONTAINER_OS_ID=${CONTAINER_ID}" echo "NETDATA_CONTAINER_OS_ID_LIKE=${CONTAINER_ID_LIKE}" @@ -406,3 +474,6 @@ echo "NETDATA_SYSTEM_TOTAL_RAM=${TOTAL_RAM}" echo "NETDATA_SYSTEM_RAM_DETECTION=${RAM_DETECTION}" echo "NETDATA_SYSTEM_TOTAL_DISK_SIZE=${DISK_SIZE}" echo "NETDATA_SYSTEM_DISK_DETECTION=${DISK_DETECTION}" +echo "NETDATA_INSTANCE_CLOUD_TYPE=${CLOUD_TYPE}" +echo "NETDATA_INSTANCE_CLOUD_INSTANCE_TYPE=${CLOUD_INSTANCE_TYPE}" +echo "NETDATA_INSTANCE_CLOUD_INSTANCE_REGION=${CLOUD_INSTANCE_REGION}" diff --git a/daemon/unit_test.c b/daemon/unit_test.c index 456dff961..7a52735d5 100644 --- a/daemon/unit_test.c +++ b/daemon/unit_test.c @@ -404,6 +404,44 @@ int unit_test_buffer() { return 0; } +int unit_test_static_threads() { + struct netdata_static_thread *static_threads = static_threads_get(); + + /* + * make sure enough static threads have been registered + */ + if (!static_threads) { + fprintf(stderr, "empty static_threads array\n"); + return 1; + } + + int n; + for (n = 0; static_threads[n].start_routine != NULL; n++) {} + + if (n < 2) { + fprintf(stderr, "only %d static threads registered", n); + freez(static_threads); + return 1; + } + + /* + * verify that each thread's start routine is unique. 
+ */ + for (int i = 0; i != n - 1; i++) { + for (int j = i + 1; j != n; j++) { + if (static_threads[i].start_routine != static_threads[j].start_routine) + continue; + + fprintf(stderr, "Found duplicate threads with name: %s\n", static_threads[i].name); + freez(static_threads); + return 1; + } + } + + freez(static_threads); + return 0; +} + // -------------------------------------------------------------------------------------------------------------------- struct feed_values { @@ -1521,7 +1559,7 @@ static RRDHOST *dbengine_rrdhost_find_or_create(char *name) , netdata_configured_timezone , netdata_configured_abbrev_timezone , netdata_configured_utc_offset - , config_get(CONFIG_SECTION_BACKEND, "host tags", "") + , "" , program_name , program_version , default_rrd_update_every @@ -1694,7 +1732,7 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS] update_every = REGION_UPDATE_EVERY[current_region]; long points = (time_end - time_start) / update_every; for (i = 0 ; i < CHARTS ; ++i) { - RRDR *r = rrd2rrdr(st[i], points, time_start + update_every, time_end, RRDR_GROUPING_AVERAGE, 0, 0, NULL, NULL); + RRDR *r = rrd2rrdr(st[i], points, time_start + update_every, time_end, RRDR_GROUPING_AVERAGE, 0, 0, NULL, NULL, 0); if (!r) { fprintf(stderr, " DB-engine unittest %s: empty RRDR ### E R R O R ###\n", st[i]->name); return ++errors; @@ -1813,7 +1851,7 @@ int test_dbengine(void) long points = (time_end[REGIONS - 1] - time_start[0]) / update_every; // cover all time regions with RRDR long point_offset = (time_start[current_region] - time_start[0]) / update_every; for (i = 0 ; i < CHARTS ; ++i) { - RRDR *r = rrd2rrdr(st[i], points, time_start[0] + update_every, time_end[REGIONS - 1], RRDR_GROUPING_AVERAGE, 0, 0, NULL, NULL); + RRDR *r = rrd2rrdr(st[i], points, time_start[0] + update_every, time_end[REGIONS - 1], RRDR_GROUPING_AVERAGE, 0, 0, NULL, NULL, 0); if (!r) { fprintf(stderr, " DB-engine unittest %s: empty RRDR ### E R R O R ###\n", st[i]->name); ++errors; diff --git a/daemon/unit_test.h b/daemon/unit_test.h index 3a3b64902..6a7a966c3 100644 --- a/daemon/unit_test.h +++ b/daemon/unit_test.h @@ -8,6 +8,7 @@ extern int unit_test(long delay, long shift); extern int run_all_mockup_tests(void); extern int unit_test_str2ld(void); extern int unit_test_buffer(void); +extern int unit_test_static_threads(void); extern int test_sqlite(void); #ifdef ENABLE_DBENGINE extern int test_dbengine(void); -- cgit v1.2.3