summaryrefslogtreecommitdiffstats
path: root/src/claim/claim.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/claim/claim.c')
-rw-r--r--src/claim/claim.c555
1 files changed, 147 insertions, 408 deletions
diff --git a/src/claim/claim.c b/src/claim/claim.c
index 5383aac37..24e4e1c3c 100644
--- a/src/claim/claim.c
+++ b/src/claim/claim.c
@@ -1,470 +1,209 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include "claim.h"
-#include "registry/registry_internals.h"
-#include "aclk/aclk.h"
-#include "aclk/aclk_proxy.h"
-
-char *claiming_pending_arguments = NULL;
-
-static char *claiming_errors[] = {
- "Agent claimed successfully", // 0
- "Unknown argument", // 1
- "Problems with claiming working directory", // 2
- "Missing dependencies", // 3
- "Failure to connect to endpoint", // 4
- "The CLI didn't work", // 5
- "Wrong user", // 6
- "Unknown HTTP error message", // 7
- "invalid node id", // 8
- "invalid node name", // 9
- "invalid room id", // 10
- "invalid public key", // 11
- "token expired/token not found/invalid token", // 12
- "already claimed", // 13
- "processing claiming", // 14
- "Internal Server Error", // 15
- "Gateway Timeout", // 16
- "Service Unavailable", // 17
- "Agent Unique Id Not Readable" // 18
-};
-
-/* Retrieve the claim id for the agent.
- * Caller owns the string.
-*/
-char *get_agent_claimid()
-{
- char *result;
- rrdhost_aclk_state_lock(localhost);
- result = (localhost->aclk_state.claimed_id == NULL) ? NULL : strdupz(localhost->aclk_state.claimed_id);
- rrdhost_aclk_state_unlock(localhost);
- return result;
-}
-
-#define CLAIMING_COMMAND_LENGTH 16384
-#define CLAIMING_PROXY_LENGTH (CLAIMING_COMMAND_LENGTH/4)
-/* rrd_init() and post_conf_load() must have been called before this function */
-CLAIM_AGENT_RESPONSE claim_agent(const char *claiming_arguments, bool force, const char **msg __maybe_unused)
-{
- if (!force || !netdata_cloud_enabled) {
- netdata_log_error("Refusing to claim agent -> cloud functionality has been disabled");
- return CLAIM_AGENT_CLOUD_DISABLED;
- }
+// --------------------------------------------------------------------------------------------------------------------
+// keep track of the last claiming failure reason
-#ifndef DISABLE_CLOUD
- char command_exec_buffer[CLAIMING_COMMAND_LENGTH + 1];
- char command_line_buffer[CLAIMING_COMMAND_LENGTH + 1];
+static char cloud_claim_failure_reason[4096] = "";
- // This is guaranteed to be set early in main via post_conf_load()
- char *cloud_base_url = appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", NULL);
- if (cloud_base_url == NULL) {
- internal_fatal(true, "Do not move the cloud base url out of post_conf_load!!");
- return CLAIM_AGENT_NO_CLOUD_URL;
+void claim_agent_failure_reason_set(const char *format, ...) { // printf-style: stores the formatted failure reason in the static buffer and logs it at error level
+ if(!format || !*format) { // NULL or empty format clears the stored reason, without logging anything
+ cloud_claim_failure_reason[0] = '\0';
+ return;
}
- const char *proxy_str;
- ACLK_PROXY_TYPE proxy_type;
- char proxy_flag[CLAIMING_PROXY_LENGTH] = "-noproxy";
-
- proxy_str = aclk_get_proxy(&proxy_type);
-
- if (proxy_type == PROXY_TYPE_SOCKS5 || proxy_type == PROXY_TYPE_HTTP)
- snprintf(proxy_flag, CLAIMING_PROXY_LENGTH, "-proxy=\"%s\"", proxy_str);
-
- snprintfz(command_exec_buffer, CLAIMING_COMMAND_LENGTH,
- "exec \"%s%snetdata-claim.sh\"",
- netdata_exe_path[0] ? netdata_exe_path : "",
- netdata_exe_path[0] ? "/" : ""
- );
-
- snprintfz(command_line_buffer,
- CLAIMING_COMMAND_LENGTH,
- "%s %s -hostname=%s -id=%s -url=%s -noreload %s",
- command_exec_buffer,
- proxy_flag,
- netdata_configured_hostname,
- localhost->machine_guid,
- cloud_base_url,
- claiming_arguments);
-
- netdata_log_info("Executing agent claiming command: %s", command_exec_buffer);
- POPEN_INSTANCE *instance = spawn_popen_run(command_line_buffer);
- if(!instance) {
- netdata_log_error("Cannot popen(\"%s\").", command_exec_buffer);
- return CLAIM_AGENT_CANNOT_EXECUTE_CLAIM_SCRIPT;
- }
+ va_list args;
+ va_start(args, format);
+ vsnprintf(cloud_claim_failure_reason, sizeof(cloud_claim_failure_reason), format, args); // write is bounded by the buffer size, so an overlong message is truncated
+ va_end(args);
+
+ nd_log(NDLS_DAEMON, NDLP_ERR,
+ "CLAIM: %s", cloud_claim_failure_reason); // every non-empty reason that is set is also logged
+}
- netdata_log_info("Waiting for claiming command '%s' to finish.", command_exec_buffer);
- char read_buffer[100 + 1];
- while (fgets(read_buffer, 100, instance->child_stdout_fp) != NULL) ;
+const char *claim_agent_failure_reason_get(void) { // returns the last stored failure reason, or a fixed default text when none is stored
+ if(!cloud_claim_failure_reason[0]) // empty buffer: nothing was recorded, or the reason was cleared
+ return "Agent is not claimed yet";
+ else
+ return cloud_claim_failure_reason; // points into the static buffer; the caller must not free it
+}
- int exit_code = spawn_popen_wait(instance);
+// --------------------------------------------------------------------------------------------------------------------
+// claimed_id load/save
- netdata_log_info("Agent claiming command '%s' returned with code %d", command_exec_buffer, exit_code);
- if (0 == exit_code) {
- load_claiming_state();
- return CLAIM_AGENT_OK;
- }
- if (exit_code < 0) {
- netdata_log_error("Agent claiming command '%s' failed to complete its run", command_exec_buffer);
- return CLAIM_AGENT_CLAIM_SCRIPT_FAILED;
+bool claimed_id_save_to_file(const char *claimed_id_str) { // writes claimed_id_str to the "claimed_id" file under netdata_configured_cloud_dir; returns false only when the file cannot be opened
+ bool ret;
+ const char *filename = filename_from_path_entry_strdupz(netdata_configured_cloud_dir, "claimed_id"); // owned by this function; released with freez() before returning
+ FILE *fp = fopen(filename, "w"); // "w" creates the file or truncates an existing one
+ if(fp) {
+ fprintf(fp, "%s", claimed_id_str); // NOTE(review): fprintf()/fclose() results are not checked, so a failed write still returns true; assumes claimed_id_str is never NULL - confirm with callers
+ fclose(fp);
+ ret = true;
}
- errno_clear();
- unsigned maximum_known_exit_code = sizeof(claiming_errors) / sizeof(claiming_errors[0]) - 1;
-
- if ((unsigned)exit_code > maximum_known_exit_code) {
- netdata_log_error("Agent failed to be claimed with an unknown error. Cmd: '%s'", command_exec_buffer);
- return CLAIM_AGENT_CLAIM_SCRIPT_RETURNED_INVALID_CODE;
+ else {
+ nd_log(NDLS_DAEMON, NDLP_ERR,
+ "CLAIM: cannot open file '%s' for writing.", filename);
+ ret = false;
}
- netdata_log_error("Agent failed to be claimed using the command '%s' with the following error message: %s",
- command_exec_buffer, claiming_errors[exit_code]);
+ freez((void *)filename); // cast drops the const qualifier so the pointer can be freed
+ return ret;
+}
- if(msg) *msg = claiming_errors[exit_code];
+static ND_UUID claimed_id_parse(const char *claimed_id, const char *source) { // parses claimed_id into an ND_UUID; 'source' is only used in the error log to say where the text came from
+ ND_UUID uuid;
-#else
- UNUSED(claiming_arguments);
- UNUSED(claiming_errors);
-#endif
+ if(uuid_parse_flexi(claimed_id, uuid.uuid) != 0) { // a non-zero result is treated as a parse failure
+ uuid = UUID_ZERO; // failure is reported to the caller as the all-zero UUID
+ nd_log(NDLS_DAEMON, NDLP_ERR,
+ "CLAIM: claimed_id '%s' (loaded from '%s'), is not a valid UUID.",
+ claimed_id, source);
+ }
- return CLAIM_AGENT_FAILED_WITH_MESSAGE;
+ return uuid;
}
-/* Change the claimed state of the agent.
- *
- * This only happens when the user has explicitly requested it:
- * - via the cli tool by reloading the claiming state
- * - after spawning the claim because of a command-line argument
- * If this happens with the ACLK active under an old claim then we MUST KILL THE LINK
- */
-void load_claiming_state(void)
-{
- // --------------------------------------------------------------------
- // Check if the cloud is enabled
-#if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK )
- netdata_cloud_enabled = false;
-#else
- nd_uuid_t uuid;
-
- // Propagate into aclk and registry. Be kind of atomic...
- appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", DEFAULT_CLOUD_BASE_URL);
-
- rrdhost_aclk_state_lock(localhost);
- if (localhost->aclk_state.claimed_id) {
- if (aclk_connected)
- localhost->aclk_state.prev_claimed_id = strdupz(localhost->aclk_state.claimed_id);
- freez(localhost->aclk_state.claimed_id);
- localhost->aclk_state.claimed_id = NULL;
- }
- if (aclk_connected)
- {
- netdata_log_info("Agent was already connected to Cloud - forcing reconnection under new credentials");
- aclk_kill_link = 1;
- }
- aclk_disable_runtime = 0;
-
- char filename[FILENAME_MAX + 1];
- snprintfz(filename, FILENAME_MAX, "%s/cloud.d/claimed_id", netdata_configured_varlib_dir);
+static ND_UUID claimed_id_load_from_file(void) { // reads the "claimed_id" file under netdata_configured_cloud_dir; returns UUID_ZERO when nothing is read or the content is not a valid UUID
+ ND_UUID uuid;
long bytes_read;
+ const char *filename = filename_from_path_entry_strdupz(netdata_configured_cloud_dir, "claimed_id"); // released with freez() before returning
char *claimed_id = read_by_filename(filename, &bytes_read);
- if(claimed_id && uuid_parse(claimed_id, uuid)) {
- netdata_log_error("claimed_id \"%s\" doesn't look like valid UUID", claimed_id);
- freez(claimed_id);
- claimed_id = NULL;
- }
-
- if(claimed_id) {
- localhost->aclk_state.claimed_id = mallocz(UUID_STR_LEN);
- uuid_unparse_lower(uuid, localhost->aclk_state.claimed_id);
- }
- rrdhost_aclk_state_unlock(localhost);
- invalidate_node_instances(&localhost->host_uuid, claimed_id ? &uuid : NULL);
- metaqueue_store_claim_id(&localhost->host_uuid, claimed_id ? &uuid : NULL);
-
- if (!claimed_id) {
- netdata_log_info("Unable to load '%s', setting state to AGENT_UNCLAIMED", filename);
- return;
- }
+ if(!claimed_id) // read_by_filename() returned NULL: treated as "no claimed_id on disk"
+ uuid = UUID_ZERO;
+ else
+ uuid = claimed_id_parse(claimed_id, filename); // the file path is passed as the source name for the error log
freez(claimed_id);
-
- netdata_log_info("File '%s' was found. Setting state to AGENT_CLAIMED.", filename);
- netdata_cloud_enabled = appconfig_get_boolean_ondemand(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", netdata_cloud_enabled);
-#endif
+ freez((void *)filename);
+ return uuid;
}
-struct config cloud_config = { .first_section = NULL,
- .last_section = NULL,
- .mutex = NETDATA_MUTEX_INITIALIZER,
- .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare },
- .rwlock = AVL_LOCK_INITIALIZER } };
-
-void load_cloud_conf(int silent)
-{
- char *nd_disable_cloud = getenv("NETDATA_DISABLE_CLOUD");
- if (nd_disable_cloud && !strncmp(nd_disable_cloud, "1", 1))
- netdata_cloud_enabled = CONFIG_BOOLEAN_NO;
-
- char *filename;
- errno_clear();
-
- int ret = 0;
-
- filename = strdupz_path_subpath(netdata_configured_varlib_dir, "cloud.d/cloud.conf");
-
- ret = appconfig_load(&cloud_config, filename, 1, NULL);
- if(!ret && !silent)
- netdata_log_info("CONFIG: cannot load cloud config '%s'. Running with internal defaults.", filename);
-
- freez(filename);
-
- // --------------------------------------------------------------------
- // Check if the cloud is enabled
-
-#if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK )
- netdata_cloud_enabled = CONFIG_BOOLEAN_NO;
-#else
- netdata_cloud_enabled = appconfig_get_boolean_ondemand(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", netdata_cloud_enabled);
-#endif
-
- // This must be set before any point in the code that accesses it. Do not move it from this function.
- appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", DEFAULT_CLOUD_BASE_URL);
-}
-
-static char *netdata_random_session_id_filename = NULL;
-static nd_uuid_t netdata_random_session_id = { 0 };
-
-bool netdata_random_session_id_generate(void) {
- static char guid[UUID_STR_LEN] = "";
-
- uuid_generate_random(netdata_random_session_id);
- uuid_unparse_lower(netdata_random_session_id, guid);
-
- char filename[FILENAME_MAX + 1];
- snprintfz(filename, FILENAME_MAX, "%s/netdata_random_session_id", netdata_configured_varlib_dir);
-
- bool ret = true;
-
- (void)unlink(filename);
-
- // save it
- int fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, 640);
- if(fd == -1) {
- netdata_log_error("Cannot create random session id file '%s'.", filename);
- ret = false;
+static ND_UUID claimed_id_get_from_cloud_conf(void) { // looks up "claimed_id" in the global section of cloud_config; returns UUID_ZERO when the key is absent, empty, or not a valid UUID
+ if(appconfig_exists(&cloud_config, CONFIG_SECTION_GLOBAL, "claimed_id")) { // NOTE(review): presumably checked first so that appconfig_get() below does not create the key with its "" default - confirm
+ const char *claimed_id = appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "claimed_id", "");
+ if(claimed_id && *claimed_id) // only a non-empty value is parsed
+ return claimed_id_parse(claimed_id, "cloud.conf");
}
- else {
- if (write(fd, guid, UUID_STR_LEN - 1) != UUID_STR_LEN - 1) {
- netdata_log_error("Cannot write the random session id file '%s'.", filename);
- ret = false;
- } else {
- ssize_t bytes = write(fd, "\n", 1);
- UNUSED(bytes);
- }
- close(fd);
- }
-
- if(ret && (!netdata_random_session_id_filename || strcmp(netdata_random_session_id_filename, filename) != 0)) {
- freez(netdata_random_session_id_filename);
- netdata_random_session_id_filename = strdupz(filename);
- }
-
- return ret;
+ return UUID_ZERO;
}
-const char *netdata_random_session_id_get_filename(void) {
- if(!netdata_random_session_id_filename)
- netdata_random_session_id_generate();
+static ND_UUID claimed_id_load(void) { // resolves the claimed_id: cloud_config first, then the claimed_id file; UUID_ZERO when neither yields a valid id
+ ND_UUID uuid = claimed_id_get_from_cloud_conf();
+ if(UUIDiszero(uuid)) // nothing usable in cloud_config: fall back to the file
+ uuid = claimed_id_load_from_file();
- return netdata_random_session_id_filename;
+ return uuid;
}
-bool netdata_random_session_id_matches(const char *guid) {
- if(uuid_is_null(netdata_random_session_id))
- return false;
+bool is_agent_claimed(void) { // true when claim_id_get_uuid() returns a non-zero UUID
+ ND_UUID uuid = claim_id_get_uuid();
+ return !UUIDiszero(uuid);
+}
- nd_uuid_t uuid;
+// --------------------------------------------------------------------------------------------------------------------
- if(uuid_parse(guid, uuid))
+bool claim_id_matches(const char *claim_id) { // true when claim_id parses to a non-zero UUID equal to the one returned by claim_id_get_uuid()
+ ND_UUID this_one = UUID_ZERO;
+ if(uuid_parse_flexi(claim_id, this_one.uuid) != 0 || UUIDiszero(this_one)) // unparsable or all-zero input never matches
return false;
- if(uuid_compare(netdata_random_session_id, uuid) == 0)
+ ND_UUID having = claim_id_get_uuid();
+ if(!UUIDiszero(having) && UUIDeq(having, this_one)) // a zero claim id on our side never matches
return true;
return false;
}
-static bool check_claim_param(const char *s) {
- if(!s || !*s) return true;
+bool claim_id_matches_any(const char *claim_id) { // like claim_id_matches(), but also compares against localhost's claim_id_of_parent and claim_id_of_origin
+ ND_UUID this_one = UUID_ZERO;
+ if(uuid_parse_flexi(claim_id, this_one.uuid) != 0 || UUIDiszero(this_one)) // unparsable or all-zero input never matches
+ return false;
- do {
- if(isalnum((uint8_t)*s) || *s == '.' || *s == ',' || *s == '-' || *s == ':' || *s == '/' || *s == '_')
- ;
- else
- return false;
+ ND_UUID having = claim_id_get_uuid(); // first candidate: the value returned by claim_id_get_uuid()
+ if(!UUIDiszero(having) && UUIDeq(having, this_one))
+ return true;
- } while(*++s);
+ having = localhost->aclk.claim_id_of_parent; // second candidate
+ if(!UUIDiszero(having) && UUIDeq(having, this_one))
+ return true;
- return true;
-}
+ having = localhost->aclk.claim_id_of_origin; // third candidate
+ if(!UUIDiszero(having) && UUIDeq(having, this_one))
+ return true;
-void claim_reload_all(void) {
- nd_log_limits_unlimited();
- load_claiming_state();
- registry_update_cloud_base_url();
- rrdpush_send_claimed_id(localhost);
- nd_log_limits_reset();
+ return false;
}
-int api_v2_claim(struct web_client *w, char *url) {
- char *key = NULL;
- char *token = NULL;
- char *rooms = NULL;
- char *base_url = NULL;
-
- while (url) {
- char *value = strsep_skip_consecutive_separators(&url, "&");
- if (!value || !*value) continue;
-
- char *name = strsep_skip_consecutive_separators(&value, "=");
- if (!name || !*name) continue;
- if (!value || !*value) continue;
-
- if(!strcmp(name, "key"))
- key = value;
- else if(!strcmp(name, "token"))
- token = value;
- else if(!strcmp(name, "rooms"))
- rooms = value;
- else if(!strcmp(name, "url"))
- base_url = value;
+/* Change the claimed state of the agent.
+ *
+ * This only happens when the user has explicitly requested it:
+ * - via the cli tool by reloading the claiming state
+ * - after spawning the claim because of a command-line argument
+ * If this happens with the ACLK active under an old claim then we MUST KILL THE LINK
+ */
+bool load_claiming_state(void) { // returns true when a valid claimed_id was found and installed with claim_id_set()
+ if (aclk_online()) { // already online: request a disconnect so the link is re-established with the reloaded configuration
+ nd_log(NDLS_DAEMON, NDLP_ERR,
+ "CLAIM: agent was already connected to NC - forcing reconnection under new credentials");
+ disconnect_req = ACLK_RELOAD_CONF;
}
+ aclk_disable_runtime = 0;
- BUFFER *wb = w->response.data;
- buffer_flush(wb);
- buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);
-
- time_t now_s = now_realtime_sec();
- CLOUD_STATUS status = buffer_json_cloud_status(wb, now_s);
-
- bool can_be_claimed = false;
- switch(status) {
- case CLOUD_STATUS_AVAILABLE:
- case CLOUD_STATUS_DISABLED:
- case CLOUD_STATUS_OFFLINE:
- can_be_claimed = true;
- break;
-
- case CLOUD_STATUS_UNAVAILABLE:
- case CLOUD_STATUS_BANNED:
- case CLOUD_STATUS_ONLINE:
- can_be_claimed = false;
- break;
+ ND_UUID uuid = claimed_id_load();
+ if(UUIDiszero(uuid)) {
+ // not found
+ if(claim_agent_automatically()) // nothing stored: attempt automatic claiming and, if it reports success, load again
+ uuid = claimed_id_load();
}
- buffer_json_member_add_boolean(wb, "can_be_claimed", can_be_claimed);
-
- if(can_be_claimed && key) {
- if(!netdata_random_session_id_matches(key)) {
- buffer_reset(wb);
- buffer_strcat(wb, "invalid key");
- netdata_random_session_id_generate(); // generate a new key, to avoid an attack to find it
- return HTTP_RESP_FORBIDDEN;
- }
-
- if(!token || !base_url || !check_claim_param(token) || !check_claim_param(base_url) || (rooms && !check_claim_param(rooms))) {
- buffer_reset(wb);
- buffer_strcat(wb, "invalid parameters");
- netdata_random_session_id_generate(); // generate a new key, to avoid an attack to find it
- return HTTP_RESP_BAD_REQUEST;
- }
-
- netdata_random_session_id_generate(); // generate a new key, to avoid an attack to find it
-
- netdata_cloud_enabled = CONFIG_BOOLEAN_AUTO;
- appconfig_set_boolean(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", CONFIG_BOOLEAN_AUTO);
- appconfig_set(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", base_url);
-
- nd_uuid_t claimed_id;
- uuid_generate_random(claimed_id);
- char claimed_id_str[UUID_STR_LEN];
- uuid_unparse_lower(claimed_id, claimed_id_str);
-
- BUFFER *t = buffer_create(1024, NULL);
- if(rooms)
- buffer_sprintf(t, "-id=%s -token=%s -rooms=%s", claimed_id_str, token, rooms);
- else
- buffer_sprintf(t, "-id=%s -token=%s", claimed_id_str, token);
-
- bool success = false;
- const char *msg = NULL;
- CLAIM_AGENT_RESPONSE rc = claim_agent(buffer_tostring(t), true, &msg);
- switch(rc) {
- case CLAIM_AGENT_OK:
- msg = "ok";
- success = true;
- can_be_claimed = false;
- claim_reload_all();
- {
- int ms = 0;
- do {
- status = cloud_status();
- if (status == CLOUD_STATUS_ONLINE && __atomic_load_n(&localhost->node_id, __ATOMIC_RELAXED))
- break;
-
- sleep_usec(50 * USEC_PER_MS);
- ms += 50;
- } while (ms < 10000);
- }
- break;
+ bool have_claimed_id = false;
+ if(!UUIDiszero(uuid)) {
+ // we got it somehow
+ claim_id_set(uuid);
+ have_claimed_id = true;
+ }
- case CLAIM_AGENT_NO_CLOUD_URL:
- msg = "No Netdata Cloud URL.";
- break;
+ invalidate_node_instances(&localhost->host_id.uuid, have_claimed_id ? &uuid.uuid : NULL); // NULL is passed when there is no claimed_id
+ metaqueue_store_claim_id(&localhost->host_id.uuid, have_claimed_id ? &uuid.uuid : NULL);
- case CLAIM_AGENT_CLAIM_SCRIPT_FAILED:
- msg = "Claiming script failed.";
- break;
+ errno_clear(); // NOTE(review): presumably keeps a stale errno out of the log lines below - confirm
- case CLAIM_AGENT_CLOUD_DISABLED:
- msg = "Netdata Cloud is disabled on this agent.";
- break;
+ if (!have_claimed_id)
+ nd_log(NDLS_DAEMON, NDLP_ERR,
+ "CLAIM: Unable to find our claimed_id, setting state to AGENT_UNCLAIMED");
+ else
+ nd_log(NDLS_DAEMON, NDLP_INFO,
+ "CLAIM: Found a valid claimed_id, setting state to AGENT_CLAIMED");
- case CLAIM_AGENT_CANNOT_EXECUTE_CLAIM_SCRIPT:
- msg = "Failed to execute claiming script.";
- break;
+ return have_claimed_id;
+}
- case CLAIM_AGENT_CLAIM_SCRIPT_RETURNED_INVALID_CODE:
- msg = "Claiming script returned invalid code.";
- break;
+CLOUD_STATUS claim_reload_and_wait_online(void) { // reloads cloud configuration and claiming state, then, if claimed, polls cloud_status() until online with a node_id or the wait budget runs out; returns the last status seen
+ nd_log(NDLS_DAEMON, NDLP_INFO,
+ "CLAIM: Reloading Agent Claiming configuration.");
- default:
- case CLAIM_AGENT_FAILED_WITH_MESSAGE:
- if(!msg)
- msg = "Unknown error";
- break;
- }
-
- // our status may have changed
- // refresh the status in our output
- buffer_flush(wb);
- buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);
- now_s = now_realtime_sec();
- buffer_json_cloud_status(wb, now_s);
-
- // and this is the status of the claiming command we run
- buffer_json_member_add_boolean(wb, "success", success);
- buffer_json_member_add_string(wb, "message", msg);
- }
+ nd_log_limits_unlimited(); // paired with nd_log_limits_reset() once the reload steps are done
+ cloud_conf_load(0);
+ bool claimed = load_claiming_state();
+ registry_update_cloud_base_url();
+ rrdpush_sender_send_claimed_id(localhost);
+ nd_log_limits_reset();
- if(can_be_claimed)
- buffer_json_member_add_string(wb, "key_filename", netdata_random_session_id_get_filename());
+ CLOUD_STATUS status = cloud_status(); // when not claimed, this single reading is returned without waiting
+ if(claimed) {
+ int ms = 0; // time slept so far, counted in 50 ms steps
+ do {
+ status = cloud_status();
+ if ((status == CLOUD_STATUS_ONLINE) && !UUIDiszero(localhost->node_id)) // done once online and localhost has a non-zero node_id
+ break;
- buffer_json_agents_v2(wb, NULL, now_s, false, false);
- buffer_json_finalize(wb);
+ sleep_usec(50 * USEC_PER_MS);
+ ms += 50;
+ } while (ms < 10000); // stop after about 10 seconds of sleeping
+ }
- return HTTP_RESP_OK;
+ return status;
}