1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
// SPDX-License-Identifier: GPL-3.0-or-later
#include "commands.h"
#include "plugins.d/pluginsd_internals.h"
// Child side: the connection to the parent is gone, so the claim id we had
// stored for that parent is reset to UUID_ZERO.
void rrdpush_sender_clear_parent_claim_id(RRDHOST *host) {
    host->aclk.claim_id_of_parent = UUID_ZERO;
}
// Parent side: send to the child streaming this host a claim id, the node id
// of the host and the cloud url, as a single PLUGINSD_KEYWORD_NODE_ID line.
//
// Nothing is sent when the host is localhost, when the host has no node id,
// or when the host has no receiver with the STREAM_CAP_NODE_ID capability.
// The receiver is accessed only while host->receiver_lock is held.
void rrdpush_receiver_send_node_and_claim_id_to_child(RRDHOST *host) {
    if(host == localhost)
        return;

    if(UUIDiszero(host->node_id))
        return;

    spinlock_lock(&host->receiver_lock);

    if(!(host->receiver && stream_has_capability(host->receiver, STREAM_CAP_NODE_ID))) {
        // no child connected, or the child does not understand this command
        spinlock_unlock(&host->receiver_lock);
        return;
    }

    char node_id_txt[UUID_STR_LEN] = "";
    uuid_unparse_lower(host->node_id.uuid, node_id_txt);

    CLAIM_ID cid = claim_id_get();
    if(!claim_id_is_set(cid) || !aclk_online()) {
        // This agent is either not claimed or not connected: forward the claim id
        // we got from our own parent instead, so the connection flow can proceed.
        // That claim id may be all zeros, which is acceptable.
        cid.uuid = host->aclk.claim_id_of_parent;
        uuid_unparse_lower(cid.uuid.uuid, cid.str);
    }

    char line[4096];
    snprintfz(line, sizeof(line),
              PLUGINSD_KEYWORD_NODE_ID " '%s' '%s' '%s'\n",
              cid.str, node_id_txt, cloud_config_url_get());

    send_to_plugin(line, __atomic_load_n(&host->receiver->parser, __ATOMIC_RELAXED));

    spinlock_unlock(&host->receiver_lock);
}
// Child side: the sender received the claim id, node id and cloud url that the
// receiver of the parent sent for this host.
//
// Words read from s->line: [1] claim id, [2] node id, [3] cloud url.
//
// All three inputs are validated first. Only then are changes decided, logged
// and stored, so a rejected message never leaves behind a "changed ..." log
// entry for something that was not actually changed.
//
// Outcome:
//  - invalid claim id / node id / url: log an error and ignore the message;
//  - the parent reports a node id different from ours while we are claimed:
//    log an error and ignore the message;
//  - otherwise the parent's claim id is stored; and unless this agent is both
//    claimed and online, the node id (when unset or changed) and the cloud url
//    are adopted and the information is forwarded to our own children.
void rrdpush_sender_get_node_and_claim_id_from_parent(struct sender_state *s) {
    char *claim_id_str = get_word(s->line.words, s->line.num_words, 1);
    char *node_id_str = get_word(s->line.words, s->line.num_words, 2);
    char *url = get_word(s->line.words, s->line.num_words, 3);

    bool claimed = is_agent_claimed();
    bool update_node_id = false;

    // ------------------------------------------------------------------------
    // validate the inputs - nothing is logged as changed or stored before this

    ND_UUID claim_id;
    if(uuid_parse(claim_id_str ? claim_id_str : "", claim_id.uuid) != 0) {
        nd_log(NDLS_DAEMON, NDLP_ERR,
               "STREAM %s [send to %s] received invalid claim id '%s'",
               rrdhost_hostname(s->host), s->connected_to,
               claim_id_str ? claim_id_str : "(unset)");
        return;
    }

    ND_UUID node_id;
    if(uuid_parse(node_id_str ? node_id_str : "", node_id.uuid) != 0) {
        nd_log(NDLS_DAEMON, NDLP_ERR,
               "STREAM %s [send to %s] received an invalid node id '%s'",
               rrdhost_hostname(s->host), s->connected_to,
               node_id_str ? node_id_str : "(unset)");
        return;
    }

    if(!url || !*url) {
        nd_log(NDLS_DAEMON, NDLP_ERR,
               "STREAM %s [send to %s] received an invalid cloud URL '%s'",
               rrdhost_hostname(s->host), s->connected_to,
               url ? url : "(unset)");
        return;
    }

    // ------------------------------------------------------------------------
    // decide about the node id

    if(!UUIDiszero(s->host->node_id) && !UUIDeq(s->host->node_id, node_id)) {
        if(claimed) {
            nd_log(NDLS_DAEMON, NDLP_ERR,
                   "STREAM %s [send to %s] parent reports different node id '%s', but we are claimed. Ignoring it.",
                   rrdhost_hostname(s->host), s->connected_to,
                   node_id_str ? node_id_str : "(unset)");
            return;
        }

        // not claimed: the claimed-and-online shortcut below cannot trigger,
        // so the node id is really going to be replaced
        update_node_id = true;
        nd_log(NDLS_DAEMON, NDLP_WARNING,
               "STREAM %s [send to %s] changed node id to %s",
               rrdhost_hostname(s->host), s->connected_to,
               node_id_str ? node_id_str : "(unset)");
    }

    // ------------------------------------------------------------------------
    // store the parent's claim id (logged only when it replaces a different one)

    if(!UUIDiszero(s->host->aclk.claim_id_of_parent) && !UUIDeq(s->host->aclk.claim_id_of_parent, claim_id)) {
        nd_log(NDLS_DAEMON, NDLP_INFO,
               "STREAM %s [send to %s] changed parent's claim id to %s",
               rrdhost_hostname(s->host), s->connected_to,
               claim_id_str ? claim_id_str : "(unset)");
    }

    s->host->aclk.claim_id_of_parent = claim_id;

    // There are some very strange corner cases here:
    //
    // - Agent is claimed but offline, and it receives node_id and cloud_url from a different Netdata Cloud.
    // - Agent is configured to talk to an on-prem Netdata Cloud, it is offline, but the parent is connected
    //   to a different Netdata Cloud.
    //
    // The solution below tries to get the agent online, using the latest information.
    // So, if the agent is not claimed or not connected, we inherit whatever information the parent sent,
    // to allow the user to work with it.

    if(claimed && aclk_online())
        // we are directly claimed and connected, ignore node id and cloud url
        return;

    bool node_id_updated = false;
    if(UUIDiszero(s->host->node_id) || update_node_id) {
        s->host->node_id = node_id;
        node_id_updated = true;
    }

    // we change the URL, to allow the agent dashboard to work with Netdata Cloud on-prem, if any.
    cloud_config_url_set(url);

    // send it down the line (to children)
    rrdpush_receiver_send_node_and_claim_id_to_child(s->host);

    if(node_id_updated)
        stream_path_node_id_updated(s->host);
}
|