summaryrefslogtreecommitdiffstats
path: root/src/streaming/protocol/command-nodeid.c
blob: 85ace83c8f48ed3428d2b39ce27d6151292ace70 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
// SPDX-License-Identifier: GPL-3.0-or-later

#include "commands.h"
#include "plugins.d/pluginsd_internals.h"

// the child disconnected from the parent, and it has to clear the parent's claim id
//
// Resets host->aclk.claim_id_of_parent to UUID_ZERO, so the claim id stored for the
// previous parent is no longer remembered for this host.
void rrdpush_sender_clear_parent_claim_id(RRDHOST *host) {
    host->aclk.claim_id_of_parent = UUID_ZERO;
}

// Parent side: send to the child a PLUGINSD_KEYWORD_NODE_ID command carrying
// a claim id, the host's node id and the cloud url.
//
// Nothing is sent when the host is localhost, when the host's node id is zero,
// or when the host has no receiver with the STREAM_CAP_NODE_ID capability.
// The receiver is checked and used with host->receiver_lock held.
void rrdpush_receiver_send_node_and_claim_id_to_child(RRDHOST *host) {
    if(host == localhost)
        return;

    if(UUIDiszero(host->node_id))
        return;

    spinlock_lock(&host->receiver_lock);

    if(host->receiver && stream_has_capability(host->receiver, STREAM_CAP_NODE_ID)) {
        char node_id_txt[UUID_STR_LEN] = "";
        uuid_unparse_lower(host->node_id.uuid, node_id_txt);

        CLAIM_ID claim_id = claim_id_get();

        if(!(claim_id_is_set(claim_id) && aclk_online())) {
            // this agent is either not claimed or not connected:
            // fall back to the claim id received from our own parent,
            // so that the connection flow can proceed.
            // that claim id may be zero, which is acceptable.
            claim_id.uuid = host->aclk.claim_id_of_parent;
            uuid_unparse_lower(claim_id.uuid.uuid, claim_id.str);
        }

        char command[4096];
        snprintfz(command, sizeof(command),
                  PLUGINSD_KEYWORD_NODE_ID " '%s' '%s' '%s'\n",
                  claim_id.str, node_id_txt, cloud_config_url_get());

        send_to_plugin(command, __atomic_load_n(&host->receiver->parser, __ATOMIC_RELAXED));
    }

    spinlock_unlock(&host->receiver_lock);
}

// the sender of the child receives node id, claim id and cloud url from the receiver of the parent
void rrdpush_sender_get_node_and_claim_id_from_parent(struct sender_state *s) {
    char *claim_id_str = get_word(s->line.words, s->line.num_words, 1);
    char *node_id_str = get_word(s->line.words, s->line.num_words, 2);
    char *url = get_word(s->line.words, s->line.num_words, 3);

    bool claimed = is_agent_claimed();
    bool update_node_id = false;

    ND_UUID claim_id;
    if (uuid_parse(claim_id_str ? claim_id_str : "", claim_id.uuid) != 0) {
        nd_log(NDLS_DAEMON, NDLP_ERR,
               "STREAM %s [send to %s] received invalid claim id '%s'",
               rrdhost_hostname(s->host), s->connected_to,
               claim_id_str ? claim_id_str : "(unset)");
        return;
    }

    ND_UUID node_id;
    if(uuid_parse(node_id_str ? node_id_str : "", node_id.uuid) != 0) {
        nd_log(NDLS_DAEMON, NDLP_ERR,
               "STREAM %s [send to %s] received an invalid node id '%s'",
               rrdhost_hostname(s->host), s->connected_to,
               node_id_str ? node_id_str : "(unset)");
        return;
    }

    if (!UUIDiszero(s->host->aclk.claim_id_of_parent) && !UUIDeq(s->host->aclk.claim_id_of_parent, claim_id))
        nd_log(NDLS_DAEMON, NDLP_INFO,
               "STREAM %s [send to %s] changed parent's claim id to %s",
               rrdhost_hostname(s->host), s->connected_to,
               claim_id_str ? claim_id_str : "(unset)");

    if(!UUIDiszero(s->host->node_id) && !UUIDeq(s->host->node_id, node_id)) {
        if(claimed) {
            nd_log(NDLS_DAEMON, NDLP_ERR,
                   "STREAM %s [send to %s] parent reports different node id '%s', but we are claimed. Ignoring it.",
                   rrdhost_hostname(s->host), s->connected_to,
                   node_id_str ? node_id_str : "(unset)");
            return;
        }
        else {
            update_node_id = true;
            nd_log(NDLS_DAEMON, NDLP_WARNING,
                   "STREAM %s [send to %s] changed node id to %s",
                   rrdhost_hostname(s->host), s->connected_to,
                   node_id_str ? node_id_str : "(unset)");
        }
    }

    if(!url || !*url) {
        nd_log(NDLS_DAEMON, NDLP_ERR,
               "STREAM %s [send to %s] received an invalid cloud URL '%s'",
               rrdhost_hostname(s->host), s->connected_to,
               url ? url : "(unset)");
        return;
    }

    s->host->aclk.claim_id_of_parent = claim_id;

    // There are some very strange corner cases here:
    //
    // - Agent is claimed but offline, and it receives node_id and cloud_url from a different Netdata Cloud.
    // - Agent is configured to talk to an on-prem Netdata Cloud, it is offline, but the parent is connected
    //   to a different Netdata Cloud.
    //
    // The solution below, tries to get the agent online, using the latest information.
    // So, if the agent is not claimed or not connected, we inherit whatever information sent from the parent,
    // to allow the user to work with it.

    if(claimed && aclk_online())
        // we are directly claimed and connected, ignore node id and cloud url
        return;

    bool node_id_updated = false;
    if(UUIDiszero(s->host->node_id) || update_node_id) {
        s->host->node_id = node_id;
        node_id_updated = true;
    }

    // we change the URL, to allow the agent dashboard to work with Netdata Cloud on-prem, if any.
    cloud_config_url_set(url);

    // send it down the line (to children)
    rrdpush_receiver_send_node_and_claim_id_to_child(s->host);

    if(node_id_updated)
        stream_path_node_id_updated(s->host);
}