diff options
Diffstat (limited to '')
-rw-r--r-- | src/registry/README.md | 217 | ||||
-rw-r--r-- | src/registry/registry.c (renamed from registry/registry.c) | 8 | ||||
-rw-r--r-- | src/registry/registry.h (renamed from registry/registry.h) | 2 | ||||
-rw-r--r-- | src/registry/registry_db.c (renamed from registry/registry_db.c) | 0 | ||||
-rw-r--r-- | src/registry/registry_init.c (renamed from registry/registry_init.c) | 5 | ||||
-rw-r--r-- | src/registry/registry_internals.c (renamed from registry/registry_internals.c) | 14 | ||||
-rw-r--r-- | src/registry/registry_internals.h (renamed from registry/registry_internals.h) | 0 | ||||
-rw-r--r-- | src/registry/registry_log.c (renamed from registry/registry_log.c) | 0 | ||||
-rw-r--r-- | src/registry/registry_machine.c (renamed from registry/registry_machine.c) | 0 | ||||
-rw-r--r-- | src/registry/registry_machine.h (renamed from registry/registry_machine.h) | 0 | ||||
-rw-r--r-- | src/registry/registry_person.c (renamed from registry/registry_person.c) | 2 | ||||
-rw-r--r-- | src/registry/registry_person.h (renamed from registry/registry_person.h) | 0 |
12 files changed, 230 insertions, 18 deletions
diff --git a/src/registry/README.md b/src/registry/README.md new file mode 100644 index 000000000..d976528c7 --- /dev/null +++ b/src/registry/README.md @@ -0,0 +1,217 @@ +<!-- +title: "Registry" +description: "Netdata utilizes a central registry of machines/person GUIDs, URLs, and opt-in account information to provide unified cross-server dashboards." +custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/registry/README.md" +sidebar_label: "Registry" +learn_status: "Published" +learn_rel_path: "Configuration" +--> + +# Registry + +Netdata provides distributed monitoring. + +Traditional monitoring solutions centralize all the data to provide unified dashboards across all servers. Before +Netdata, this was the standard practice. However it has a few issues: + +1. due to the resources required, the number of metrics collected is limited. +2. for the same reason, the data collection frequency is not that high, at best it will be once every 10 or 15 seconds, + at worst every 5 or 10 mins. +3. the central monitoring solution needs dedicated resources, thus becoming "another bottleneck" in the whole + ecosystem. It also requires maintenance, administration, etc. +4. most centralized monitoring solutions are usually only good for presenting _statistics of past performance_ (i.e. + cannot be used for real-time performance troubleshooting). + +Netdata follows a different approach: + +1. data collection happens per second +2. thousands of metrics per server are collected +3. data do not leave the server where they are collected +4. Netdata servers do not talk to each other +5. your browser connects all the Netdata servers + +Using Netdata, your monitoring infrastructure is embedded on each server, limiting significantly the need of additional +resources. Netdata is blazingly fast, very resource efficient and utilizes server resources that already exist and are +spare (on each server). This allows **scaling out** the monitoring infrastructure. 
+ +However, the Netdata approach introduces a few new issues that need to be addressed, one being **the list of Netdata we +have installed**, i.e. the URLs our Netdata servers are listening on. + +To solve this, Netdata utilizes a **central registry**. This registry, together with certain browser features, allows +Netdata to provide unified cross-server dashboards. For example, when you jump from server to server using the node +menu, several session settings (like the currently viewed charts, the current zoom and pan operations on the charts, +etc.) are propagated to the new server, so that the new dashboard will come with exactly the same view. + +## What data does the registry store? + +The registry keeps track of 4 entities: + +1. **machines**: i.e. the Netdata installations (a random GUID generated by each Netdata the first time it starts; we + call this **machine_guid**) + + For each Netdata installation (each `machine_guid`) the registry keeps track of the different URLs it has accessed. + +2. **persons**: i.e. the web browsers accessing the Netdata installations (a random GUID generated by the registry the + first time it sees a new web browser; we call this **person_guid**) + + For each person, the registry keeps track of the Netdata installations it has accessed and their URLs. + +3. **URLs** of Netdata installations (as seen by the web browsers) + + For each URL, the registry keeps the URL and nothing more. Each URL is linked to _persons_ and _machines_. The only + way to find a URL is to know its **machine_guid** or have a **person_guid** that is linked to it. + +4. **accounts**: i.e. the information used to sign in via one of the available sign-in methods. Depending on the + method, this may include an email, or an email and a profile picture or avatar. + +For _persons_/_accounts_ and _machines_, the registry keeps links to _URLs_, each link with 2 timestamps (first time +seen, last time seen) and a counter (number of times it has been seen). 
_machines_, _persons_ and timestamps are stored +in the Netdata registry regardless of whether you sign in or not. + +## Who talks to the registry? + +Your web browser **only**! If sending this information is against your policies, you +can [run your own registry](#run-your-own-registry). + +Your Netdata servers do not talk to the registry. This is a UML diagram of its operation: + +![registry](https://cloud.githubusercontent.com/assets/2662304/19448565/11a70632-94ab-11e6-9d80-f410b4acb797.png) + +## Which is the default registry? + +`https://registry.my-netdata.io`, which is currently served by `https://london.my-netdata.io`. This registry listens to +both HTTP and HTTPS requests but the default is HTTPS. + +### Can this registry handle the global load of Netdata installations? + +Yes! The registry can handle 50.000 - 100.000 requests **per second per core** (depending on the type of CPU, the +computer's memory bandwidth, etc). 50.000 is on J1900 (Celeron 2GHz). + +We believe it can do it... + +## Run your own registry + +**Every Netdata can be a registry**. Just pick one and configure it. + +**To turn any Netdata into a registry**, edit `/etc/netdata/netdata.conf` and set: + +```conf +[registry] + enabled = yes + registry to announce = http://your.registry:19999 +``` + +Restart your Netdata to activate it. + +Then, you need to tell **all your other Netdata servers to advertise your registry**, instead of the default. To do +this, on each of your Netdata servers, edit `/etc/netdata/netdata.conf` and set: + +```conf +[registry] + enabled = no + registry to announce = http://your.registry:19999 +``` + +Note that we have not enabled the registry on the other servers. Only one Netdata (the registry) needs +`[registry].enabled = yes`. + +This is it. You have your registry now. + +You may also want to give your servers different names under the node menu (i.e. to have them sorted / grouped). 
You can +change its registry name by setting on each Netdata server: + +```conf +[registry] + registry hostname = Group1 - Master DB +``` + +So this server will appear in the node menu as `Group1 - Master DB`. The max name length is 50 characters. + +### Limiting access to the registry + +Netdata v1.9+ supports limiting access to the registry from given IPs, like this: + +```conf +[registry] + allow from = * +``` + +`allow from` settings are [Netdata simple patterns](/src/libnetdata/simple_pattern/README.md): string matches that use `*` +as wildcard (any number of times) and a `!` prefix for a negative match. So: `allow from = !10.1.2.3 10.*` will allow +all IPs in `10.*` except `10.1.2.3`. The order is important: left to right, the first positive or negative match is +used. + +Keep in mind that connections to Netdata API ports are filtered by `[web].allow connections from`. So, IPs allowed by +`[registry].allow from` should also be allowed by `[web].allow connections from`. + +The patterns can be matched against IP addresses or the FQDN of the host. In order to check the FQDN of the connection without +opening the Netdata agent to DNS-spoofing, a reverse-dns record must be set up for the connecting host. At connection +time the reverse-dns of the peer IP address is resolved, and a forward DNS resolution is made to validate the IP address +against the name-pattern. + +Please note that this process can be expensive on a machine that is serving many connections. The behaviour of the +pattern matching can be controlled with the following setting: + +```conf +[registry] + allow by dns = heuristic +``` + +The settings are: +- `yes` allows the pattern to match DNS names. +- `no` disables DNS matching for the patterns (they only match IP addresses). +- `heuristic` will estimate if the patterns should match FQDNs by the presence or absence of `:`s or alpha-characters. + +### Where is the registry database stored? 
+ +`/var/lib/netdata/registry/*.db` + +There can be up to 2 files: + +- `registry-log.db`, the transaction log + + all incoming requests that affect the registry are saved in this file in real-time. + +- `registry.db`, the database + + every `[registry].registry save db every new entries` entries in `registry-log.db`, Netdata will save its database + to `registry.db` and empty `registry-log.db`. + +Both files are machine readable text files. + +### How can I disable the SameSite and Secure cookies? + +Beginning with `v1.30.0`, when the Netdata Agent's web server processes a request, it delivers the `SameSite=none` +and `Secure` cookies. If you have problems accessing the local Agent dashboard or Netdata Cloud, disable these +cookies by [editing `netdata.conf`](/docs/netdata-agent/configuration/README.md#edit-netdataconf): + +```conf +[registry] + enable cookies SameSite and Secure = no +``` + +## The future + +The registry opens a whole world of new possibilities for Netdata. Check here what we think: +<https://github.com/netdata/netdata/issues/416> + +## Troubleshooting the registry + +The registry URL should be set to the URL of a Netdata dashboard. This server has to have `[registry].enabled = yes`. +So, accessing the registry URL directly with your web browser, should present the dashboard of the Netdata operating the +registry. + +To use the registry, your web browser needs to support **third party cookies**, since the cookies are set by the +registry while you are browsing the dashboard of another Netdata server. The registry, the first time it sees a new web +browser it tries to figure if the web browser has cookies enabled or not. It does this by setting a cookie and +redirecting the browser back to itself hoping that it will receive the cookie. 
If it does not receive the cookie, the +registry will keep redirecting your web browser back to itself, which after a few redirects will fail with an error like +this: + +```conf +ERROR 409: Cannot ACCESS netdata registry: https://registry.my-netdata.io responded with: {"status":"redirect","registry":"https://registry.my-netdata.io"} +``` + +This error is printed on your web browser console (press F12 on your browser to see it). + + diff --git a/registry/registry.c b/src/registry/registry.c index 3f3c4b274..803115231 100644 --- a/registry/registry.c +++ b/src/registry/registry.c @@ -257,7 +257,7 @@ int registry_request_access_json(RRDHOST *host, struct web_client *w, char *pers registry_json_header(host, w, "access", REGISTRY_STATUS_FAILED); registry_json_footer(w); registry_unlock(); - return HTTP_RESP_PRECOND_FAIL; + return HTTP_RESP_INTERNAL_SERVER_ERROR; } // set the cookie @@ -299,7 +299,7 @@ int registry_request_delete_json(RRDHOST *host, struct web_client *w, char *pers registry_json_header(host, w, "delete", REGISTRY_STATUS_FAILED); registry_json_footer(w); registry_unlock(); - return HTTP_RESP_PRECOND_FAIL; + return HTTP_RESP_BAD_REQUEST; } // generate the response @@ -320,7 +320,7 @@ int registry_request_search_json(RRDHOST *host, struct web_client *w, char *pers if(!person_guid || !person_guid[0]) { registry_json_header(host, w, "search", REGISTRY_STATUS_FAILED); registry_json_footer(w); - return HTTP_RESP_PRECOND_FAIL; + return HTTP_RESP_BAD_REQUEST; } registry_lock(); @@ -362,7 +362,7 @@ int registry_request_switch_json(RRDHOST *host, struct web_client *w, char *pers if(!person_guid || !person_guid[0]) { buffer_flush(w->response.data); buffer_strcat(w->response.data, "Who are you? 
Person GUID is missing"); - return HTTP_RESP_PRECOND_FAIL; + return HTTP_RESP_BAD_REQUEST; } if(!registry_is_valid_url(url)) { diff --git a/registry/registry.h b/src/registry/registry.h index 746fe430f..848eb0ac0 100644 --- a/registry/registry.h +++ b/src/registry/registry.h @@ -74,7 +74,7 @@ void registry_update_cloud_base_url(); // update the registry monitoring charts void registry_statistics(void); -char *registry_get_this_machine_guid(void); +const char *registry_get_this_machine_guid(void); char *registry_get_mgmt_api_key(void); char *registry_get_this_machine_hostname(void); diff --git a/registry/registry_db.c b/src/registry/registry_db.c index 448ca29d3..448ca29d3 100644 --- a/registry/registry_db.c +++ b/src/registry/registry_db.c diff --git a/registry/registry_init.c b/src/registry/registry_init.c index 79523e258..c291c6f82 100644 --- a/registry/registry_init.c +++ b/src/registry/registry_init.c @@ -172,9 +172,6 @@ int registry_init(void) { &netdata_configured_cache_dir, use_mmap, true); - // disable cancelability to avoid enable/disable per item in the dictionary locks - netdata_thread_disable_cancelability(); - registry_log_open(); registry_db_load(); registry_log_load(); @@ -185,8 +182,6 @@ int registry_init(void) { // registry_db_stats(); // registry_generate_curl_urls(); // exit(0); - - netdata_thread_enable_cancelability(); } return 0; diff --git a/registry/registry_internals.c b/src/registry/registry_internals.c index 74f12b558..54fad4254 100644 --- a/registry/registry_internals.c +++ b/src/registry/registry_internals.c @@ -11,7 +11,7 @@ struct registry registry; // parse a GUID and re-generated to be always lower case // this is used as a protection against the variations of GUIDs int regenerate_guid(const char *guid, char *result) { - uuid_t uuid; + nd_uuid_t uuid; if(unlikely(uuid_parse(guid, uuid) == -1)) { netdata_log_info("Registry: GUID '%s' is not a valid GUID.", guid); return -1; @@ -35,12 +35,12 @@ static inline char 
*registry_fix_machine_name(char *name, size_t *len) { char *s = name?name:""; // skip leading spaces - while(*s && isspace(*s)) s++; + while(*s && isspace((uint8_t)*s)) s++; // make sure all spaces are a SPACE char *t = s; while(*t) { - if(unlikely(isspace(*t))) + if(unlikely(isspace((uint8_t)*t))) *t = ' '; t++; @@ -270,14 +270,14 @@ char *registry_get_this_machine_hostname(void) { return registry.hostname; } -char *registry_get_this_machine_guid(void) { +const char *registry_get_this_machine_guid(void) { static char guid[GUID_LEN + 1] = ""; if(likely(guid[0])) return guid; // read it from disk - int fd = open(registry.machine_guid_filename, O_RDONLY); + int fd = open(registry.machine_guid_filename, O_RDONLY | O_CLOEXEC); if(fd != -1) { char buf[GUID_LEN + 1]; if(read(fd, buf, GUID_LEN) != GUID_LEN) @@ -298,14 +298,14 @@ char *registry_get_this_machine_guid(void) { // generate a new one? if(!guid[0]) { - uuid_t uuid; + nd_uuid_t uuid; uuid_generate_time(uuid); uuid_unparse_lower(uuid, guid); guid[GUID_LEN] = '\0'; // save it - fd = open(registry.machine_guid_filename, O_WRONLY|O_CREAT|O_TRUNC, 444); + fd = open(registry.machine_guid_filename, O_WRONLY|O_CREAT|O_TRUNC | O_CLOEXEC, 444); if(fd == -1) fatal("Cannot create unique machine id file '%s'. 
Please fix this.", registry.machine_guid_filename); diff --git a/registry/registry_internals.h b/src/registry/registry_internals.h index c7f8f43dd..c7f8f43dd 100644 --- a/registry/registry_internals.h +++ b/src/registry/registry_internals.h diff --git a/registry/registry_log.c b/src/registry/registry_log.c index 118d47c15..118d47c15 100644 --- a/registry/registry_log.c +++ b/src/registry/registry_log.c diff --git a/registry/registry_machine.c b/src/registry/registry_machine.c index a94fb8ea4..a94fb8ea4 100644 --- a/registry/registry_machine.c +++ b/src/registry/registry_machine.c diff --git a/registry/registry_machine.h b/src/registry/registry_machine.h index 4e8a407fe..4e8a407fe 100644 --- a/registry/registry_machine.h +++ b/src/registry/registry_machine.h diff --git a/registry/registry_person.c b/src/registry/registry_person.c index 4fd40fd75..a9d3ac88a 100644 --- a/registry/registry_person.c +++ b/src/registry/registry_person.c @@ -118,7 +118,7 @@ REGISTRY_PERSON *registry_person_allocate(const char *person_guid, time_t when) REGISTRY_PERSON *p = aral_mallocz(registry.persons_aral); if(!person_guid) { for(;;) { - uuid_t uuid; + nd_uuid_t uuid; uuid_generate(uuid); uuid_unparse_lower(uuid, p->guid); diff --git a/registry/registry_person.h b/src/registry/registry_person.h index 4c2ca710f..4c2ca710f 100644 --- a/registry/registry_person.h +++ b/src/registry/registry_person.h |