diff options
Diffstat (limited to 'registry')
-rw-r--r-- | registry/Makefile.am | 9 | ||||
-rw-r--r-- | registry/README.md | 154 | ||||
-rw-r--r-- | registry/registry.c | 421 | ||||
-rw-r--r-- | registry/registry.h | 80 | ||||
-rw-r--r-- | registry/registry_db.c | 346 | ||||
-rw-r--r-- | registry/registry_init.c | 149 | ||||
-rw-r--r-- | registry/registry_internals.c | 325 | ||||
-rw-r--r-- | registry/registry_internals.h | 89 | ||||
-rw-r--r-- | registry/registry_log.c | 136 | ||||
-rw-r--r-- | registry/registry_machine.c | 104 | ||||
-rw-r--r-- | registry/registry_machine.h | 43 | ||||
-rw-r--r-- | registry/registry_person.c | 267 | ||||
-rw-r--r-- | registry/registry_person.h | 62 | ||||
-rw-r--r-- | registry/registry_url.c | 88 | ||||
-rw-r--r-- | registry/registry_url.h | 35 |
15 files changed, 2308 insertions, 0 deletions
diff --git a/registry/Makefile.am b/registry/Makefile.am new file mode 100644 index 0000000..1cb69ed --- /dev/null +++ b/registry/Makefile.am @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/registry/README.md b/registry/README.md new file mode 100644 index 0000000..5a9a2b3 --- /dev/null +++ b/registry/README.md @@ -0,0 +1,154 @@ +# Registry + +Netdata registry implements the `my-netdata` menu on netdata dashboards. +The `my-netdata` menu lists the netdata servers you have visited. + +## Why? + +Netdata provides distributed monitoring. + +Traditional monitoring solutions centralize all the data to provide unified dashboards across all servers. Before netdata, this was the standard practice. However it has a few issues: + +1. due to the resources required, the number of metrics collected is limited. +1. for the same reason, the data collection frequency is not that high, at best it will be once every 10 or 15 seconds, at worst every 5 or 10 mins. +1. the central monitoring solution needs dedicated resources, thus becoming "another bottleneck" in the whole ecosystem. It also requires maintenance, administration, etc. +1. most centralized monitoring solutions are usually only good for presenting *statistics of past performance* (i.e. cannot be used for real-time performance troubleshooting). + +Netdata follows a different approach: + +1. data collection happens per second +1. thousands of metrics per server are collected +1. data do not leave the server where they are collected +1. netdata servers do not talk to each other +1. your browser connects all the netdata servers + +Using netdata, your monitoring infrastructure is embedded on each server, limiting significantly the need of additional resources. 
Netdata is blazingly fast, very resource efficient and utilizes server resources that already exist and are spare (on each server). This allows **scaling out** the monitoring infrastructure. + +However, the netdata approach introduces a few new issues that need to be addressed, one being **the list of netdata we have installed**, i.e. the URLs our netdata servers are listening on. + +To solve this, netdata utilizes a **central registry**. This registry, together with certain browser features, allows netdata to provide unified cross-server dashboards. For example, when you jump from server to server using the `my-netdata` menu, several session settings (like the currently viewed charts, the current zoom and pan operations on the charts, etc.) are propagated to the new server, so that the new dashboard will come with exactly the same view. + +## What is the registry? + +The registry keeps track of 3 entities: + +1. **machines**: i.e. the netdata installations (a random GUID generated by each netdata the first time it starts; we call this **machine_guid**) + + For each netdata installation (each `machine_guid`) the registry keeps track of the different URLs it is accessed from. + +2. **persons**: i.e. the web browsers accessing the netdata installations (a random GUID generated by the registry the first time it sees a new web browser; we call this **person_guid**) + + For each person, the registry keeps track of the netdata installations it has accessed and their URLs. + +3. **URLs** of netdata installations (as seen by the web browsers) + + For each URL, the registry keeps the URL and nothing more. Each URL is linked to *persons* and *machines*. The only way to find a URL is to know its **machine_guid** or have a **person_guid** that is linked to it. + +## Who talks to the registry? + +Your web browser **only**! If sending this information is against your policies, you can [run your own registry](#run-your-own-registry). + +Your netdata servers do not talk to the registry. 
This is a UML diagram of its operation: + +![registry](https://cloud.githubusercontent.com/assets/2662304/19448565/11a70632-94ab-11e6-9d80-f410b4acb797.png) + +## What data does the registry store? + +Its database contains: + +- **random person GUIDs** (generated by the registry as a browser cookie) +- **random machine GUIDs** (generated by each netdata server on its first run), including the hostname of the server netdata is running on (without the domain) +- **URLs** (the base URL for accessing a netdata server, as seen by the web browser) + +For *persons* and *machines*, the registry keeps links to *URLs*, each link with 2 timestamps (first time seen, last time seen) and a counter (number of times it has been seen). + +## Which is the default registry? + +`https://registry.my-netdata.io`, which is currently served by `https://london.my-netdata.io`. This registry listens to both HTTP and HTTPS requests but the default is HTTPS. + +### Can this registry handle the global load of netdata installations? + +Yes! The registry can handle 50.000 - 100.000 requests **per second per core** (depending on the type of CPU, the computer's memory bandwidth, etc). 50.000 is on J1900 (Celeron 2GHz). + +We believe it can do it... + +## Run your own registry + +**Every netdata can be a registry**. Just pick one and configure it. + +**To turn any netdata into a registry**, edit `/etc/netdata/netdata.conf` and set: + +``` +[registry] + enabled = yes + registry to announce = http://your.registry:19999 +``` + +Restart your netdata to activate it. + +Then, you need to tell **all your other netdata servers to advertise your registry**, instead of the default. To do this, on each of your netdata servers, edit `/etc/netdata/netdata.conf` and set: + +``` +[registry] + enabled = no + registry to announce = http://your.registry:19999 +``` + +Note that we have not enabled the registry on the other servers. Only one netdata (the registry) needs `[registry].enabled = yes`. + +That is it. 
You have your registry now. + +You may also want to give your servers different names under the **my-netdata** menu (i.e. to have them sorted / grouped). You can change a server's registry name by setting, on each netdata server: + +``` +[registry] + registry hostname = Group1 - Master DB +``` + +So this server will appear in **my-netdata** as `Group1 - Master DB`. The max name length is 50 characters. + +### Limiting access to the registry + +netdata v1.9+ supports limiting access to the registry from given IPs, like this: +``` +[registry] + allow from = * +``` + +`allow from` settings are [netdata simple patterns](../libnetdata/simple_pattern/): string matches that use `*` as wildcard (any number of times) and a `!` prefix for a negative match. So: `allow from = !10.1.2.3 10.*` will allow all IPs in `10.*` except `10.1.2.3`. The order is important: left to right, the first positive or negative match is used. + +Keep in mind that connections to netdata API ports are filtered by `[web].allow connections from`. So, IPs allowed by `[registry].allow from` should also be allowed by `[web].allow connections from`. + +### Where is the registry database stored? + +`/var/lib/netdata/registry/*.db` + +There can be up to 2 files: + +- `registry-log.db`, the transaction log + + all incoming requests that affect the registry are saved in this file in real-time. + +- `registry.db`, the database + + every `[registry].registry save db every new entries` entries in `registry-log.db`, netdata will save its database to `registry.db` and empty `registry-log.db`. + +Both files are machine-readable text files. + +## The future + +The registry opens a whole world of new possibilities for netdata. Check here what we think: https://github.com/netdata/netdata/issues/416 + +## Troubleshooting the registry + +The registry URL should be set to the URL of a netdata dashboard. This server has to have `[registry].enabled = yes`. 
So, accessing the registry URL directly with your web browser should present the dashboard of the netdata operating the registry. + +To use the registry, your web browser needs to support **third-party cookies**, since the cookies are set by the registry while you are browsing the dashboard of another netdata server. The first time the registry sees a new web browser, it tries to figure out whether the web browser has cookies enabled. It does this by setting a cookie and redirecting the browser back to itself, hoping that it will receive the cookie. If it does not receive the cookie, the registry will keep redirecting your web browser back to itself, which after a few redirects will fail with an error like this: + +``` +ERROR 409: Cannot ACCESS netdata registry: https://registry.my-netdata.io responded with: {"status":"redirect","registry":"https://registry.my-netdata.io"} +``` + +This error is printed on your web browser console (press F12 on your browser to see it). + +[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fregistry%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]() diff --git a/registry/registry.c b/registry/registry.c new file mode 100644 index 0000000..aaa448c --- /dev/null +++ b/registry/registry.c @@ -0,0 +1,421 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../daemon/common.h" +#include "registry_internals.h" + +#define REGISTRY_STATUS_OK "ok" +#define REGISTRY_STATUS_FAILED "failed" +#define REGISTRY_STATUS_DISABLED "disabled" + +// ---------------------------------------------------------------------------- +// REGISTRY concurrency locking + +static inline void registry_lock(void) { + netdata_mutex_lock(&registry.lock); +} + +static inline void registry_unlock(void) { + netdata_mutex_unlock(&registry.lock); +} + + +// 
---------------------------------------------------------------------------- +// COOKIES + +static void registry_set_cookie(struct web_client *w, const char *guid) { + char edate[100]; + time_t et = now_realtime_sec() + registry.persons_expiration; + struct tm etmbuf, *etm = gmtime_r(&et, &etmbuf); + strftime(edate, sizeof(edate), "%a, %d %b %Y %H:%M:%S %Z", etm); + + snprintfz(w->cookie1, NETDATA_WEB_REQUEST_COOKIE_SIZE, NETDATA_REGISTRY_COOKIE_NAME "=%s; Expires=%s", guid, edate); + + if(registry.registry_domain && registry.registry_domain[0]) + snprintfz(w->cookie2, NETDATA_WEB_REQUEST_COOKIE_SIZE, NETDATA_REGISTRY_COOKIE_NAME "=%s; Domain=%s; Expires=%s", guid, registry.registry_domain, edate); +} + +static inline void registry_set_person_cookie(struct web_client *w, REGISTRY_PERSON *p) { + registry_set_cookie(w, p->guid); +} + + +// ---------------------------------------------------------------------------- +// JSON GENERATION + +static inline void registry_json_header(RRDHOST *host, struct web_client *w, const char *action, const char *status) { + buffer_flush(w->response.data); + w->response.data->contenttype = CT_APPLICATION_JSON; + buffer_sprintf(w->response.data, "{\n\t\"action\": \"%s\",\n\t\"status\": \"%s\",\n\t\"hostname\": \"%s\",\n\t\"machine_guid\": \"%s\"", + action, status, host->registry_hostname, host->machine_guid); +} + +static inline void registry_json_footer(struct web_client *w) { + buffer_strcat(w->response.data, "\n}\n"); +} + +static inline int registry_json_disabled(RRDHOST *host, struct web_client *w, const char *action) { + registry_json_header(host, w, action, REGISTRY_STATUS_DISABLED); + + buffer_sprintf(w->response.data, ",\n\t\"registry\": \"%s\"", + registry.registry_to_announce); + + registry_json_footer(w); + return 200; +} + + +// ---------------------------------------------------------------------------- +// CALLBACKS FOR WALKING THROUGH REGISTRY OBJECTS + +// structure used be the callbacks below +struct 
registry_json_walk_person_urls_callback { + REGISTRY_PERSON *p; + REGISTRY_MACHINE *m; + struct web_client *w; + int count; +}; + +// callback for rendering PERSON_URLs +static int registry_json_person_url_callback(void *entry, void *data) { + REGISTRY_PERSON_URL *pu = (REGISTRY_PERSON_URL *)entry; + struct registry_json_walk_person_urls_callback *c = (struct registry_json_walk_person_urls_callback *)data; + struct web_client *w = c->w; + + if (!strcmp(pu->url->url,"***")) return 0; + + if(unlikely(c->count++)) + buffer_strcat(w->response.data, ","); + + buffer_sprintf(w->response.data, "\n\t\t[ \"%s\", \"%s\", %u000, %u, \"%s\" ]", + pu->machine->guid, pu->url->url, pu->last_t, pu->usages, pu->machine_name); + + return 0; +} + +// callback for rendering MACHINE_URLs +static int registry_json_machine_url_callback(void *entry, void *data) { + REGISTRY_MACHINE_URL *mu = (REGISTRY_MACHINE_URL *)entry; + struct registry_json_walk_person_urls_callback *c = (struct registry_json_walk_person_urls_callback *)data; + struct web_client *w = c->w; + REGISTRY_MACHINE *m = c->m; + + if (!strcmp(mu->url->url,"***")) return 1; + + if(unlikely(c->count++)) + buffer_strcat(w->response.data, ","); + + buffer_sprintf(w->response.data, "\n\t\t[ \"%s\", \"%s\", %u000, %u ]", + m->guid, mu->url->url, mu->last_t, mu->usages); + + return 1; +} + +// ---------------------------------------------------------------------------- + +// structure used be the callbacks below +struct registry_person_url_callback_verify_machine_exists_data { + REGISTRY_MACHINE *m; + int count; +}; + +static inline int registry_person_url_callback_verify_machine_exists(void *entry, void *data) { + struct registry_person_url_callback_verify_machine_exists_data *d = (struct registry_person_url_callback_verify_machine_exists_data *)data; + REGISTRY_PERSON_URL *pu = (REGISTRY_PERSON_URL *)entry; + REGISTRY_MACHINE *m = d->m; + + if(pu->machine == m) + d->count++; + + return 0; +} + +// 
---------------------------------------------------------------------------- +// public HELLO request + +int registry_request_hello_json(RRDHOST *host, struct web_client *w) { + registry_json_header(host, w, "hello", REGISTRY_STATUS_OK); + + buffer_sprintf(w->response.data, + ",\n\t\"registry\": \"%s\",\n\t\"cloud_base_url\": \"%s\",\n\t\"anonymous_statistics\": %s", + registry.registry_to_announce, + registry.cloud_base_url, netdata_anonymous_statistics_enabled?"true":"false"); + + registry_json_footer(w); + return 200; +} + +// ---------------------------------------------------------------------------- +//public ACCESS request + +#define REGISTRY_VERIFY_COOKIES_GUID "give-me-back-this-cookie-now--please" + +// the main method for registering an access +int registry_request_access_json(RRDHOST *host, struct web_client *w, char *person_guid, char *machine_guid, char *url, char *name, time_t when) { + if(unlikely(!registry.enabled)) + return registry_json_disabled(host, w, "access"); + + // ------------------------------------------------------------------------ + // verify the browser supports cookies + + if(registry.verify_cookies_redirects > 0 && !person_guid[0]) { + buffer_flush(w->response.data); + registry_set_cookie(w, REGISTRY_VERIFY_COOKIES_GUID); + w->response.data->contenttype = CT_APPLICATION_JSON; + buffer_sprintf(w->response.data, "{ \"status\": \"redirect\", \"registry\": \"%s\" }", registry.registry_to_announce); + return 200; + } + + if(unlikely(person_guid[0] && !strcmp(person_guid, REGISTRY_VERIFY_COOKIES_GUID))) + person_guid[0] = '\0'; + + // ------------------------------------------------------------------------ + + registry_lock(); + + REGISTRY_PERSON *p = registry_request_access(person_guid, machine_guid, url, name, when); + if(!p) { + registry_json_header(host, w, "access", REGISTRY_STATUS_FAILED); + registry_json_footer(w); + registry_unlock(); + return 412; + } + + // set the cookie + registry_set_person_cookie(w, p); + + // generate the 
response + registry_json_header(host, w, "access", REGISTRY_STATUS_OK); + + buffer_sprintf(w->response.data, ",\n\t\"person_guid\": \"%s\",\n\t\"urls\": [", p->guid); + struct registry_json_walk_person_urls_callback c = { p, NULL, w, 0 }; + avl_traverse(&p->person_urls, registry_json_person_url_callback, &c); + buffer_strcat(w->response.data, "\n\t]\n"); + + registry_json_footer(w); + registry_unlock(); + return 200; +} + +// ---------------------------------------------------------------------------- +// public DELETE request + +// the main method for deleting a URL from a person +int registry_request_delete_json(RRDHOST *host, struct web_client *w, char *person_guid, char *machine_guid, char *url, char *delete_url, time_t when) { + if(!registry.enabled) + return registry_json_disabled(host, w, "delete"); + + registry_lock(); + + REGISTRY_PERSON *p = registry_request_delete(person_guid, machine_guid, url, delete_url, when); + if(!p) { + registry_json_header(host, w, "delete", REGISTRY_STATUS_FAILED); + registry_json_footer(w); + registry_unlock(); + return 412; + } + + // generate the response + registry_json_header(host, w, "delete", REGISTRY_STATUS_OK); + registry_json_footer(w); + registry_unlock(); + return 200; +} + +// ---------------------------------------------------------------------------- +// public SEARCH request + +// the main method for searching the URLs of a netdata +int registry_request_search_json(RRDHOST *host, struct web_client *w, char *person_guid, char *machine_guid, char *url, char *request_machine, time_t when) { + if(!registry.enabled) + return registry_json_disabled(host, w, "search"); + + registry_lock(); + + REGISTRY_MACHINE *m = registry_request_machine(person_guid, machine_guid, url, request_machine, when); + if(!m) { + registry_json_header(host, w, "search", REGISTRY_STATUS_FAILED); + registry_json_footer(w); + registry_unlock(); + return 404; + } + + registry_json_header(host, w, "search", REGISTRY_STATUS_OK); + + 
buffer_strcat(w->response.data, ",\n\t\"urls\": ["); + struct registry_json_walk_person_urls_callback c = { NULL, m, w, 0 }; + dictionary_get_all(m->machine_urls, registry_json_machine_url_callback, &c); + buffer_strcat(w->response.data, "\n\t]\n"); + + registry_json_footer(w); + registry_unlock(); + return 200; +} + +// ---------------------------------------------------------------------------- +// SWITCH REQUEST + +// the main method for switching user identity +int registry_request_switch_json(RRDHOST *host, struct web_client *w, char *person_guid, char *machine_guid, char *url, char *new_person_guid, time_t when) { + if(!registry.enabled) + return registry_json_disabled(host, w, "switch"); + + (void)url; + (void)when; + + registry_lock(); + + REGISTRY_PERSON *op = registry_person_find(person_guid); + if(!op) { + registry_json_header(host, w, "switch", REGISTRY_STATUS_FAILED); + registry_json_footer(w); + registry_unlock(); + return 430; + } + + REGISTRY_PERSON *np = registry_person_find(new_person_guid); + if(!np) { + registry_json_header(host, w, "switch", REGISTRY_STATUS_FAILED); + registry_json_footer(w); + registry_unlock(); + return 431; + } + + REGISTRY_MACHINE *m = registry_machine_find(machine_guid); + if(!m) { + registry_json_header(host, w, "switch", REGISTRY_STATUS_FAILED); + registry_json_footer(w); + registry_unlock(); + return 432; + } + + struct registry_person_url_callback_verify_machine_exists_data data = { m, 0 }; + + // verify the old person has access to this machine + avl_traverse(&op->person_urls, registry_person_url_callback_verify_machine_exists, &data); + if(!data.count) { + registry_json_header(host, w, "switch", REGISTRY_STATUS_FAILED); + registry_json_footer(w); + registry_unlock(); + return 433; + } + + // verify the new person has access to this machine + data.count = 0; + avl_traverse(&np->person_urls, registry_person_url_callback_verify_machine_exists, &data); + if(!data.count) { + registry_json_header(host, w, "switch", 
REGISTRY_STATUS_FAILED); + registry_json_footer(w); + registry_unlock(); + return 434; + } + + // set the cookie of the new person + // the user just switched identity + registry_set_person_cookie(w, np); + + // generate the response + registry_json_header(host, w, "switch", REGISTRY_STATUS_OK); + buffer_sprintf(w->response.data, ",\n\t\"person_guid\": \"%s\"", np->guid); + registry_json_footer(w); + + registry_unlock(); + return 200; +} + +// ---------------------------------------------------------------------------- +// STATISTICS + +void registry_statistics(void) { + if(!registry.enabled) return; + + static RRDSET *sts = NULL, *stc = NULL, *stm = NULL; + + if(unlikely(!sts)) { + sts = rrdset_create_localhost( + "netdata" + , "registry_sessions" + , NULL + , "registry" + , NULL + , "NetData Registry Sessions" + , "sessions" + , "registry" + , "stats" + , 131000 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(sts, "sessions", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + else rrdset_next(sts); + + rrddim_set(sts, "sessions", registry.usages_count); + rrdset_done(sts); + + // ------------------------------------------------------------------------ + + if(unlikely(!stc)) { + stc = rrdset_create_localhost( + "netdata" + , "registry_entries" + , NULL + , "registry" + , NULL + , "NetData Registry Entries" + , "entries" + , "registry" + , "stats" + , 131100 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rrddim_add(stc, "persons", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(stc, "machines", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(stc, "urls", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(stc, "persons_urls", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(stc, "machines_urls", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + else rrdset_next(stc); + + rrddim_set(stc, "persons", registry.persons_count); + rrddim_set(stc, "machines", registry.machines_count); + rrddim_set(stc, "urls", registry.urls_count); + 
rrddim_set(stc, "persons_urls", registry.persons_urls_count); + rrddim_set(stc, "machines_urls", registry.machines_urls_count); + rrdset_done(stc); + + // ------------------------------------------------------------------------ + + if(unlikely(!stm)) { + stm = rrdset_create_localhost( + "netdata" + , "registry_mem" + , NULL + , "registry" + , NULL + , "NetData Registry Memory" + , "KiB" + , "registry" + , "stats" + , 131300 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + rrddim_add(stm, "persons", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(stm, "machines", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(stm, "urls", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(stm, "persons_urls", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + rrddim_add(stm, "machines_urls", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + else rrdset_next(stm); + + rrddim_set(stm, "persons", registry.persons_memory + registry.persons_count * sizeof(NAME_VALUE) + sizeof(DICTIONARY)); + rrddim_set(stm, "machines", registry.machines_memory + registry.machines_count * sizeof(NAME_VALUE) + sizeof(DICTIONARY)); + rrddim_set(stm, "urls", registry.urls_memory); + rrddim_set(stm, "persons_urls", registry.persons_urls_memory); + rrddim_set(stm, "machines_urls", registry.machines_urls_memory + registry.machines_count * sizeof(DICTIONARY) + registry.machines_urls_count * sizeof(NAME_VALUE)); + rrdset_done(stm); +} diff --git a/registry/registry.h b/registry/registry.h new file mode 100644 index 0000000..ca74300 --- /dev/null +++ b/registry/registry.h @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +/* + * netdata registry + * + * this header file describes the public interface + * to the netdata registry + * + * only these high level functions are exposed + * + */ + +// ---------------------------------------------------------------------------- +// TODO +// +// 1. 
the default tracking cookie expires in 1 year, but the persons are not +// removed from the db - this means the database only grows - ideally the +// database should be cleaned in registry_db_save() for both on-disk and +// on-memory entries. +// +// Cleanup: +// i. Find all the PERSONs that have expired cookie +// ii. For each of their PERSON_URLs: +// - decrement the linked MACHINE links +// - if the linked MACHINE has no other links, remove the linked MACHINE too +// - remove the PERSON_URL +// +// 2. add protection to prevent abusing the registry by flooding it with +// requests to fill the memory and crash it. +// +// Possible protections: +// - limit the number of URLs per person +// - limit the number of URLs per machine +// - limit the number of persons +// - limit the number of machines +// - [DONE] limit the size of URLs +// - [DONE] limit the size of PERSON_URL names +// - limit the number of requests that add data to the registry, +// per client IP per hour +// +// 3. lower memory requirements +// +// - embed avl structures directly into registry objects, instead of DICTIONARY +// [DONE for PERSON_URLs, PENDING for MACHINE_URLs] +// - store GUIDs in memory as UUID instead of char * +// - do not track persons using the demo machines only +// (i.e. 
start tracking them only when they access a non-demo machine) +// - [DONE] do not track custom dashboards by default + +#ifndef NETDATA_REGISTRY_H +#define NETDATA_REGISTRY_H 1 + +#include "../daemon/common.h" + +#define NETDATA_REGISTRY_COOKIE_NAME "netdata_registry_id" + +// initialize the registry +// should only happen when netdata starts +extern int registry_init(void); + +// free all data held by the registry +// should only happen when netdata exits +extern void registry_free(void); + +// HTTP requests handled by the registry +extern int registry_request_access_json(RRDHOST *host, struct web_client *w, char *person_guid, char *machine_guid, char *url, char *name, time_t when); +extern int registry_request_delete_json(RRDHOST *host, struct web_client *w, char *person_guid, char *machine_guid, char *url, char *delete_url, time_t when); +extern int registry_request_search_json(RRDHOST *host, struct web_client *w, char *person_guid, char *machine_guid, char *url, char *request_machine, time_t when); +extern int registry_request_switch_json(RRDHOST *host, struct web_client *w, char *person_guid, char *machine_guid, char *url, char *new_person_guid, time_t when); +extern int registry_request_hello_json(RRDHOST *host, struct web_client *w); + +// update the registry monitoring charts +extern void registry_statistics(void); + +extern char *registry_get_this_machine_guid(void); +extern char *registry_get_mgmt_api_key(void); +extern char *registry_get_this_machine_hostname(void); + +extern int regenerate_guid(const char *guid, char *result); + +#endif /* NETDATA_REGISTRY_H */ diff --git a/registry/registry_db.c b/registry/registry_db.c new file mode 100644 index 0000000..d8e2bbd --- /dev/null +++ b/registry/registry_db.c @@ -0,0 +1,346 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../daemon/common.h" +#include "registry_internals.h" + +int registry_db_should_be_saved(void) { + debug(D_REGISTRY, "log entries %llu, max %llu", registry.log_count, 
registry.save_registry_every_entries); + return registry.log_count > registry.save_registry_every_entries; +} + +// ---------------------------------------------------------------------------- +// INTERNAL FUNCTIONS FOR SAVING REGISTRY OBJECTS + +static int registry_machine_save_url(void *entry, void *file) { + REGISTRY_MACHINE_URL *mu = entry; + FILE *fp = file; + + debug(D_REGISTRY, "Registry: registry_machine_save_url('%s')", mu->url->url); + + int ret = fprintf(fp, "V\t%08x\t%08x\t%08x\t%02x\t%s\n", + mu->first_t, + mu->last_t, + mu->usages, + mu->flags, + mu->url->url + ); + + // error handling is done at registry_db_save() + + return ret; +} + +static int registry_machine_save(void *entry, void *file) { + REGISTRY_MACHINE *m = entry; + FILE *fp = file; + + debug(D_REGISTRY, "Registry: registry_machine_save('%s')", m->guid); + + int ret = fprintf(fp, "M\t%08x\t%08x\t%08x\t%s\n", + m->first_t, + m->last_t, + m->usages, + m->guid + ); + + if(ret >= 0) { + int ret2 = dictionary_get_all(m->machine_urls, registry_machine_save_url, fp); + if(ret2 < 0) return ret2; + ret += ret2; + } + + // error handling is done at registry_db_save() + + return ret; +} + +static inline int registry_person_save_url(void *entry, void *file) { + REGISTRY_PERSON_URL *pu = entry; + FILE *fp = file; + + debug(D_REGISTRY, "Registry: registry_person_save_url('%s')", pu->url->url); + + int ret = fprintf(fp, "U\t%08x\t%08x\t%08x\t%02x\t%s\t%s\t%s\n", + pu->first_t, + pu->last_t, + pu->usages, + pu->flags, + pu->machine->guid, + pu->machine_name, + pu->url->url + ); + + // error handling is done at registry_db_save() + + return ret; +} + +static inline int registry_person_save(void *entry, void *file) { + REGISTRY_PERSON *p = entry; + FILE *fp = file; + + debug(D_REGISTRY, "Registry: registry_person_save('%s')", p->guid); + + int ret = fprintf(fp, "P\t%08x\t%08x\t%08x\t%s\n", + p->first_t, + p->last_t, + p->usages, + p->guid + ); + + if(ret >= 0) { + //int ret2 = 
dictionary_get_all(p->person_urls, registry_person_save_url, fp); + int ret2 = avl_traverse(&p->person_urls, registry_person_save_url, fp); + if (ret2 < 0) return ret2; + ret += ret2; + } + + // error handling is done at registry_db_save() + + return ret; +} + +// ---------------------------------------------------------------------------- +// SAVE THE REGISTRY DATABASE + +int registry_db_save(void) { + if(unlikely(!registry.enabled)) + return -1; + + if(unlikely(!registry_db_should_be_saved())) + return -2; + + error_log_limit_unlimited(); + + char tmp_filename[FILENAME_MAX + 1]; + char old_filename[FILENAME_MAX + 1]; + + snprintfz(old_filename, FILENAME_MAX, "%s.old", registry.db_filename); + snprintfz(tmp_filename, FILENAME_MAX, "%s.tmp", registry.db_filename); + + debug(D_REGISTRY, "Registry: Creating file '%s'", tmp_filename); + FILE *fp = fopen(tmp_filename, "w"); + if(!fp) { + error("Registry: Cannot create file: %s", tmp_filename); + error_log_limit_reset(); + return -1; + } + + // dictionary_get_all() has its own locking, so this is safe to do + + debug(D_REGISTRY, "Saving all machines"); + int bytes1 = dictionary_get_all(registry.machines, registry_machine_save, fp); + if(bytes1 < 0) { + error("Registry: Cannot save registry machines - return value %d", bytes1); + fclose(fp); + error_log_limit_reset(); + return bytes1; + } + debug(D_REGISTRY, "Registry: saving machines took %d bytes", bytes1); + + debug(D_REGISTRY, "Saving all persons"); + int bytes2 = dictionary_get_all(registry.persons, registry_person_save, fp); + if(bytes2 < 0) { + error("Registry: Cannot save registry persons - return value %d", bytes2); + fclose(fp); + error_log_limit_reset(); + return bytes2; + } + debug(D_REGISTRY, "Registry: saving persons took %d bytes", bytes2); + + // save the totals + fprintf(fp, "T\t%016llx\t%016llx\t%016llx\t%016llx\t%016llx\t%016llx\n", + registry.persons_count, + registry.machines_count, + registry.usages_count + 1, // this is required - it is lost on db 
rotation + registry.urls_count, + registry.persons_urls_count, + registry.machines_urls_count + ); + + fclose(fp); + + errno = 0; + + // remove the .old db + debug(D_REGISTRY, "Registry: Removing old db '%s'", old_filename); + if(unlink(old_filename) == -1 && errno != ENOENT) + error("Registry: cannot remove old registry file '%s'", old_filename); + + // rename the db to .old + debug(D_REGISTRY, "Registry: Link current db '%s' to .old: '%s'", registry.db_filename, old_filename); + if(link(registry.db_filename, old_filename) == -1 && errno != ENOENT) + error("Registry: cannot move file '%s' to '%s'. Saving registry DB failed!", registry.db_filename, old_filename); + + else { + // remove the database (it is saved in .old) + debug(D_REGISTRY, "Registry: removing db '%s'", registry.db_filename); + if (unlink(registry.db_filename) == -1 && errno != ENOENT) + error("Registry: cannot remove old registry file '%s'", registry.db_filename); + + // move the .tmp to make it active + debug(D_REGISTRY, "Registry: linking tmp db '%s' to active db '%s'", tmp_filename, registry.db_filename); + if (link(tmp_filename, registry.db_filename) == -1) { + error("Registry: cannot move file '%s' to '%s'. Saving registry DB failed!", tmp_filename, + registry.db_filename); + + // move the .old back + debug(D_REGISTRY, "Registry: linking old db '%s' to active db '%s'", old_filename, registry.db_filename); + if(link(old_filename, registry.db_filename) == -1) + error("Registry: cannot move file '%s' to '%s'. 
Recovering the old registry DB failed!", old_filename, registry.db_filename); + } + else { + debug(D_REGISTRY, "Registry: removing tmp db '%s'", tmp_filename); + if(unlink(tmp_filename) == -1) + error("Registry: cannot remove tmp registry file '%s'", tmp_filename); + + // it has been moved successfully + // discard the current registry log + registry_log_recreate(); + registry.log_count = 0; + } + } + + // continue operations + error_log_limit_reset(); + + return -1; +} + +// ---------------------------------------------------------------------------- +// LOAD THE REGISTRY DATABASE + +size_t registry_db_load(void) { + char *s, buf[4096 + 1]; + REGISTRY_PERSON *p = NULL; + REGISTRY_MACHINE *m = NULL; + REGISTRY_URL *u = NULL; + size_t line = 0; + + debug(D_REGISTRY, "Registry: loading active db from: '%s'", registry.db_filename); + FILE *fp = fopen(registry.db_filename, "r"); + if(!fp) { + error("Registry: cannot open registry file: '%s'", registry.db_filename); + return 0; + } + + size_t len = 0; + buf[4096] = '\0'; + while((s = fgets_trim_len(buf, 4096, fp, &len))) { + line++; + + debug(D_REGISTRY, "Registry: read line %zu to length %zu: %s", line, len, s); + switch(*s) { + case 'T': // totals + if(unlikely(len != 103 || s[1] != '\t' || s[18] != '\t' || s[35] != '\t' || s[52] != '\t' || s[69] != '\t' || s[86] != '\t' || s[103] != '\0')) { + error("Registry totals line %zu is wrong (len = %zu).", line, len); + continue; + } + registry.persons_count = strtoull(&s[2], NULL, 16); + registry.machines_count = strtoull(&s[19], NULL, 16); + registry.usages_count = strtoull(&s[36], NULL, 16); + registry.urls_count = strtoull(&s[53], NULL, 16); + registry.persons_urls_count = strtoull(&s[70], NULL, 16); + registry.machines_urls_count = strtoull(&s[87], NULL, 16); + break; + + case 'P': // person + m = NULL; + // verify it is valid + if(unlikely(len != 65 || s[1] != '\t' || s[10] != '\t' || s[19] != '\t' || s[28] != '\t' || s[65] != '\0')) { + error("Registry person line %zu 
is wrong (len = %zu).", line, len); + continue; + } + + s[1] = s[10] = s[19] = s[28] = '\0'; + p = registry_person_allocate(&s[29], strtoul(&s[2], NULL, 16)); + p->last_t = (uint32_t)strtoul(&s[11], NULL, 16); + p->usages = (uint32_t)strtoul(&s[20], NULL, 16); + debug(D_REGISTRY, "Registry loaded person '%s', first: %u, last: %u, usages: %u", p->guid, p->first_t, p->last_t, p->usages); + break; + + case 'M': // machine + p = NULL; + // verify it is valid + if(unlikely(len != 65 || s[1] != '\t' || s[10] != '\t' || s[19] != '\t' || s[28] != '\t' || s[65] != '\0')) { + error("Registry person line %zu is wrong (len = %zu).", line, len); + continue; + } + + s[1] = s[10] = s[19] = s[28] = '\0'; + m = registry_machine_allocate(&s[29], strtoul(&s[2], NULL, 16)); + m->last_t = (uint32_t)strtoul(&s[11], NULL, 16); + m->usages = (uint32_t)strtoul(&s[20], NULL, 16); + debug(D_REGISTRY, "Registry loaded machine '%s', first: %u, last: %u, usages: %u", m->guid, m->first_t, m->last_t, m->usages); + break; + + case 'U': // person URL + if(unlikely(!p)) { + error("Registry: ignoring line %zu, no person loaded: %s", line, s); + continue; + } + + // verify it is valid + if(len < 69 || s[1] != '\t' || s[10] != '\t' || s[19] != '\t' || s[28] != '\t' || s[31] != '\t' || s[68] != '\t') { + error("Registry person URL line %zu is wrong (len = %zu).", line, len); + continue; + } + + s[1] = s[10] = s[19] = s[28] = s[31] = s[68] = '\0'; + + // skip the name to find the url + char *url = &s[69]; + while(*url && *url != '\t') url++; + if(!*url) { + error("Registry person URL line %zu does not have a url.", line); + continue; + } + *url++ = '\0'; + + // u = registry_url_allocate_nolock(url, strlen(url)); + u = registry_url_get(url, strlen(url)); + + time_t first_t = strtoul(&s[2], NULL, 16); + + m = registry_machine_find(&s[32]); + if(!m) m = registry_machine_allocate(&s[32], first_t); + + REGISTRY_PERSON_URL *pu = registry_person_url_allocate(p, m, u, &s[69], strlen(&s[69]), first_t); + 
pu->last_t = (uint32_t)strtoul(&s[11], NULL, 16); + pu->usages = (uint32_t)strtoul(&s[20], NULL, 16); + pu->flags = (uint8_t)strtoul(&s[29], NULL, 16); + debug(D_REGISTRY, "Registry loaded person URL '%s' with name '%s' of machine '%s', first: %u, last: %u, usages: %u, flags: %02x", u->url, pu->machine_name, m->guid, pu->first_t, pu->last_t, pu->usages, pu->flags); + break; + + case 'V': // machine URL + if(unlikely(!m)) { + error("Registry: ignoring line %zu, no machine loaded: %s", line, s); + continue; + } + + // verify it is valid + if(len < 32 || s[1] != '\t' || s[10] != '\t' || s[19] != '\t' || s[28] != '\t' || s[31] != '\t') { + error("Registry person URL line %zu is wrong (len = %zu).", line, len); + continue; + } + + s[1] = s[10] = s[19] = s[28] = s[31] = '\0'; + // u = registry_url_allocate_nolock(&s[32], strlen(&s[32])); + u = registry_url_get(&s[32], strlen(&s[32])); + + REGISTRY_MACHINE_URL *mu = registry_machine_url_allocate(m, u, strtoul(&s[2], NULL, 16)); + mu->last_t = (uint32_t)strtoul(&s[11], NULL, 16); + mu->usages = (uint32_t)strtoul(&s[20], NULL, 16); + mu->flags = (uint8_t)strtoul(&s[29], NULL, 16); + debug(D_REGISTRY, "Registry loaded machine URL '%s', machine '%s', first: %u, last: %u, usages: %u, flags: %02x", u->url, m->guid, mu->first_t, mu->last_t, mu->usages, mu->flags); + break; + + default: + error("Registry: ignoring line %zu of filename '%s': %s.", line, registry.db_filename, s); + break; + } + } + fclose(fp); + + return line; +} diff --git a/registry/registry_init.c b/registry/registry_init.c new file mode 100644 index 0000000..3cf140d --- /dev/null +++ b/registry/registry_init.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../daemon/common.h" +#include "registry_internals.h" + +int registry_init(void) { + char filename[FILENAME_MAX + 1]; + + // registry enabled? 
+ if(web_server_mode != WEB_SERVER_MODE_NONE) { + registry.enabled = config_get_boolean(CONFIG_SECTION_REGISTRY, "enabled", 0); + } + else { + info("Registry is disabled - use the central netdata"); + config_set_boolean(CONFIG_SECTION_REGISTRY, "enabled", 0); + registry.enabled = 0; + } + + // pathnames + snprintfz(filename, FILENAME_MAX, "%s/registry", netdata_configured_varlib_dir); + registry.pathname = config_get(CONFIG_SECTION_REGISTRY, "registry db directory", filename); + if(mkdir(registry.pathname, 0770) == -1 && errno != EEXIST) + fatal("Cannot create directory '%s'.", registry.pathname); + + // filenames + snprintfz(filename, FILENAME_MAX, "%s/netdata.public.unique.id", registry.pathname); + registry.machine_guid_filename = config_get(CONFIG_SECTION_REGISTRY, "netdata unique id file", filename); + + snprintfz(filename, FILENAME_MAX, "%s/registry.db", registry.pathname); + registry.db_filename = config_get(CONFIG_SECTION_REGISTRY, "registry db file", filename); + + snprintfz(filename, FILENAME_MAX, "%s/registry-log.db", registry.pathname); + registry.log_filename = config_get(CONFIG_SECTION_REGISTRY, "registry log file", filename); + + // configuration options + registry.save_registry_every_entries = (unsigned long long)config_get_number(CONFIG_SECTION_REGISTRY, "registry save db every new entries", 1000000); + registry.persons_expiration = config_get_number(CONFIG_SECTION_REGISTRY, "registry expire idle persons days", 365) * 86400; + registry.registry_domain = config_get(CONFIG_SECTION_REGISTRY, "registry domain", ""); + registry.registry_to_announce = config_get(CONFIG_SECTION_REGISTRY, "registry to announce", "https://registry.my-netdata.io"); + registry.hostname = config_get(CONFIG_SECTION_REGISTRY, "registry hostname", netdata_configured_hostname); + registry.verify_cookies_redirects = config_get_boolean(CONFIG_SECTION_REGISTRY, "verify browser cookies support", 1); + + // netdata.cloud configuration, if cloud_base_url == "", cloud functionality is 
disabled. + registry.cloud_base_url = config_get(CONFIG_SECTION_CLOUD, "cloud base url", "https://netdata.cloud"); + + setenv("NETDATA_REGISTRY_HOSTNAME", registry.hostname, 1); + setenv("NETDATA_REGISTRY_URL", registry.registry_to_announce, 1); + + registry.max_url_length = (size_t)config_get_number(CONFIG_SECTION_REGISTRY, "max URL length", 1024); + if(registry.max_url_length < 10) { + registry.max_url_length = 10; + config_set_number(CONFIG_SECTION_REGISTRY, "max URL length", (long long)registry.max_url_length); + } + + registry.max_name_length = (size_t)config_get_number(CONFIG_SECTION_REGISTRY, "max URL name length", 50); + if(registry.max_name_length < 10) { + registry.max_name_length = 10; + config_set_number(CONFIG_SECTION_REGISTRY, "max URL name length", (long long)registry.max_name_length); + } + + // initialize entries counters + registry.persons_count = 0; + registry.machines_count = 0; + registry.usages_count = 0; + registry.urls_count = 0; + registry.persons_urls_count = 0; + registry.machines_urls_count = 0; + + // initialize memory counters + registry.persons_memory = 0; + registry.machines_memory = 0; + registry.urls_memory = 0; + registry.persons_urls_memory = 0; + registry.machines_urls_memory = 0; + + // initialize locks + netdata_mutex_init(®istry.lock); + + // create dictionaries + registry.persons = dictionary_create(DICTIONARY_FLAGS); + registry.machines = dictionary_create(DICTIONARY_FLAGS); + avl_init(®istry.registry_urls_root_index, registry_url_compare); + + // load the registry database + if(registry.enabled) { + registry_log_open(); + registry_db_load(); + registry_log_load(); + + if(unlikely(registry_db_should_be_saved())) + registry_db_save(); + } + + return 0; +} + +void registry_free(void) { + if(!registry.enabled) return; + + // we need to destroy the dictionaries ourselves + // since the dictionaries use memory we allocated + + while(registry.persons->values_index.root) { + REGISTRY_PERSON *p = ((NAME_VALUE 
*)registry.persons->values_index.root)->value; + registry_person_del(p); + } + + while(registry.machines->values_index.root) { + REGISTRY_MACHINE *m = ((NAME_VALUE *)registry.machines->values_index.root)->value; + + // fprintf(stderr, "\nMACHINE: '%s', first: %u, last: %u, usages: %u\n", m->guid, m->first_t, m->last_t, m->usages); + + while(m->machine_urls->values_index.root) { + REGISTRY_MACHINE_URL *mu = ((NAME_VALUE *)m->machine_urls->values_index.root)->value; + + // fprintf(stderr, "\tURL: '%s', first: %u, last: %u, usages: %u, flags: 0x%02x\n", mu->url->url, mu->first_t, mu->last_t, mu->usages, mu->flags); + + //debug(D_REGISTRY, "Registry: destroying persons dictionary from url '%s'", mu->url->url); + //dictionary_destroy(mu->persons); + + debug(D_REGISTRY, "Registry: deleting url '%s' from person '%s'", mu->url->url, m->guid); + dictionary_del(m->machine_urls, mu->url->url); + + debug(D_REGISTRY, "Registry: unlinking url '%s' from machine", mu->url->url); + registry_url_unlink(mu->url); + + debug(D_REGISTRY, "Registry: freeing machine url"); + freez(mu); + } + + debug(D_REGISTRY, "Registry: deleting machine '%s' from machines registry", m->guid); + dictionary_del(registry.machines, m->guid); + + debug(D_REGISTRY, "Registry: destroying URL dictionary of machine '%s'", m->guid); + dictionary_destroy(m->machine_urls); + + debug(D_REGISTRY, "Registry: freeing machine '%s'", m->guid); + freez(m); + } + + // and free the memory of remaining dictionary structures + + debug(D_REGISTRY, "Registry: destroying persons dictionary"); + dictionary_destroy(registry.persons); + + debug(D_REGISTRY, "Registry: destroying machines dictionary"); + dictionary_destroy(registry.machines); +} + diff --git a/registry/registry_internals.c b/registry/registry_internals.c new file mode 100644 index 0000000..b54b901 --- /dev/null +++ b/registry/registry_internals.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../daemon/common.h" +#include 
"registry_internals.h" + +struct registry registry; + +// ---------------------------------------------------------------------------- +// common functions + +// parse a GUID and re-generated to be always lower case +// this is used as a protection against the variations of GUIDs +int regenerate_guid(const char *guid, char *result) { + uuid_t uuid; + if(unlikely(uuid_parse(guid, uuid) == -1)) { + info("Registry: GUID '%s' is not a valid GUID.", guid); + return -1; + } + else { + uuid_unparse_lower(uuid, result); + +#ifdef NETDATA_INTERNAL_CHECKS + if(strcmp(guid, result) != 0) + info("GUID '%s' and re-generated GUID '%s' differ!", guid, result); +#endif /* NETDATA_INTERNAL_CHECKS */ + } + + return 0; +} + +// make sure the names of the machines / URLs do not contain any tabs +// (which are used as our separator in the database files) +// and are properly trimmed (before and after) +static inline char *registry_fix_machine_name(char *name, size_t *len) { + char *s = name?name:""; + + // skip leading spaces + while(*s && isspace(*s)) s++; + + // make sure all spaces are a SPACE + char *t = s; + while(*t) { + if(unlikely(isspace(*t))) + *t = ' '; + + t++; + } + + // remove trailing spaces + while(--t >= s) { + if(*t == ' ') + *t = '\0'; + else + break; + } + t++; + + if(likely(len)) + *len = (t - s); + + return s; +} + +static inline char *registry_fix_url(char *url, size_t *len) { + size_t l = 0; + char *s = registry_fix_machine_name(url, &l); + + // protection from too big URLs + if(l > registry.max_url_length) { + l = registry.max_url_length; + s[l] = '\0'; + } + + if(len) *len = l; + return s; +} + + +// ---------------------------------------------------------------------------- +// HELPERS + +// verify the person, the machine and the URL exist in our DB +REGISTRY_PERSON_URL *registry_verify_request(char *person_guid, char *machine_guid, char *url, REGISTRY_PERSON **pp, REGISTRY_MACHINE **mm) { + char pbuf[GUID_LEN + 1], mbuf[GUID_LEN + 1]; + + if(!person_guid || 
!*person_guid || !machine_guid || !*machine_guid || !url || !*url) { + info("Registry Request Verification: invalid request! person: '%s', machine '%s', url '%s'", person_guid?person_guid:"UNSET", machine_guid?machine_guid:"UNSET", url?url:"UNSET"); + return NULL; + } + + // normalize the url + url = registry_fix_url(url, NULL); + + // make sure the person GUID is valid + if(regenerate_guid(person_guid, pbuf) == -1) { + info("Registry Request Verification: invalid person GUID, person: '%s', machine '%s', url '%s'", person_guid, machine_guid, url); + return NULL; + } + person_guid = pbuf; + + // make sure the machine GUID is valid + if(regenerate_guid(machine_guid, mbuf) == -1) { + info("Registry Request Verification: invalid machine GUID, person: '%s', machine '%s', url '%s'", person_guid, machine_guid, url); + return NULL; + } + machine_guid = mbuf; + + // make sure the machine exists + REGISTRY_MACHINE *m = registry_machine_find(machine_guid); + if(!m) { + info("Registry Request Verification: machine not found, person: '%s', machine '%s', url '%s'", person_guid, machine_guid, url); + return NULL; + } + if(mm) *mm = m; + + // make sure the person exist + REGISTRY_PERSON *p = registry_person_find(person_guid); + if(!p) { + info("Registry Request Verification: person not found, person: '%s', machine '%s', url '%s'", person_guid, machine_guid, url); + return NULL; + } + if(pp) *pp = p; + + REGISTRY_PERSON_URL *pu = registry_person_url_index_find(p, url); + if(!pu) { + info("Registry Request Verification: URL not found for person, person: '%s', machine '%s', url '%s'", person_guid, machine_guid, url); + return NULL; + } + return pu; +} + + +// ---------------------------------------------------------------------------- +// REGISTRY REQUESTS + +REGISTRY_PERSON *registry_request_access(char *person_guid, char *machine_guid, char *url, char *name, time_t when) { + debug(D_REGISTRY, "registry_request_access('%s', '%s', '%s'): NEW REQUEST", (person_guid)?person_guid:"", 
machine_guid, url); + + REGISTRY_MACHINE *m = registry_machine_get(machine_guid, when); + if(!m) return NULL; + + // make sure the name is valid + size_t namelen; + name = registry_fix_machine_name(name, &namelen); + + size_t urllen; + url = registry_fix_url(url, &urllen); + + REGISTRY_PERSON *p = registry_person_get(person_guid, when); + + REGISTRY_URL *u = registry_url_get(url, urllen); + registry_person_link_to_url(p, m, u, name, namelen, when); + registry_machine_link_to_url(m, u, when); + + registry_log('A', p, m, u, name); + + registry.usages_count++; + + return p; +} + +REGISTRY_PERSON *registry_request_delete(char *person_guid, char *machine_guid, char *url, char *delete_url, time_t when) { + (void) when; + + REGISTRY_PERSON *p = NULL; + REGISTRY_MACHINE *m = NULL; + REGISTRY_PERSON_URL *pu = registry_verify_request(person_guid, machine_guid, url, &p, &m); + if(!pu || !p || !m) return NULL; + + // normalize the url + delete_url = registry_fix_url(delete_url, NULL); + + // make sure the user is not deleting the url it uses + if(!strcmp(delete_url, pu->url->url)) { + info("Registry Delete Request: delete URL is the one currently accessed, person: '%s', machine '%s', url '%s', delete url '%s'" + , p->guid, m->guid, pu->url->url, delete_url); + return NULL; + } + + REGISTRY_PERSON_URL *dpu = registry_person_url_index_find(p, delete_url); + if(!dpu) { + info("Registry Delete Request: URL not found for person: '%s', machine '%s', url '%s', delete url '%s'", p->guid + , m->guid, pu->url->url, delete_url); + return NULL; + } + + registry_log('D', p, m, pu->url, dpu->url->url); + registry_person_unlink_from_url(p, dpu); + + return p; +} + + +// a structure to pass to the dictionary_get_all() callback handler +struct machine_request_callback_data { + REGISTRY_MACHINE *find_this_machine; + REGISTRY_PERSON_URL *result; +}; + +// the callback function +// this will be run for every PERSON_URL of this PERSON +static int machine_request_callback(void *entry, void *data) { 
+ REGISTRY_PERSON_URL *mypu = (REGISTRY_PERSON_URL *)entry; + struct machine_request_callback_data *myrdata = (struct machine_request_callback_data *)data; + + if(mypu->machine == myrdata->find_this_machine) { + myrdata->result = mypu; + return -1; // this will also stop the walk through + } + + return 0; // continue +} + +REGISTRY_MACHINE *registry_request_machine(char *person_guid, char *machine_guid, char *url, char *request_machine, time_t when) { + (void)when; + + char mbuf[GUID_LEN + 1]; + + REGISTRY_PERSON *p = NULL; + REGISTRY_MACHINE *m = NULL; + REGISTRY_PERSON_URL *pu = registry_verify_request(person_guid, machine_guid, url, &p, &m); + if(!pu || !p || !m) return NULL; + + // make sure the machine GUID is valid + if(regenerate_guid(request_machine, mbuf) == -1) { + info("Registry Machine URLs request: invalid machine GUID, person: '%s', machine '%s', url '%s', request machine '%s'", p->guid, m->guid, pu->url->url, request_machine); + return NULL; + } + request_machine = mbuf; + + // make sure the machine exists + m = registry_machine_find(request_machine); + if(!m) { + info("Registry Machine URLs request: machine not found, person: '%s', machine '%s', url '%s', request machine '%s'", p->guid, machine_guid, pu->url->url, request_machine); + return NULL; + } + + // Verify the user has in the past accessed this machine + // We will walk through the PERSON_URLs to find the machine + // linking to our machine + + // a structure to pass to the dictionary_get_all() callback handler + struct machine_request_callback_data rdata = { m, NULL }; + + // request a walk through on the dictionary + avl_traverse(&p->person_urls, machine_request_callback, &rdata); + + if(rdata.result) + return m; + + return NULL; +} + + +// ---------------------------------------------------------------------------- +// REGISTRY THIS MACHINE UNIQUE ID + +static inline int is_machine_guid_blacklisted(const char *guid) { + // these are machine GUIDs that have been included in distribution 
packages. + // we blacklist them here, so that the next version of netdata will generate + // new ones. + + if(!strcmp(guid, "8a795b0c-2311-11e6-8563-000c295076a6") + || !strcmp(guid, "4aed1458-1c3e-11e6-a53f-000c290fc8f5") + ) { + error("Blacklisted machine GUID '%s' found.", guid); + return 1; + } + + return 0; +} + +char *registry_get_this_machine_hostname(void) { + return registry.hostname; +} + +char *registry_get_this_machine_guid(void) { + static char guid[GUID_LEN + 1] = ""; + + if(likely(guid[0])) + return guid; + + // read it from disk + int fd = open(registry.machine_guid_filename, O_RDONLY); + if(fd != -1) { + char buf[GUID_LEN + 1]; + if(read(fd, buf, GUID_LEN) != GUID_LEN) + error("Failed to read machine GUID from '%s'", registry.machine_guid_filename); + else { + buf[GUID_LEN] = '\0'; + if(regenerate_guid(buf, guid) == -1) { + error("Failed to validate machine GUID '%s' from '%s'. Ignoring it - this might mean this netdata will appear as duplicate in the registry.", + buf, registry.machine_guid_filename); + + guid[0] = '\0'; + } + else if(is_machine_guid_blacklisted(guid)) + guid[0] = '\0'; + } + close(fd); + } + + // generate a new one? + if(!guid[0]) { + uuid_t uuid; + + uuid_generate_time(uuid); + uuid_unparse_lower(uuid, guid); + guid[GUID_LEN] = '\0'; + + // save it + fd = open(registry.machine_guid_filename, O_WRONLY|O_CREAT|O_TRUNC, 444); + if(fd == -1) + fatal("Cannot create unique machine id file '%s'. Please fix this.", registry.machine_guid_filename); + + if(write(fd, guid, GUID_LEN) != GUID_LEN) + fatal("Cannot write the unique machine id file '%s'. 
Please fix this.", registry.machine_guid_filename); + + close(fd); + } + + setenv("NETDATA_REGISTRY_UNIQUE_ID", guid, 1); + + return guid; +} diff --git a/registry/registry_internals.h b/registry/registry_internals.h new file mode 100644 index 0000000..c126e45 --- /dev/null +++ b/registry/registry_internals.h @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_REGISTRY_INTERNALS_H_H +#define NETDATA_REGISTRY_INTERNALS_H_H 1 + +#include "registry.h" + +#define REGISTRY_URL_FLAGS_DEFAULT 0x00 +#define REGISTRY_URL_FLAGS_EXPIRED 0x01 + +#define DICTIONARY_FLAGS (DICTIONARY_FLAG_VALUE_LINK_DONT_CLONE | DICTIONARY_FLAG_NAME_LINK_DONT_CLONE | DICTIONARY_FLAG_SINGLE_THREADED) + +// ---------------------------------------------------------------------------- +// COMMON structures + +struct registry { + int enabled; + + // entries counters / statistics + unsigned long long persons_count; + unsigned long long machines_count; + unsigned long long usages_count; + unsigned long long urls_count; + unsigned long long persons_urls_count; + unsigned long long machines_urls_count; + unsigned long long log_count; + + // memory counters / statistics + unsigned long long persons_memory; + unsigned long long machines_memory; + unsigned long long urls_memory; + unsigned long long persons_urls_memory; + unsigned long long machines_urls_memory; + + // configuration + unsigned long long save_registry_every_entries; + char *registry_domain; + char *hostname; + char *registry_to_announce; + char *cloud_base_url; + time_t persons_expiration; // seconds to expire idle persons + int verify_cookies_redirects; + + size_t max_url_length; + size_t max_name_length; + + // file/path names + char *pathname; + char *db_filename; + char *log_filename; + char *machine_guid_filename; + + // open files + FILE *log_fp; + + // the database + DICTIONARY *persons; // dictionary of REGISTRY_PERSON *, with key the REGISTRY_PERSON.guid + DICTIONARY *machines; // dictionary of 
REGISTRY_MACHINE *, with key the REGISTRY_MACHINE.guid + + avl_tree registry_urls_root_index; + + netdata_mutex_t lock; +}; + +#include "registry_url.h" +#include "registry_machine.h" +#include "registry_person.h" +#include "registry.h" + +extern struct registry registry; + +// REGISTRY LOW-LEVEL REQUESTS (in registry-internals.c) +extern REGISTRY_PERSON *registry_request_access(char *person_guid, char *machine_guid, char *url, char *name, time_t when); +extern REGISTRY_PERSON *registry_request_delete(char *person_guid, char *machine_guid, char *url, char *delete_url, time_t when); +extern REGISTRY_MACHINE *registry_request_machine(char *person_guid, char *machine_guid, char *url, char *request_machine, time_t when); + +// REGISTRY LOG (in registry_log.c) +extern void registry_log(char action, REGISTRY_PERSON *p, REGISTRY_MACHINE *m, REGISTRY_URL *u, char *name); +extern int registry_log_open(void); +extern void registry_log_close(void); +extern void registry_log_recreate(void); +extern ssize_t registry_log_load(void); + +// REGISTRY DB (in registry_db.c) +extern int registry_db_save(void); +extern size_t registry_db_load(void); +extern int registry_db_should_be_saved(void); + +#endif //NETDATA_REGISTRY_INTERNALS_H_H diff --git a/registry/registry_log.c b/registry/registry_log.c new file mode 100644 index 0000000..e0e58ed --- /dev/null +++ b/registry/registry_log.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../daemon/common.h" +#include "registry_internals.h" + +void registry_log(char action, REGISTRY_PERSON *p, REGISTRY_MACHINE *m, REGISTRY_URL *u, char *name) { + if(likely(registry.log_fp)) { + if(unlikely(fprintf(registry.log_fp, "%c\t%08x\t%s\t%s\t%s\t%s\n", + action, + p->last_t, + p->guid, + m->guid, + name, + u->url) < 0)) + error("Registry: failed to save log. 
Registry data may be lost in case of abnormal restart."); + + // we increase the counter even on failures + // so that the registry will be saved periodically + registry.log_count++; + + // this must be outside the log_lock(), or a deadlock will happen. + // registry_db_save() checks the same inside the log_lock, so only + // one thread will save the db + if(unlikely(registry_db_should_be_saved())) + registry_db_save(); + } +} + +int registry_log_open(void) { + if(registry.log_fp) + fclose(registry.log_fp); + + registry.log_fp = fopen(registry.log_filename, "a"); + if(registry.log_fp) { + if (setvbuf(registry.log_fp, NULL, _IOLBF, 0) != 0) + error("Cannot set line buffering on registry log file."); + return 0; + } + + error("Cannot open registry log file '%s'. Registry data will be lost in case of netdata or server crash.", registry.log_filename); + return -1; +} + +void registry_log_close(void) { + if(registry.log_fp) { + fclose(registry.log_fp); + registry.log_fp = NULL; + } +} + +void registry_log_recreate(void) { + if(registry.log_fp != NULL) { + registry_log_close(); + + // open it with truncate + registry.log_fp = fopen(registry.log_filename, "w"); + if(registry.log_fp) fclose(registry.log_fp); + else error("Cannot truncate registry log '%s'", registry.log_filename); + + registry.log_fp = NULL; + registry_log_open(); + } +} + +ssize_t registry_log_load(void) { + ssize_t line = -1; + + // closing the log is required here + // otherwise we will append to it the values we read + registry_log_close(); + + debug(D_REGISTRY, "Registry: loading active db from: %s", registry.log_filename); + FILE *fp = fopen(registry.log_filename, "r"); + if(!fp) + error("Registry: cannot open registry file: %s", registry.log_filename); + else { + char *s, buf[4096 + 1]; + line = 0; + size_t len = 0; + + while ((s = fgets_trim_len(buf, 4096, fp, &len))) { + line++; + + switch (s[0]) { + case 'A': // accesses + case 'D': // deletes + + // verify it is valid + if (unlikely(len < 85 || 
s[1] != '\t' || s[10] != '\t' || s[47] != '\t' || s[84] != '\t')) { + error("Registry: log line %zd is wrong (len = %zu).", line, len); + continue; + } + s[1] = s[10] = s[47] = s[84] = '\0'; + + // get the variables + time_t when = strtoul(&s[2], NULL, 16); + char *person_guid = &s[11]; + char *machine_guid = &s[48]; + char *name = &s[85]; + + // skip the name to find the url + char *url = name; + while(*url && *url != '\t') url++; + if(!*url) { + error("Registry: log line %zd does not have a url.", line); + continue; + } + *url++ = '\0'; + + // make sure the person exists + // without this, a new person guid will be created + REGISTRY_PERSON *p = registry_person_find(person_guid); + if(!p) p = registry_person_allocate(person_guid, when); + + if(s[0] == 'A') + registry_request_access(p->guid, machine_guid, url, name, when); + else + registry_request_delete(p->guid, machine_guid, url, name, when); + + registry.log_count++; + break; + + default: + error("Registry: ignoring line %zd of filename '%s': %s.", line, registry.log_filename, s); + break; + } + } + + fclose(fp); + } + + // open the log again + registry_log_open(); + + return line; +} diff --git a/registry/registry_machine.c b/registry/registry_machine.c new file mode 100644 index 0000000..8dbeb8e --- /dev/null +++ b/registry/registry_machine.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../daemon/common.h" +#include "registry_internals.h" + +// ---------------------------------------------------------------------------- +// MACHINE + +REGISTRY_MACHINE *registry_machine_find(const char *machine_guid) { + debug(D_REGISTRY, "Registry: registry_machine_find('%s')", machine_guid); + return dictionary_get(registry.machines, machine_guid); +} + +REGISTRY_MACHINE_URL *registry_machine_url_allocate(REGISTRY_MACHINE *m, REGISTRY_URL *u, time_t when) { + debug(D_REGISTRY, "registry_machine_url_allocate('%s', '%s'): allocating %zu bytes", m->guid, u->url, sizeof(REGISTRY_MACHINE_URL)); + 
+ REGISTRY_MACHINE_URL *mu = mallocz(sizeof(REGISTRY_MACHINE_URL)); + + mu->first_t = mu->last_t = (uint32_t)when; + mu->usages = 1; + mu->url = u; + mu->flags = REGISTRY_URL_FLAGS_DEFAULT; + + registry.machines_urls_memory += sizeof(REGISTRY_MACHINE_URL); + + debug(D_REGISTRY, "registry_machine_url_allocate('%s', '%s'): indexing URL in machine", m->guid, u->url); + dictionary_set(m->machine_urls, u->url, mu, sizeof(REGISTRY_MACHINE_URL)); + + registry_url_link(u); + + return mu; +} + +REGISTRY_MACHINE *registry_machine_allocate(const char *machine_guid, time_t when) { + debug(D_REGISTRY, "Registry: registry_machine_allocate('%s'): creating new machine, sizeof(MACHINE)=%zu", machine_guid, sizeof(REGISTRY_MACHINE)); + + REGISTRY_MACHINE *m = mallocz(sizeof(REGISTRY_MACHINE)); + + strncpyz(m->guid, machine_guid, GUID_LEN); + + debug(D_REGISTRY, "Registry: registry_machine_allocate('%s'): creating dictionary of urls", machine_guid); + m->machine_urls = dictionary_create(DICTIONARY_FLAGS); + + m->first_t = m->last_t = (uint32_t)when; + m->usages = 0; + + registry.machines_memory += sizeof(REGISTRY_MACHINE); + + registry.machines_count++; + dictionary_set(registry.machines, m->guid, m, sizeof(REGISTRY_MACHINE)); + + return m; +} + +// 1. validate machine GUID +// 2. if it is valid, find it or create it and return it +// 3. if it is not valid, return NULL +REGISTRY_MACHINE *registry_machine_get(const char *machine_guid, time_t when) { + REGISTRY_MACHINE *m = NULL; + + if(likely(machine_guid && *machine_guid)) { + // validate it is a GUID + char buf[GUID_LEN + 1]; + if(unlikely(regenerate_guid(machine_guid, buf) == -1)) + info("Registry: machine guid '%s' is not a valid guid. 
Ignoring it.", machine_guid); + else { + machine_guid = buf; + m = registry_machine_find(machine_guid); + if(!m) m = registry_machine_allocate(machine_guid, when); + } + } + + return m; +} + + +// ---------------------------------------------------------------------------- +// LINKING OF OBJECTS + +REGISTRY_MACHINE_URL *registry_machine_link_to_url(REGISTRY_MACHINE *m, REGISTRY_URL *u, time_t when) { + debug(D_REGISTRY, "registry_machine_link_to_url('%s', '%s'): searching for URL in machine", m->guid, u->url); + + REGISTRY_MACHINE_URL *mu = dictionary_get(m->machine_urls, u->url); + if(!mu) { + debug(D_REGISTRY, "registry_machine_link_to_url('%s', '%s'): not found", m->guid, u->url); + mu = registry_machine_url_allocate(m, u, when); + registry.machines_urls_count++; + } + else { + debug(D_REGISTRY, "registry_machine_link_to_url('%s', '%s'): found", m->guid, u->url); + mu->usages++; + if(likely(mu->last_t < (uint32_t)when)) mu->last_t = (uint32_t)when; + } + + m->usages++; + if(likely(m->last_t < (uint32_t)when)) m->last_t = (uint32_t)when; + + if(mu->flags & REGISTRY_URL_FLAGS_EXPIRED) { + debug(D_REGISTRY, "registry_machine_link_to_url('%s', '%s'): accessing an expired URL.", m->guid, u->url); + mu->flags &= ~REGISTRY_URL_FLAGS_EXPIRED; + } + + return mu; +} diff --git a/registry/registry_machine.h b/registry/registry_machine.h new file mode 100644 index 0000000..77ab5aa --- /dev/null +++ b/registry/registry_machine.h @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_REGISTRY_MACHINE_H +#define NETDATA_REGISTRY_MACHINE_H 1 + +#include "registry_internals.h" + +// ---------------------------------------------------------------------------- +// MACHINE structures + +// For each MACHINE-URL pair we keep this +struct registry_machine_url { + REGISTRY_URL *url; // de-duplicated URL + + uint8_t flags; + + uint32_t first_t; // the first time we saw this + uint32_t last_t; // the last time we saw this + uint32_t usages; // how many times 
this has been accessed +}; +typedef struct registry_machine_url REGISTRY_MACHINE_URL; + +// A machine +struct registry_machine { + char guid[GUID_LEN + 1]; // the GUID + + uint32_t links; // the number of REGISTRY_PERSON_URL linked to this machine + + DICTIONARY *machine_urls; // MACHINE_URL * + + uint32_t first_t; // the first time we saw this + uint32_t last_t; // the last time we saw this + uint32_t usages; // how many times this has been accessed +}; +typedef struct registry_machine REGISTRY_MACHINE; + +extern REGISTRY_MACHINE *registry_machine_find(const char *machine_guid); +extern REGISTRY_MACHINE_URL *registry_machine_url_allocate(REGISTRY_MACHINE *m, REGISTRY_URL *u, time_t when); +extern REGISTRY_MACHINE *registry_machine_allocate(const char *machine_guid, time_t when); +extern REGISTRY_MACHINE *registry_machine_get(const char *machine_guid, time_t when); +extern REGISTRY_MACHINE_URL *registry_machine_link_to_url(REGISTRY_MACHINE *m, REGISTRY_URL *u, time_t when); + +#endif //NETDATA_REGISTRY_MACHINE_H diff --git a/registry/registry_person.c b/registry/registry_person.c new file mode 100644 index 0000000..268b0bd --- /dev/null +++ b/registry/registry_person.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../daemon/common.h" +#include "registry_internals.h" + +// ---------------------------------------------------------------------------- +// PERSON_URL INDEX + +int person_url_compare(void *a, void *b) { + register uint32_t hash1 = ((REGISTRY_PERSON_URL *)a)->url->hash; + register uint32_t hash2 = ((REGISTRY_PERSON_URL *)b)->url->hash; + + if(hash1 < hash2) return -1; + else if(hash1 > hash2) return 1; + else return strcmp(((REGISTRY_PERSON_URL *)a)->url->url, ((REGISTRY_PERSON_URL *)b)->url->url); +} + +inline REGISTRY_PERSON_URL *registry_person_url_index_find(REGISTRY_PERSON *p, const char *url) { + debug(D_REGISTRY, "Registry: registry_person_url_index_find('%s', '%s')", p->guid, url); + + char buf[sizeof(REGISTRY_URL) + 
strlen(url)]; + + REGISTRY_URL *u = (REGISTRY_URL *)&buf; + strcpy(u->url, url); + u->hash = simple_hash(u->url); + + REGISTRY_PERSON_URL tpu = { .url = u }; + + REGISTRY_PERSON_URL *pu = (REGISTRY_PERSON_URL *)avl_search(&p->person_urls, (void *)&tpu); + return pu; +} + +inline REGISTRY_PERSON_URL *registry_person_url_index_add(REGISTRY_PERSON *p, REGISTRY_PERSON_URL *pu) { + debug(D_REGISTRY, "Registry: registry_person_url_index_add('%s', '%s')", p->guid, pu->url->url); + REGISTRY_PERSON_URL *tpu = (REGISTRY_PERSON_URL *)avl_insert(&(p->person_urls), (avl *)(pu)); + if(tpu != pu) + error("Registry: registry_person_url_index_add('%s', '%s') already exists as '%s'", p->guid, pu->url->url, tpu->url->url); + + return tpu; +} + +inline REGISTRY_PERSON_URL *registry_person_url_index_del(REGISTRY_PERSON *p, REGISTRY_PERSON_URL *pu) { + debug(D_REGISTRY, "Registry: registry_person_url_index_del('%s', '%s')", p->guid, pu->url->url); + REGISTRY_PERSON_URL *tpu = (REGISTRY_PERSON_URL *)avl_remove(&(p->person_urls), (avl *)(pu)); + if(!tpu) + error("Registry: registry_person_url_index_del('%s', '%s') deleted nothing", p->guid, pu->url->url); + else if(tpu != pu) + error("Registry: registry_person_url_index_del('%s', '%s') deleted wrong URL '%s'", p->guid, pu->url->url, tpu->url->url); + + return tpu; +} + +// ---------------------------------------------------------------------------- +// PERSON_URL + +REGISTRY_PERSON_URL *registry_person_url_allocate(REGISTRY_PERSON *p, REGISTRY_MACHINE *m, REGISTRY_URL *u, char *name, size_t namelen, time_t when) { + debug(D_REGISTRY, "registry_person_url_allocate('%s', '%s', '%s'): allocating %zu bytes", p->guid, m->guid, u->url, sizeof(REGISTRY_PERSON_URL) + namelen); + + // protection from too big names + if(namelen > registry.max_name_length) + namelen = registry.max_name_length; + + REGISTRY_PERSON_URL *pu = mallocz(sizeof(REGISTRY_PERSON_URL) + namelen); + + // a simple strcpy() should do the job + // but I prefer to be safe, since 
the caller specified urllen + strncpyz(pu->machine_name, name, namelen); + + pu->machine = m; + pu->first_t = pu->last_t = (uint32_t)when; + pu->usages = 1; + pu->url = u; + pu->flags = REGISTRY_URL_FLAGS_DEFAULT; + m->links++; + + registry.persons_urls_memory += sizeof(REGISTRY_PERSON_URL) + namelen; + + debug(D_REGISTRY, "registry_person_url_allocate('%s', '%s', '%s'): indexing URL in person", p->guid, m->guid, u->url); + REGISTRY_PERSON_URL *tpu = registry_person_url_index_add(p, pu); + if(tpu != pu) { + error("Registry: Attempted to add duplicate person url '%s' with name '%s' to person '%s'", u->url, name, p->guid); + freez(pu); + pu = tpu; + } + else + registry_url_link(u); + + return pu; +} + +void registry_person_url_free(REGISTRY_PERSON *p, REGISTRY_PERSON_URL *pu) { + debug(D_REGISTRY, "registry_person_url_free('%s', '%s')", p->guid, pu->url->url); + + REGISTRY_PERSON_URL *tpu = registry_person_url_index_del(p, pu); + if(tpu) { + registry_url_unlink(tpu->url); + tpu->machine->links--; + registry.persons_urls_memory -= sizeof(REGISTRY_PERSON_URL) + strlen(tpu->machine_name); + freez(tpu); + } +} + +// this function is needed to change the name of a PERSON_URL +REGISTRY_PERSON_URL *registry_person_url_reallocate(REGISTRY_PERSON *p, REGISTRY_MACHINE *m, REGISTRY_URL *u, char *name, size_t namelen, time_t when, REGISTRY_PERSON_URL *pu) { + debug(D_REGISTRY, "registry_person_url_reallocate('%s', '%s', '%s'): allocating %zu bytes", p->guid, m->guid, u->url, sizeof(REGISTRY_PERSON_URL) + namelen); + + // keep a backup + REGISTRY_PERSON_URL pu2 = { + .first_t = pu->first_t, + .last_t = pu->last_t, + .usages = pu->usages, + .flags = pu->flags, + .machine = pu->machine, + .machine_name = "" + }; + + // remove the existing one from the index + registry_person_url_free(p, pu); + pu = &pu2; + + // allocate a new one + REGISTRY_PERSON_URL *tpu = registry_person_url_allocate(p, m, u, name, namelen, when); + tpu->first_t = pu->first_t; + tpu->last_t = pu->last_t; + 
tpu->usages = pu->usages; + tpu->flags = pu->flags; + + return tpu; +} + + +// ---------------------------------------------------------------------------- +// PERSON + +REGISTRY_PERSON *registry_person_find(const char *person_guid) { + debug(D_REGISTRY, "Registry: registry_person_find('%s')", person_guid); + return dictionary_get(registry.persons, person_guid); +} + +REGISTRY_PERSON *registry_person_allocate(const char *person_guid, time_t when) { + debug(D_REGISTRY, "Registry: registry_person_allocate('%s'): allocating new person, sizeof(PERSON)=%zu", (person_guid)?person_guid:"", sizeof(REGISTRY_PERSON)); + + REGISTRY_PERSON *p = mallocz(sizeof(REGISTRY_PERSON)); + if(!person_guid) { + for(;;) { + uuid_t uuid; + uuid_generate(uuid); + uuid_unparse_lower(uuid, p->guid); + + debug(D_REGISTRY, "Registry: Checking if the generated person guid '%s' is unique", p->guid); + if (!dictionary_get(registry.persons, p->guid)) { + debug(D_REGISTRY, "Registry: generated person guid '%s' is unique", p->guid); + break; + } + else + info("Registry: generated person guid '%s' found in the registry. Retrying...", p->guid); + } + } + else + strncpyz(p->guid, person_guid, GUID_LEN); + + debug(D_REGISTRY, "Registry: registry_person_allocate('%s'): creating dictionary of urls", p->guid); + avl_init(&p->person_urls, person_url_compare); + + p->first_t = p->last_t = (uint32_t)when; + p->usages = 0; + + registry.persons_memory += sizeof(REGISTRY_PERSON); + + registry.persons_count++; + dictionary_set(registry.persons, p->guid, p, sizeof(REGISTRY_PERSON)); + + return p; +} + + +// 1. validate person GUID +// 2. if it is valid, find it +// 3. if it is not valid, create a new one +// 4. 
return it +REGISTRY_PERSON *registry_person_get(const char *person_guid, time_t when) { + debug(D_REGISTRY, "Registry: registry_person_get('%s'): creating dictionary of urls", person_guid); + + REGISTRY_PERSON *p = NULL; + + if(person_guid && *person_guid) { + char buf[GUID_LEN + 1]; + // validate it is a GUID + if(unlikely(regenerate_guid(person_guid, buf) == -1)) + info("Registry: person guid '%s' is not a valid guid. Ignoring it.", person_guid); + else { + person_guid = buf; + p = registry_person_find(person_guid); + } + } + + if(!p) p = registry_person_allocate(NULL, when); + + return p; +} + +void registry_person_del(REGISTRY_PERSON *p) { + debug(D_REGISTRY, "Registry: registry_person_del('%s'): creating dictionary of urls", p->guid); + + while(p->person_urls.root) + registry_person_unlink_from_url(p, (REGISTRY_PERSON_URL *)p->person_urls.root); + + debug(D_REGISTRY, "Registry: deleting person '%s' from persons registry", p->guid); + dictionary_del(registry.persons, p->guid); + + debug(D_REGISTRY, "Registry: freeing person '%s'", p->guid); + freez(p); +} + +// ---------------------------------------------------------------------------- +// LINKING OF OBJECTS + +REGISTRY_PERSON_URL *registry_person_link_to_url(REGISTRY_PERSON *p, REGISTRY_MACHINE *m, REGISTRY_URL *u, char *name, size_t namelen, time_t when) { + debug(D_REGISTRY, "registry_person_link_to_url('%s', '%s', '%s'): searching for URL in person", p->guid, m->guid, u->url); + + REGISTRY_PERSON_URL *pu = registry_person_url_index_find(p, u->url); + if(!pu) { + debug(D_REGISTRY, "registry_person_link_to_url('%s', '%s', '%s'): not found", p->guid, m->guid, u->url); + pu = registry_person_url_allocate(p, m, u, name, namelen, when); + registry.persons_urls_count++; + } + else { + debug(D_REGISTRY, "registry_person_link_to_url('%s', '%s', '%s'): found", p->guid, m->guid, u->url); + pu->usages++; + if(likely(pu->last_t < (uint32_t)when)) pu->last_t = (uint32_t)when; + + if(pu->machine != m) { + 
REGISTRY_MACHINE_URL *mu = dictionary_get(pu->machine->machine_urls, u->url); + if(mu) { + debug(D_REGISTRY, "registry_person_link_to_url('%s', '%s', '%s'): URL switched machines (old was '%s') - expiring it from previous machine.", + p->guid, m->guid, u->url, pu->machine->guid); + mu->flags |= REGISTRY_URL_FLAGS_EXPIRED; + } + else { + debug(D_REGISTRY, "registry_person_link_to_url('%s', '%s', '%s'): URL switched machines (old was '%s') - but the URL is not linked to the old machine.", + p->guid, m->guid, u->url, pu->machine->guid); + } + + pu->machine->links--; + pu->machine = m; + } + + if(strcmp(pu->machine_name, name) != 0) { + // the name of the PERSON_URL has changed ! + pu = registry_person_url_reallocate(p, m, u, name, namelen, when, pu); + } + } + + p->usages++; + if(likely(p->last_t < (uint32_t)when)) p->last_t = (uint32_t)when; + + if(pu->flags & REGISTRY_URL_FLAGS_EXPIRED) { + debug(D_REGISTRY, "registry_person_link_to_url('%s', '%s', '%s'): accessing an expired URL. Re-enabling URL.", p->guid, m->guid, u->url); + pu->flags &= ~REGISTRY_URL_FLAGS_EXPIRED; + } + + return pu; +} + +void registry_person_unlink_from_url(REGISTRY_PERSON *p, REGISTRY_PERSON_URL *pu) { + registry_person_url_free(p, pu); +} diff --git a/registry/registry_person.h b/registry/registry_person.h new file mode 100644 index 0000000..30e9cb5 --- /dev/null +++ b/registry/registry_person.h @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_REGISTRY_PERSON_H +#define NETDATA_REGISTRY_PERSON_H 1 + +#include "registry_internals.h" + +// ---------------------------------------------------------------------------- +// PERSON structures + +// for each PERSON-URL pair we keep this +struct registry_person_url { + avl avl; // binary tree node + + REGISTRY_URL *url; // de-duplicated URL + REGISTRY_MACHINE *machine; // link the MACHINE of this URL + + uint8_t flags; + + uint32_t first_t; // the first time we saw this + uint32_t last_t; // the last time we saw this + 
uint32_t usages; // how many times this has been accessed + + char machine_name[1]; // the name of the machine, as known by the user + // dynamically allocated to fit properly +}; +typedef struct registry_person_url REGISTRY_PERSON_URL; + +// A person +struct registry_person { + char guid[GUID_LEN + 1]; // the person GUID + + avl_tree person_urls; // dictionary of PERSON_URLs + + uint32_t first_t; // the first time we saw this + uint32_t last_t; // the last time we saw this + uint32_t usages; // how many times this has been accessed + + //uint32_t flags; + //char *email; +}; +typedef struct registry_person REGISTRY_PERSON; + +// PERSON_URL +extern REGISTRY_PERSON_URL *registry_person_url_index_find(REGISTRY_PERSON *p, const char *url); +extern REGISTRY_PERSON_URL *registry_person_url_index_add(REGISTRY_PERSON *p, REGISTRY_PERSON_URL *pu) NEVERNULL WARNUNUSED; +extern REGISTRY_PERSON_URL *registry_person_url_index_del(REGISTRY_PERSON *p, REGISTRY_PERSON_URL *pu) WARNUNUSED; + +extern REGISTRY_PERSON_URL *registry_person_url_allocate(REGISTRY_PERSON *p, REGISTRY_MACHINE *m, REGISTRY_URL *u, char *name, size_t namelen, time_t when); +extern REGISTRY_PERSON_URL *registry_person_url_reallocate(REGISTRY_PERSON *p, REGISTRY_MACHINE *m, REGISTRY_URL *u, char *name, size_t namelen, time_t when, REGISTRY_PERSON_URL *pu); + +// PERSON +extern REGISTRY_PERSON *registry_person_find(const char *person_guid); +extern REGISTRY_PERSON *registry_person_allocate(const char *person_guid, time_t when); +extern REGISTRY_PERSON *registry_person_get(const char *person_guid, time_t when); +extern void registry_person_del(REGISTRY_PERSON *p); + +// LINKING PERSON -> PERSON_URL +extern REGISTRY_PERSON_URL *registry_person_link_to_url(REGISTRY_PERSON *p, REGISTRY_MACHINE *m, REGISTRY_URL *u, char *name, size_t namelen, time_t when); +extern void registry_person_unlink_from_url(REGISTRY_PERSON *p, REGISTRY_PERSON_URL *pu); + +#endif //NETDATA_REGISTRY_PERSON_H diff --git 
a/registry/registry_url.c b/registry/registry_url.c new file mode 100644 index 0000000..9ac3ce1 --- /dev/null +++ b/registry/registry_url.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../daemon/common.h" +#include "registry_internals.h" + +// ---------------------------------------------------------------------------- +// REGISTRY_URL + +int registry_url_compare(void *a, void *b) { + if(((REGISTRY_URL *)a)->hash < ((REGISTRY_URL *)b)->hash) return -1; + else if(((REGISTRY_URL *)a)->hash > ((REGISTRY_URL *)b)->hash) return 1; + else return strcmp(((REGISTRY_URL *)a)->url, ((REGISTRY_URL *)b)->url); +} + +inline REGISTRY_URL *registry_url_index_add(REGISTRY_URL *u) { + return (REGISTRY_URL *)avl_insert(&(registry.registry_urls_root_index), (avl *)(u)); +} + +inline REGISTRY_URL *registry_url_index_del(REGISTRY_URL *u) { + return (REGISTRY_URL *)avl_remove(&(registry.registry_urls_root_index), (avl *)(u)); +} + +REGISTRY_URL *registry_url_get(const char *url, size_t urllen) { + // protection from too big URLs + if(urllen > registry.max_url_length) + urllen = registry.max_url_length; + + debug(D_REGISTRY, "Registry: registry_url_get('%s', %zu)", url, urllen); + + char buf[sizeof(REGISTRY_URL) + urllen]; // no need for +1, 1 is already in REGISTRY_URL + REGISTRY_URL *n = (REGISTRY_URL *)&buf[0]; + n->len = (uint16_t)urllen; + strncpyz(n->url, url, n->len); + n->hash = simple_hash(n->url); + + REGISTRY_URL *u = (REGISTRY_URL *)avl_search(&(registry.registry_urls_root_index), (avl *)n); + if(!u) { + debug(D_REGISTRY, "Registry: registry_url_get('%s', %zu): allocating %zu bytes", url, urllen, sizeof(REGISTRY_URL) + urllen); + u = callocz(1, sizeof(REGISTRY_URL) + urllen); // no need for +1, 1 is already in REGISTRY_URL + + // a simple strcpy() should do the job + // but I prefer to be safe, since the caller specified urllen + u->len = (uint16_t)urllen; + strncpyz(u->url, url, u->len); + u->links = 0; + u->hash = simple_hash(u->url); + + 
registry.urls_memory += sizeof(REGISTRY_URL) + urllen; // no need for +1, 1 is already in REGISTRY_URL + + debug(D_REGISTRY, "Registry: registry_url_get('%s'): indexing it", url); + n = registry_url_index_add(u); + if(n != u) { + error("INTERNAL ERROR: registry_url_get(): url '%s' already exists in the registry as '%s'", u->url, n->url); + freez(u); + u = n; + } + else + registry.urls_count++; + } + + return u; +} + +void registry_url_link(REGISTRY_URL *u) { + u->links++; + debug(D_REGISTRY, "Registry: registry_url_link('%s'): URL has now %u links", u->url, u->links); +} + +void registry_url_unlink(REGISTRY_URL *u) { + u->links--; + if(!u->links) { + debug(D_REGISTRY, "Registry: registry_url_unlink('%s'): No more links for this URL", u->url); + REGISTRY_URL *n = registry_url_index_del(u); + if(!n) { + error("INTERNAL ERROR: registry_url_unlink('%s'): cannot find url in index", u->url); + } + else { + if(n != u) { + error("INTERNAL ERROR: registry_url_unlink('%s'): deleted different url '%s'", u->url, n->url); + } + + registry.urls_memory -= sizeof(REGISTRY_URL) + n->len; // no need for +1, 1 is already in REGISTRY_URL + freez(n); + } + } + else + debug(D_REGISTRY, "Registry: registry_url_unlink('%s'): URL has %u links left", u->url, u->links); +} diff --git a/registry/registry_url.h b/registry/registry_url.h new file mode 100644 index 0000000..c684f1c --- /dev/null +++ b/registry/registry_url.h @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_REGISTRY_URL_H +#define NETDATA_REGISTRY_URL_H 1 + +#include "registry_internals.h" + +// ---------------------------------------------------------------------------- +// URL structures +// Save memory by de-duplicating URLs +// so instead of storing URLs all over the place +// we store them here and we keep pointers elsewhere + +struct registry_url { + avl avl; + uint32_t hash; // the index hash + + uint32_t links; // the number of links to this URL - when none is left, we free it + + uint16_t 
len; // the length of the URL in bytes + char url[1]; // the URL - dynamically allocated to more size +}; +typedef struct registry_url REGISTRY_URL; + +// REGISTRY_URL INDEX +extern int registry_url_compare(void *a, void *b); +extern REGISTRY_URL *registry_url_index_del(REGISTRY_URL *u) WARNUNUSED; +extern REGISTRY_URL *registry_url_index_add(REGISTRY_URL *u) NEVERNULL WARNUNUSED; + +// REGISTRY_URL MANAGEMENT +extern REGISTRY_URL *registry_url_get(const char *url, size_t urllen) NEVERNULL; +extern void registry_url_link(REGISTRY_URL *u); +extern void registry_url_unlink(REGISTRY_URL *u); + +#endif //NETDATA_REGISTRY_URL_H |