summaryrefslogtreecommitdiffstats
path: root/web/api
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--web/api/health/README.md49
-rw-r--r--web/api/health/health_cmdapi.c172
-rw-r--r--web/api/health/health_cmdapi.h2
-rw-r--r--web/api/netdata-swagger.json110
-rw-r--r--web/api/netdata-swagger.yaml77
5 files changed, 338 insertions, 72 deletions
diff --git a/web/api/health/README.md b/web/api/health/README.md
index 2003a61e0..66a80d5f6 100644
--- a/web/api/health/README.md
+++ b/web/api/health/README.md
@@ -45,6 +45,7 @@ The following will return an SVG badge of the alarm named `NAME`, attached to th
## Health Management API
Netdata v1.12 and beyond provides a command API to control health checks and notifications at runtime. The feature is especially useful for maintenance periods, during which you receive meaningless alarms.
+From Netdata v1.16.0 and beyond, the configuration controlled via the API commands is [persisted across netdata restarts](#persistence).
Specifically, the API allows you to:
- Disable health checks completely. Alarm conditions will not be evaluated at all and no entries will be added to the alarm log.
@@ -142,6 +143,43 @@ Example 2.2: Add one more selector, to also silence alarms for cpu1 and cpu2
http://localhost/api/v1/manage/health?families=cpu1 cpu2
```
+### List silencers
+
+The command `LIST` was added in netdata v1.16.0 and returns a JSON object with the current status of the silencers.
+
+```
+ curl "http://myserver/api/v1/manage/health?cmd=LIST" -H "X-Auth-Token: Mytoken"
+```
+
+As an example, the following response shows that we have two silencers configured, one for an alarm called `samplealarm` and one for alarms with context `random` on host `myhost`:
+```json
+{
+ "all": false,
+ "type": "SILENCE",
+ "silencers": [
+ {
+ "alarm": "samplealarm"
+ },
+ {
+ "context": "random",
+ "hosts": "myhost"
+ }
+ ]
+}
+```
+
+The response below shows that we have disabled all health checks.
+
+```json
+{
+ "all": true,
+ "type": "DISABLE",
+ "silencers": []
+}
+```
+
### Responses
- "Auth Error" : Token authentication failed
@@ -155,6 +193,17 @@ http://localhost/api/v1/manage/health?families=cpu1 cpu2
- "WARNING: Added alarm selector to silence/disable alarms without a SILENCE or DISABLE command." : Added to the response if a selector is added without a selector-specific command.
- "WARNING: SILENCE or DISABLE command is ineffective without defining any alarm selectors." : Added to the response if a selector-specific command is issued without a selector.
+### Persistence
+
+From netdata v1.16.0 and beyond, the silencers configuration is persisted to disk and loaded when netdata starts.
+The JSON string returned by the [LIST command](#list-silencers) is automatically saved to the `silencers file`, every time a command alters the silencers configuration.
+The file's location is configurable in `netdata.conf`. The default is shown below:
+
+```
+[health]
+ # silencers file = /var/lib/netdata/health.silencers.json
+```
+
### Further reading
The test script under [tests/health_mgmtapi](../../../tests/health_mgmtapi) contains a series of tests that you can either run or read through to understand the various calls and responses better.
diff --git a/web/api/health/health_cmdapi.c b/web/api/health/health_cmdapi.c
index ec177751b..468054c67 100644
--- a/web/api/health/health_cmdapi.c
+++ b/web/api/health/health_cmdapi.c
@@ -1,17 +1,16 @@
//
-// Created by christopher on 11/12/18.
+// Created by Christopher on 11/12/18.
//
#include "health_cmdapi.h"
-
-static SILENCER *create_silencer(void) {
- SILENCER *t = callocz(1, sizeof(SILENCER));
- debug(D_HEALTH, "HEALTH command API: Created empty silencer");
-
- return t;
-}
-
+/**
+ * Free Silencers
+ *
+ * Clean the silencer structure
+ *
+ * @param t is the structure that will be cleaned.
+ */
void free_silencers(SILENCER *t) {
if (!t) return;
if (t->next) free_silencers(t->next);
@@ -31,38 +30,104 @@ void free_silencers(SILENCER *t) {
return;
}
+/**
+ * Silencers to JSON Entry
+ *
+ * Append one `"var": "val"` member to the JSON object currently being written to wb.
+ * Nothing is written when val is NULL. No escaping is done here: var and val are
+ * handed to buffer_sprintf() as-is.
+ *
+ * @param wb a pointer to the output buffer
+ * @param var the json member name
+ * @param val the json member value, or NULL to skip this member
+ * @param hasprev non-zero when a member was already written for the current object,
+ *                in which case a comma is emitted before the new member
+ *
+ * @return 1 when a member was written, otherwise the value of hasprev.
+ */
+int health_silencers2json_entry(BUFFER *wb, char* var, char* val, int hasprev) {
+    if (!val)
+        return hasprev;
+
+    const char *separator = hasprev ? "," : "";
+    buffer_sprintf(wb, "%s\n\t\t\t\"%s\": \"%s\"", separator, var, val);
+    return 1;
+}
+/**
+ * Silencer to JSON
+ *
+ * Write the global silencers configuration into a buffer as a JSON object with
+ * the members "all", "type" and "silencers" (one object per configured silencer).
+ *
+ * @param wb is the buffer to write the silencers.
+ */
+void health_silencers2json(BUFFER *wb) {
+    const char *stype_name;
+    if (silencers->stype == STYPE_NONE)
+        stype_name = "None";
+    else if (silencers->stype == STYPE_DISABLE_ALARMS)
+        stype_name = "DISABLE";
+    else
+        stype_name = "SILENCE";
+
+    buffer_sprintf(wb, "{\n\t\"all\": %s,"
+                       "\n\t\"type\": \"%s\","
+                       "\n\t\"silencers\": [",
+                   (silencers->all_alarms) ? "true" : "false",
+                   stype_name);
+
+    int count = 0;
+    SILENCER *current = silencers->silencers;
+    while (current) {
+        // consecutive silencer objects are separated by a comma
+        if (count) buffer_strcat(wb, ",");
+        buffer_strcat(wb, "\n\t\t{");
+
+        // has_prev records whether a member was already written inside this object
+        int has_prev = 0;
+        has_prev = health_silencers2json_entry(wb, HEALTH_ALARM_KEY, current->alarms, has_prev);
+        has_prev = health_silencers2json_entry(wb, HEALTH_CHART_KEY, current->charts, has_prev);
+        has_prev = health_silencers2json_entry(wb, HEALTH_CONTEXT_KEY, current->contexts, has_prev);
+        has_prev = health_silencers2json_entry(wb, HEALTH_HOST_KEY, current->hosts, has_prev);
+        health_silencers2json_entry(wb, HEALTH_FAMILIES_KEY, current->families, has_prev);
+
+        buffer_strcat(wb, "\n\t\t}");
+        count++;
+        current = current->next;
+    }
+
+    // only break the line before the closing bracket when the array is not empty
+    if (count) buffer_strcat(wb, "\n\t");
+    buffer_strcat(wb, "]\n}\n");
+}
+/**
+ * Silencer to FILE
+ *
+ * Write the silencer buffer to the file named by silencers_filename, replacing
+ * its previous contents. Nothing is done when the buffer is empty.
+ *
+ * A failure to open the file, a short write, or a failure while closing
+ * (which is when buffered data is flushed) is logged as an error.
+ *
+ * @param wb the buffer holding the serialized silencers; wb->len bytes are written.
+ */
+void health_silencers2file(BUFFER *wb) {
+    if (wb->len == 0) return;
+
+    FILE *fd = fopen(silencers_filename, "wb");
+    if (!fd) {
+        error("Silencer changes could not be written to %s. Error %s", silencers_filename, strerror(errno));
+        return;
+    }
+
+    int failed = 0;
+    int saved_errno = 0;
+
+    // write exactly wb->len bytes; anything less is a failure
+    if (fwrite(wb->buffer, 1, wb->len, fd) != wb->len) {
+        failed = 1;
+        saved_errno = errno;
+    }
+
+    // always close the stream; keep the first error seen so the log shows the real cause
+    if (fclose(fd) != 0 && !failed) {
+        failed = 1;
+        saved_errno = errno;
+    }
+
+    if (failed) {
+        error("Silencer changes could not be written to %s. Error %s", silencers_filename, strerror(saved_errno));
+        return;
+    }
+
+    info("Silencer changes written to %s", silencers_filename);
+}
+
+/**
+ * Request V1 MGMT Health
+ *
+ * Function called by the API to manage the health configuration.
+ *
+ * @param host the host the request was made for (not used by this function).
+ * @param w is the structure with all information of the client request.
+ * @param url is the URL of the request being processed.
+ *
+ * @return It returns 200 on success and another code otherwise.
+ */
int web_client_api_request_v1_mgmt_health(RRDHOST *host, struct web_client *w, char *url) {
int ret = 400;
(void) host;
-
-
BUFFER *wb = w->response.data;
buffer_flush(wb);
wb->contenttype = CT_TEXT_PLAIN;
buffer_flush(w->response.data);
- static uint32_t
- hash_alarm = 0,
- hash_template = 0,
- hash_chart = 0,
- hash_context = 0,
- hash_host = 0,
- hash_families = 0;
-
- if (unlikely(!hash_alarm)) {
- hash_alarm = simple_uhash(HEALTH_ALARM_KEY);
- hash_template = simple_uhash(HEALTH_TEMPLATE_KEY);
- hash_chart = simple_uhash(HEALTH_CHART_KEY);
- hash_context = simple_uhash(HEALTH_CONTEXT_KEY);
- hash_host = simple_uhash(HEALTH_HOST_KEY);
- hash_families = simple_uhash(HEALTH_FAMILIES_KEY);
- }
-
+ //Local instance of the silencer
SILENCER *silencer = NULL;
+ int config_changed = 1;
if (!w->auth_bearer_token) {
buffer_strcat(wb, HEALTH_CMDAPI_MSG_AUTHERROR);
@@ -105,50 +170,17 @@ int web_client_api_request_v1_mgmt_health(RRDHOST *host, struct web_client *w, c
free_silencers(silencers->silencers);
silencers->silencers = NULL;
buffer_strcat(wb, HEALTH_CMDAPI_MSG_RESET);
+ } else if (!strcmp(value, HEALTH_CMDAPI_CMD_LIST)) {
+ w->response.data->contenttype = CT_APPLICATION_JSON;
+ health_silencers2json(wb);
+ config_changed=0;
}
} else {
- uint32_t hash = simple_uhash(key);
- if (unlikely(silencer == NULL)) {
- if (
- (hash == hash_alarm && !strcasecmp(key, HEALTH_ALARM_KEY)) ||
- (hash == hash_template && !strcasecmp(key, HEALTH_TEMPLATE_KEY)) ||
- (hash == hash_chart && !strcasecmp(key, HEALTH_CHART_KEY)) ||
- (hash == hash_context && !strcasecmp(key, HEALTH_CONTEXT_KEY)) ||
- (hash == hash_host && !strcasecmp(key, HEALTH_HOST_KEY)) ||
- (hash == hash_families && !strcasecmp(key, HEALTH_FAMILIES_KEY))
- ) {
- silencer = create_silencer();
- }
- }
-
- if (hash == hash_alarm && !strcasecmp(key, HEALTH_ALARM_KEY)) {
- silencer->alarms = strdupz(value);
- silencer->alarms_pattern = simple_pattern_create(silencer->alarms, NULL, SIMPLE_PATTERN_EXACT);
- } else if (hash == hash_chart && !strcasecmp(key, HEALTH_CHART_KEY)) {
- silencer->charts = strdupz(value);
- silencer->charts_pattern = simple_pattern_create(silencer->charts, NULL, SIMPLE_PATTERN_EXACT);
- } else if (hash == hash_context && !strcasecmp(key, HEALTH_CONTEXT_KEY)) {
- silencer->contexts = strdupz(value);
- silencer->contexts_pattern = simple_pattern_create(silencer->contexts, NULL, SIMPLE_PATTERN_EXACT);
- } else if (hash == hash_host && !strcasecmp(key, HEALTH_HOST_KEY)) {
- silencer->hosts = strdupz(value);
- silencer->hosts_pattern = simple_pattern_create(silencer->hosts, NULL, SIMPLE_PATTERN_EXACT);
- } else if (hash == hash_families && !strcasecmp(key, HEALTH_FAMILIES_KEY)) {
- silencer->families = strdupz(value);
- silencer->families_pattern = simple_pattern_create(silencer->families, NULL, SIMPLE_PATTERN_EXACT);
- } else {
- buffer_strcat(wb, HEALTH_CMDAPI_MSG_INVALID_KEY);
- }
+ silencer = health_silencers_addparam(silencer, key, value);
}
-
}
if (likely(silencer)) {
- // Add the created instance to the linked list in silencers
- silencer->next = silencers->silencers;
- silencers->silencers = silencer;
- debug(D_HEALTH, "HEALTH command API: Added silencer %s:%s:%s:%s:%s", silencer->alarms,
- silencer->charts, silencer->contexts, silencer->hosts, silencer->families
- );
+ health_silencers_add(silencer);
buffer_strcat(wb, HEALTH_CMDAPI_MSG_ADDED);
if (silencers->stype == STYPE_NONE) {
buffer_strcat(wb, HEALTH_CMDAPI_MSG_STYPEWARNING);
@@ -162,5 +194,11 @@ int web_client_api_request_v1_mgmt_health(RRDHOST *host, struct web_client *w, c
}
w->response.data = wb;
buffer_no_cacheable(w->response.data);
+ if (ret == 200 && config_changed) {
+ BUFFER *jsonb = buffer_create(200);
+ health_silencers2json(jsonb);
+ health_silencers2file(jsonb);
+ }
+
return ret;
}
diff --git a/web/api/health/health_cmdapi.h b/web/api/health/health_cmdapi.h
index d0f30401c..d8ec6aaa0 100644
--- a/web/api/health/health_cmdapi.h
+++ b/web/api/health/health_cmdapi.h
@@ -12,6 +12,7 @@
#define HEALTH_CMDAPI_CMD_SILENCE "SILENCE"
#define HEALTH_CMDAPI_CMD_DISABLE "DISABLE"
#define HEALTH_CMDAPI_CMD_RESET "RESET"
+#define HEALTH_CMDAPI_CMD_LIST "LIST"
#define HEALTH_CMDAPI_MSG_AUTHERROR "Auth Error\n"
#define HEALTH_CMDAPI_MSG_SILENCEALL "All alarm notifications are silenced\n"
@@ -20,7 +21,6 @@
#define HEALTH_CMDAPI_MSG_DISABLE "Health checks disabled for alarms matching the selectors\n"
#define HEALTH_CMDAPI_MSG_SILENCE "Alarm notifications silenced for alarms matching the selectors\n"
#define HEALTH_CMDAPI_MSG_ADDED "Alarm selector added\n"
-#define HEALTH_CMDAPI_MSG_INVALID_KEY "Invalid key. Ignoring it.\n"
#define HEALTH_CMDAPI_MSG_STYPEWARNING "WARNING: Added alarm selector to silence/disable alarms without a SILENCE or DISABLE command.\n"
#define HEALTH_CMDAPI_MSG_NOSELECTORWARNING "WARNING: SILENCE or DISABLE command is ineffective without defining any alarm selectors.\n"
diff --git a/web/api/netdata-swagger.json b/web/api/netdata-swagger.json
index 2fa55c4fa..63bc5638d 100644
--- a/web/api/netdata-swagger.json
+++ b/web/api/netdata-swagger.json
@@ -77,6 +77,39 @@
}
}
},
+ "/alarm_variables": {
+ "get": {
+ "summary": "List variables available to configure alarms for a chart",
+ "description": "Returns the basic information of a chart and all the variables that can be used in alarm and template health configurations for the particular chart or family",
+ "parameters": [
+ {
+ "name": "chart",
+ "in": "query",
+ "description": "The id of the chart as returned by the /charts call.",
+ "required": true,
+ "type": "string",
+ "format": "as returned by /charts"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "A javascript object with information about the chart and the available variables",
+ "schema": {
+ "$ref": "#/definitions/alarm_variables"
+ }
+ },
+ "400": {
+ "description": "Bad request - the body will include a message stating what is wrong."
+ },
+ "404": {
+ "description": "No chart with the given id is found."
+ },
+ "500": {
+ "description": "Internal server error. This usually means the server is out of memory."
+ }
+ }
+ }
+ },
"/data": {
"get": {
"summary": "Get collected data for a specific chart",
@@ -631,7 +664,7 @@
{
"name": "cmd",
"in": "query",
- "description": "DISABLE ALL: No alarm criteria are evaluated, nothing is written in the alarm log. SILENCE ALL: No notifications are sent. RESET: Return to the default state. DISABLE/SILENCE: Set the mode to be used for the alarms matching the criteria of the alarm selectors.",
+ "description": "DISABLE ALL: No alarm criteria are evaluated, nothing is written in the alarm log. SILENCE ALL: No notifications are sent. RESET: Return to the default state. DISABLE/SILENCE: Set the mode to be used for the alarms matching the criteria of the alarm selectors. LIST: Show active configuration.",
"required": false,
"type": "string",
"enum": [
@@ -639,7 +672,8 @@
"SILENCE ALL",
"DISABLE",
"SILENCE",
- "RESET"
+ "RESET",
+ "LIST"
]
},
{
@@ -951,6 +985,70 @@
}
}
},
+ "alarm_variables": {
+ "type": "object",
+ "properties": {
+ "chart": {
+ "type": "string",
+ "description": "The unique id of the chart"
+ },
+ "chart_name": {
+ "type": "string",
+ "description": "The name of the chart"
+ },
+ "chart_context": {
+ "type": "string",
+ "description": "The context of the chart. It is shared across multiple monitored software or hardware instances and used in alarm templates"
+ },
+ "family": {
+ "type": "string",
+ "description": "The family of the chart."
+ },
+ "host": {
+ "type": "string",
+ "description": "The host containing the chart."
+ },
+ "chart_variables": {
+ "type": "object",
+ "properties": {
+ "varname1": {
+ "type": "number",
+ "format": "float"
+ },
+ "varname2": {
+ "type": "number",
+ "format": "float"
+ }
+ }
+ },
+ "family_variables": {
+ "type": "object",
+ "properties": {
+ "varname1": {
+ "type": "number",
+ "format": "float"
+ },
+ "varname2": {
+ "type": "number",
+ "format": "float"
+ }
+ }
+ },
+ "host_variables": {
+ "type": "object",
+ "properties": {
+ "varname1": {
+ "type": "number",
+ "format": "float"
+ },
+ "varname2": {
+ "type": "number",
+ "format": "float"
+ }
+ }
+ }
+ }
+ },
"dimension": {
"type": "object",
"properties": {
@@ -1208,6 +1306,14 @@
"crit_parsed": {
"type": "string"
},
+ "warn_repeat_every": {
+ "type": "integer",
+ "format": "int32"
+ },
+ "crit_repeat_every": {
+ "type": "integer",
+ "format": "int32"
+ },
"green": {
"type": "string",
"format": "nullable"
diff --git a/web/api/netdata-swagger.yaml b/web/api/netdata-swagger.yaml
index c021efefa..3386e01a7 100644
--- a/web/api/netdata-swagger.yaml
+++ b/web/api/netdata-swagger.yaml
@@ -63,6 +63,28 @@ paths:
$ref: '#/definitions/chart'
'404':
description: 'No chart with the given id is found.'
+ /alarm_variables:
+ get:
+ summary: 'List variables available to configure alarms for a chart'
+ description: 'Returns the basic information of a chart and all the variables that can be used in alarm and template health configurations for the particular chart or family'
+ parameters:
+ - name: chart
+ in: query
+ description: 'The id of the chart as returned by the /charts call.'
+ required: true
+ type: string
+ format: 'as returned by /charts'
+ responses:
+ '200':
+ description: 'A javascript object with information about the chart and the available variables'
+ schema:
+ $ref: '#/definitions/alarm_variables'
+ '400':
+ description: 'Bad request - the body will include a message stating what is wrong.'
+ '404':
+ description: 'No chart with the given id is found.'
+ '500':
+ description: 'Internal server error. This usually means the server is out of memory.'
/data:
get:
summary: 'Get collected data for a specific chart'
@@ -415,10 +437,10 @@ paths:
parameters:
- name: cmd
in: query
- description: 'DISABLE ALL: No alarm criteria are evaluated, nothing is written in the alarm log. SILENCE ALL: No notifications are sent. RESET: Return to the default state. DISABLE/SILENCE: Set the mode to be used for the alarms matching the criteria of the alarm selectors.'
+ description: 'DISABLE ALL: No alarm criteria are evaluated, nothing is written in the alarm log. SILENCE ALL: No notifications are sent. RESET: Return to the default state. DISABLE/SILENCE: Set the mode to be used for the alarms matching the criteria of the alarm selectors. LIST: Show active configuration.'
required: false
type: string
- enum: ['DISABLE ALL', 'SILENCE ALL', 'DISABLE', 'SILENCE', 'RESET']
+ enum: ['DISABLE ALL', 'SILENCE ALL', 'DISABLE', 'SILENCE', 'RESET', 'LIST']
- name: alarm
in: query
description: 'The expression provided will match both `alarm` and `template` names.'
@@ -638,6 +660,51 @@ definitions:
red:
type: number
description: 'Chart health red threshold'
+ alarm_variables:
+ type: object
+ properties:
+ chart:
+ type: string
+ description: 'The unique id of the chart'
+ chart_name:
+ type: string
+ description: 'The name of the chart'
+ chart_context:
+ type: string
+ description: 'The context of the chart. It is shared across multiple monitored software or hardware instances and used in alarm templates'
+ family:
+ type: string
+ description: 'The family of the chart.'
+ host:
+ type: string
+ description: 'The host containing the chart.'
+ chart_variables:
+ type: object
+ properties:
+ varname1:
+ type: number
+ format: float
+ varname2:
+ type: number
+ format: float
+ family_variables:
+ type: object
+ properties:
+ varname1:
+ type: number
+ format: float
+ varname2:
+ type: number
+ format: float
+ host_variables:
+ type: object
+ properties:
+ varname1:
+ type: number
+ format: float
+ varname2:
+ type: number
+ format: float
dimension:
type: object
properties:
@@ -825,6 +892,12 @@ definitions:
type: string
crit_parsed:
type: string
+ warn_repeat_every:
+ type: integer
+ format: int32
+ crit_repeat_every:
+ type: integer
+ format: int32
green:
type: string
format: nullable