summaryrefslogtreecommitdiffstats
path: root/exec/mon.c
diff options
context:
space:
mode:
Diffstat (limited to 'exec/mon.c')
-rw-r--r--exec/mon.c511
1 files changed, 511 insertions, 0 deletions
diff --git a/exec/mon.c b/exec/mon.c
new file mode 100644
index 0000000..3f71fb5
--- /dev/null
+++ b/exec/mon.c
@@ -0,0 +1,511 @@
+/*
+ * Copyright (c) 2010-2012 Red Hat, Inc.
+ *
+ * All rights reserved.
+ *
+ * Author: Angus Salkeld <asalkeld@redhat.com>
+ *
+ * This software licensed under BSD license, the text of which follows:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * - Neither the name of the MontaVista Software, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <unistd.h>
+#include <statgrab.h>
+
+#include <corosync/corotypes.h>
+#include <corosync/corodefs.h>
+#include <corosync/coroapi.h>
+#include <qb/qblist.h>
+#include <corosync/logsys.h>
+#include <corosync/icmap.h>
+#include "fsm.h"
+
+#include "service.h"
+
+LOGSYS_DECLARE_SUBSYS ("MON");
+
+/*
+ * Service Interfaces required by service_message_handler struct
+ */
+static char *mon_exec_init_fn (struct corosync_api_v1 *corosync_api);
+
+static struct corosync_api_v1 *api;
+#define MON_DEFAULT_PERIOD 3000
+#define MON_MIN_PERIOD 500
+#define MON_MAX_PERIOD (120 * CS_TIME_MS_IN_SEC)
+
+struct corosync_service_engine mon_service_engine = {
+ .name = "corosync resource monitoring service",
+ .id = MON_SERVICE,
+ .priority = 1,
+ .private_data_size = 0,
+ .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED,
+ .lib_init_fn = NULL,
+ .lib_exit_fn = NULL,
+ .lib_engine = NULL,
+ .lib_engine_count = 0,
+ .exec_engine = NULL,
+ .exec_engine_count = 0,
+ .confchg_fn = NULL,
+ .exec_init_fn = mon_exec_init_fn,
+ .exec_dump_fn = NULL
+};
+
+static QB_LIST_DECLARE (confchg_notify);
+
+
+struct resource_instance {
+ const char *icmap_path;
+ const char *name;
+ corosync_timer_handle_t timer_handle;
+ void (*update_stats_fn) (void *data);
+ struct cs_fsm fsm;
+ uint64_t period;
+ icmap_value_types_t max_type;
+ union {
+ int32_t int32;
+ double dbl;
+ } max;
+};
+
+static void mem_update_stats_fn (void *data);
+static void load_update_stats_fn (void *data);
+
+static struct resource_instance memory_used_inst = {
+ .name = "memory_used",
+ .icmap_path = "resources.system.memory_used.",
+ .update_stats_fn = mem_update_stats_fn,
+ .max_type = ICMAP_VALUETYPE_INT32,
+ .max.int32 = INT32_MAX,
+ .period = MON_DEFAULT_PERIOD,
+};
+
+static struct resource_instance load_15min_inst = {
+ .name = "load_15min",
+ .icmap_path = "resources.system.load_15min.",
+ .update_stats_fn = load_update_stats_fn,
+ .max_type = ICMAP_VALUETYPE_DOUBLE,
+ .max.dbl = INT32_MAX,
+ .period = MON_DEFAULT_PERIOD,
+};
+
+
+/*
+ * F S M
+ */
+static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data);
+static void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data);
+
+const char * mon_running_str = "running";
+const char * mon_failed_str = "failed";
+const char * mon_failure_str = "failure";
+const char * mon_stopped_str = "stopped";
+const char * mon_config_changed_str = "config_changed";
+
+enum mon_resource_state {
+ MON_S_STOPPED,
+ MON_S_RUNNING,
+ MON_S_FAILED
+};
+enum mon_resource_event {
+ MON_E_CONFIG_CHANGED,
+ MON_E_FAILURE
+};
+
+struct cs_fsm_entry mon_fsm_table[] = {
+ { MON_S_STOPPED, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_STOPPED, MON_S_RUNNING, -1} },
+ { MON_S_STOPPED, MON_E_FAILURE, NULL, {-1} },
+ { MON_S_RUNNING, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_RUNNING, MON_S_STOPPED, -1} },
+ { MON_S_RUNNING, MON_E_FAILURE, mon_resource_failed, {MON_S_FAILED, -1} },
+ { MON_S_FAILED, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_RUNNING, MON_S_STOPPED, -1} },
+ { MON_S_FAILED, MON_E_FAILURE, NULL, {-1} },
+};
+
+struct corosync_service_engine *mon_get_service_engine_ver0 (void)
+{
+ return (&mon_service_engine);
+}
+
+static const char * mon_res_state_to_str(struct cs_fsm* fsm,
+ int32_t state)
+{
+ switch (state) {
+ case MON_S_STOPPED:
+ return mon_stopped_str;
+ break;
+ case MON_S_RUNNING:
+ return mon_running_str;
+ break;
+ case MON_S_FAILED:
+ return mon_failed_str;
+ break;
+ }
+ return NULL;
+}
+
+static const char * mon_res_event_to_str(struct cs_fsm* fsm,
+ int32_t event)
+{
+ switch (event) {
+ case MON_E_CONFIG_CHANGED:
+ return mon_config_changed_str;
+ break;
+ case MON_E_FAILURE:
+ return mon_failure_str;
+ break;
+ }
+ return NULL;
+}
+
+static void mon_fsm_cb (struct cs_fsm *fsm, int cb_event, int32_t curr_state,
+ int32_t next_state, int32_t fsm_event, void *data)
+{
+ switch (cb_event) {
+ case CS_FSM_CB_EVENT_PROCESS_NF:
+ log_printf (LOGSYS_LEVEL_ERROR, "Fsm:%s could not find event \"%s\" in state \"%s\"",
+ fsm->name, fsm->event_to_str(fsm, fsm_event), fsm->state_to_str(fsm, curr_state));
+ corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
+ break;
+ case CS_FSM_CB_EVENT_STATE_SET:
+ log_printf (LOGSYS_LEVEL_INFO, "Fsm:%s event \"%s\", state \"%s\" --> \"%s\"",
+ fsm->name,
+ fsm->event_to_str(fsm, fsm_event),
+ fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
+ fsm->state_to_str(fsm, next_state));
+ break;
+ case CS_FSM_CB_EVENT_STATE_SET_NF:
+ log_printf (LOGSYS_LEVEL_CRIT, "Fsm:%s Can't change state from \"%s\" to \"%s\" (event was \"%s\")",
+ fsm->name,
+ fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
+ fsm->state_to_str(fsm, next_state),
+ fsm->event_to_str(fsm, fsm_event));
+ corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
+ break;
+ default:
+ log_printf (LOGSYS_LEVEL_CRIT, "Fsm: Can't find callback event!");
+ corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
+ break;
+ }
+}
+
+static void mon_fsm_state_set (struct cs_fsm* fsm,
+ enum mon_resource_state next_state, struct resource_instance* inst)
+{
+ enum mon_resource_state prev_state = fsm->curr_state;
+ const char *state_str;
+ char key_name[ICMAP_KEYNAME_MAXLEN];
+
+ ENTER();
+
+ cs_fsm_state_set(fsm, next_state, inst, mon_fsm_cb);
+
+ if (prev_state == fsm->curr_state) {
+ return;
+ }
+ state_str = mon_res_state_to_str(fsm, fsm->curr_state);
+
+ snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "state");
+ icmap_set_string(key_name, state_str);
+}
+
+
+static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data)
+{
+ struct resource_instance * inst = (struct resource_instance *)data;
+ char *tmp_str;
+ uint64_t tmp_value;
+ char key_name[ICMAP_KEYNAME_MAXLEN];
+ int run_updater;
+ int scanf_res = 0;
+ int32_t i32;
+ double dbl;
+
+ ENTER();
+
+ snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "poll_period");
+ if (icmap_get_string(key_name, &tmp_str) == CS_OK) {
+ scanf_res = sscanf(tmp_str, "%"PRIu64, &tmp_value);
+ if (scanf_res != 1) {
+ log_printf (LOGSYS_LEVEL_WARNING,
+ "Could NOT use poll_period: %s (not uint64 type) for resource %s",
+ tmp_str, inst->name);
+ }
+ free(tmp_str);
+
+ if (tmp_value >= MON_MIN_PERIOD && tmp_value <= MON_MAX_PERIOD) {
+ log_printf (LOGSYS_LEVEL_DEBUG,
+ "poll_period changing from:%"PRIu64" to %"PRIu64".",
+ inst->period, tmp_value);
+ inst->period = tmp_value;
+ } else {
+ log_printf (LOGSYS_LEVEL_WARNING,
+ "Could NOT use poll_period:%"PRIu64" ms for resource %s",
+ tmp_value, inst->name);
+ }
+ }
+
+ if (inst->timer_handle) {
+ api->timer_delete(inst->timer_handle);
+ inst->timer_handle = 0;
+ }
+
+ run_updater = 0;
+
+ snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "max");
+
+ if (icmap_get_string(key_name, &tmp_str) == CS_OK) {
+ if (inst->max_type == ICMAP_VALUETYPE_INT32) {
+ if (sscanf(tmp_str, "%"PRId32, &i32) != 1) {
+ inst->max.int32 = INT32_MAX;
+
+ mon_fsm_state_set (fsm, MON_S_STOPPED, inst);
+ } else {
+ inst->max.int32 = i32;
+ run_updater = 1;
+ }
+ }
+ if (inst->max_type == ICMAP_VALUETYPE_DOUBLE) {
+ if (sscanf(tmp_str, "%lf", &dbl) != 1) {
+ inst->max.dbl = INT32_MAX;
+
+ mon_fsm_state_set (fsm, MON_S_STOPPED, inst);
+ } else {
+ inst->max.dbl = dbl;
+ run_updater = 1;
+ }
+ }
+ free(tmp_str);
+ }
+
+ if (run_updater) {
+ mon_fsm_state_set (fsm, MON_S_RUNNING, inst);
+ /*
+ * run the updater, incase the period has shortened
+ * and to start the timer.
+ */
+ inst->update_stats_fn (inst);
+ }
+}
+
+void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data)
+{
+ struct resource_instance * inst = (struct resource_instance *)data;
+ ENTER();
+ mon_fsm_state_set (fsm, MON_S_FAILED, inst);
+}
+
+static int32_t percent_mem_used_get(void)
+{
+ sg_mem_stats *mem_stats;
+ sg_swap_stats *swap_stats;
+ long long total, freemem;
+
+#ifdef HAVE_LIBSTATGRAB_GE_090
+ mem_stats = sg_get_mem_stats(NULL);
+ swap_stats = sg_get_swap_stats(NULL);
+#else
+ mem_stats = sg_get_mem_stats();
+ swap_stats = sg_get_swap_stats();
+#endif
+
+ if (mem_stats == NULL || swap_stats == NULL) {
+ log_printf (LOGSYS_LEVEL_ERROR, "Unable to get memory stats: %s",
+ sg_str_error(sg_get_error()));
+ return -1;
+ }
+ total = mem_stats->total + swap_stats->total;
+ freemem = mem_stats->free + swap_stats->free;
+ return ((total - freemem) * 100) / total;
+}
+
+static void mem_update_stats_fn (void *data)
+{
+ struct resource_instance * inst = (struct resource_instance *)data;
+ int32_t new_value;
+ uint64_t timestamp;
+ char key_name[ICMAP_KEYNAME_MAXLEN];
+
+ new_value = percent_mem_used_get();
+ if (new_value > 0) {
+ snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current");
+ icmap_set_uint32(key_name, new_value);
+
+ timestamp = cs_timestamp_get();
+
+ snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated");
+ icmap_set_uint64(key_name, timestamp);
+
+ if (new_value > inst->max.int32 && inst->fsm.curr_state != MON_S_FAILED) {
+ cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst, mon_fsm_cb);
+ }
+ }
+ api->timer_add_duration(inst->period * MILLI_2_NANO_SECONDS,
+ inst, inst->update_stats_fn, &inst->timer_handle);
+}
+
+static double min15_loadavg_get(void)
+{
+ sg_load_stats *load_stats;
+
+#ifdef HAVE_LIBSTATGRAB_GE_090
+ load_stats = sg_get_load_stats (NULL);
+#else
+ load_stats = sg_get_load_stats ();
+#endif
+ if (load_stats == NULL) {
+ log_printf (LOGSYS_LEVEL_ERROR, "Unable to get load stats: %s",
+ sg_str_error (sg_get_error()));
+ return -1;
+ }
+ return load_stats->min15;
+}
+
+static void load_update_stats_fn (void *data)
+{
+ struct resource_instance * inst = (struct resource_instance *)data;
+ uint64_t timestamp;
+ char key_name[ICMAP_KEYNAME_MAXLEN];
+ double min15 = min15_loadavg_get();
+
+ if (min15 > 0) {
+ snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current");
+ icmap_set_double(key_name, min15);
+
+ timestamp = cs_timestamp_get();
+
+ snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated");
+ icmap_set_uint64(key_name, timestamp);
+
+ if (min15 > inst->max.dbl && inst->fsm.curr_state != MON_S_FAILED) {
+ cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst, mon_fsm_cb);
+ }
+ }
+
+ api->timer_add_duration(inst->period * MILLI_2_NANO_SECONDS,
+ inst, inst->update_stats_fn, &inst->timer_handle);
+}
+
+static void mon_key_changed_cb (
+ int32_t event,
+ const char *key_name,
+ struct icmap_notify_value new_value,
+ struct icmap_notify_value old_value,
+ void *user_data)
+{
+ struct resource_instance* inst = (struct resource_instance*)user_data;
+ char *last_key_part;
+
+ if (event == ICMAP_TRACK_DELETE && inst) {
+ log_printf (LOGSYS_LEVEL_WARNING,
+ "resource \"%s\" deleted from cmap!",
+ inst->name);
+
+ cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb);
+ }
+
+ if (event == ICMAP_TRACK_MODIFY) {
+ last_key_part = strrchr(key_name, '.');
+ if (last_key_part == NULL)
+ return ;
+
+ last_key_part++;
+ if (strcmp(last_key_part, "max") == 0 ||
+ strcmp(last_key_part, "poll_period") == 0) {
+ ENTER();
+ cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb);
+ }
+ }
+}
+
+static void mon_instance_init (struct resource_instance* inst)
+{
+ uint64_t tmp_value;
+ char key_name[ICMAP_KEYNAME_MAXLEN];
+ icmap_track_t icmap_track = NULL;
+ char *tmp_str;
+
+ snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current");
+ if (inst->max_type == ICMAP_VALUETYPE_INT32) {
+ icmap_set_int32(key_name, 0);
+ } else {
+ icmap_set_double(key_name, 0);
+ }
+
+ snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated");
+ icmap_set_uint64(key_name, 0);
+
+ snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "state");
+ icmap_set_string(key_name, mon_stopped_str);
+
+ inst->fsm.name = inst->name;
+ inst->fsm.curr_entry = 0;
+ inst->fsm.curr_state = MON_S_STOPPED;
+ inst->fsm.table = mon_fsm_table;
+ inst->fsm.entries = sizeof(mon_fsm_table) / sizeof(struct cs_fsm_entry);
+ inst->fsm.state_to_str = mon_res_state_to_str;
+ inst->fsm.event_to_str = mon_res_event_to_str;
+
+ snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "poll_period");
+ if (icmap_get_string(key_name, &tmp_str) != CS_OK ||
+ sscanf(tmp_str, "%"PRIu64, &tmp_value) != 1) {
+ icmap_set_uint64(key_name, inst->period);
+ }
+ else {
+ if (tmp_value >= MON_MIN_PERIOD && tmp_value <= MON_MAX_PERIOD) {
+ inst->period = tmp_value;
+ } else {
+ log_printf (LOGSYS_LEVEL_WARNING,
+ "Could NOT use poll_period:%"PRIu64" ms for resource %s",
+ tmp_value, inst->name);
+ }
+ free(tmp_str);
+ }
+ cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb);
+
+ icmap_track_add(inst->icmap_path,
+ ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY | ICMAP_TRACK_DELETE | ICMAP_TRACK_PREFIX,
+ mon_key_changed_cb, inst, &icmap_track);
+}
+
+static char *mon_exec_init_fn (struct corosync_api_v1 *corosync_api)
+{
+#ifdef HAVE_LIBSTATGRAB_GE_090
+ sg_init(1);
+#else
+ sg_init();
+#endif
+
+ api = corosync_api;
+
+ mon_instance_init (&memory_used_inst);
+ mon_instance_init (&load_15min_inst);
+
+ return NULL;
+}
+
+