diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-07-20 04:50:01 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-07-20 04:50:01 +0000 |
commit | cd4377fab21e0f500bef7f06543fa848a039c1e0 (patch) | |
tree | ba00a55e430c052d6bed0b61c0f8bbe8ebedd313 /collectors/freeipmi.plugin/freeipmi_plugin.c | |
parent | Releasing debian version 1.40.1-1. (diff) | |
download | netdata-cd4377fab21e0f500bef7f06543fa848a039c1e0.tar.xz netdata-cd4377fab21e0f500bef7f06543fa848a039c1e0.zip |
Merging upstream version 1.41.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors/freeipmi.plugin/freeipmi_plugin.c')
-rw-r--r-- | collectors/freeipmi.plugin/freeipmi_plugin.c | 2512 |
1 files changed, 1268 insertions, 1244 deletions
diff --git a/collectors/freeipmi.plugin/freeipmi_plugin.c b/collectors/freeipmi.plugin/freeipmi_plugin.c index a2251891a..bfd867cc9 100644 --- a/collectors/freeipmi.plugin/freeipmi_plugin.c +++ b/collectors/freeipmi.plugin/freeipmi_plugin.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-3.0-or-later /* * netdata freeipmi.plugin - * Copyright (C) 2017 Costa Tsaousis + * Copyright (C) 2023 Netdata Inc. * GPL v3+ * * Based on: @@ -15,9 +15,59 @@ * UCRL-CODE-222073 */ +// ---------------------------------------------------------------------------- +// BEGIN NETDATA CODE + +// #define NETDATA_TIMING_REPORT 1 #include "libnetdata/libnetdata.h" #include "libnetdata/required_dummies.h" +// component names, based on our patterns +#define NETDATA_SENSOR_COMPONENT_MEMORY_MODULE "Memory Module" +#define NETDATA_SENSOR_COMPONENT_MEMORY "Memory" +#define NETDATA_SENSOR_COMPONENT_PROCESSOR "Processor" +#define NETDATA_SENSOR_COMPONENT_IPU "Image Processor" +#define NETDATA_SENSOR_COMPONENT_STORAGE "Storage" +#define NETDATA_SENSOR_COMPONENT_MOTHERBOARD "Motherboard" +#define NETDATA_SENSOR_COMPONENT_NETWORK "Network" +#define NETDATA_SENSOR_COMPONENT_POWER_SUPPLY "Power Supply" +#define NETDATA_SENSOR_COMPONENT_SYSTEM "System" +#define NETDATA_SENSOR_COMPONENT_PERIPHERAL "Peripheral" + +// netdata plugin defaults +#define SENSORS_DICT_KEY_SIZE 2048 // the max size of the key for the dictionary of sensors +#define SPEED_TEST_ITERATIONS 5 // how many times to repeat data collection to decide latency +#define IPMI_SENSORS_DASHBOARD_PRIORITY 90000 // the priority of the sensors charts on the dashboard +#define IPMI_SEL_DASHBOARD_PRIORITY 99000 // the priority of the SEL events chart on the dashboard +#define IPMI_SENSORS_MIN_UPDATE_EVERY 5 // the minimum data collection frequency for sensors +#define IPMI_SEL_MIN_UPDATE_EVERY 30 // the minimum data collection frequency for SEL events +#define IPMI_ENABLE_SEL_BY_DEFAULT true // true/false, to enable/disable SEL by default +#define IPMI_RESTART_EVERY_SECONDS 14400 // restart the plugin every this many seconds + // this is to prevent possible bugs/leaks in ipmi libraries +#define IPMI_RESTART_IF_SENSORS_DONT_ITERATE_EVERY_SECONDS (10 * 60) // stale data collection detection time + +// forward definition of functions and structures +struct netdata_ipmi_state; +static void netdata_update_ipmi_sensor_reading( + int record_id + , int sensor_number + , int sensor_type + , int sensor_state + , int sensor_units + , int sensor_reading_type + , char *sensor_name + , void *sensor_reading + , int event_reading_type_code + , int sensor_bitmask_type + , int sensor_bitmask + , char **sensor_bitmask_strings + , struct netdata_ipmi_state *state +); +static void netdata_update_ipmi_sel_events_count(struct netdata_ipmi_state *state, uint32_t events); + +// END NETDATA CODE +// ---------------------------------------------------------------------------- + #include <stdio.h> #include <stdlib.h> #include <stdint.h> @@ -27,22 +77,9 @@ #include <unistd.h> #include <sys/time.h> -#define IPMI_PARSE_DEVICE_LAN_STR "lan" -#define IPMI_PARSE_DEVICE_LAN_2_0_STR "lan_2_0" -#define IPMI_PARSE_DEVICE_LAN_2_0_STR2 "lan20" -#define IPMI_PARSE_DEVICE_LAN_2_0_STR3 "lan_20" -#define IPMI_PARSE_DEVICE_LAN_2_0_STR4 "lan2_0" -#define IPMI_PARSE_DEVICE_LAN_2_0_STR5 "lanplus" -#define IPMI_PARSE_DEVICE_KCS_STR "kcs" -#define IPMI_PARSE_DEVICE_SSIF_STR "ssif" -#define IPMI_PARSE_DEVICE_OPENIPMI_STR "openipmi" -#define IPMI_PARSE_DEVICE_OPENIPMI_STR2 "open" -#define IPMI_PARSE_DEVICE_SUNBMC_STR "sunbmc" -#define IPMI_PARSE_DEVICE_SUNBMC_STR2 "bmc" -#define IPMI_PARSE_DEVICE_INTELDCMI_STR "inteldcmi" - #include <ipmi_monitoring.h> #include <ipmi_monitoring_bitmasks.h> +#include <ipmi_monitoring_offsets.h> /* Communication Configuration - Initialize accordingly */ @@ -50,53 +87,38 @@ char *hostname = NULL; /* In-band Communication Configuration */ -int driver_type = -1; // IPMI_MONITORING_DRIVER_TYPE_KCS; /* or -1 for default */ -int disable_auto_probe = 0; /* probe for in-band device */ -unsigned int driver_address = 0; /* not used if probing */ -unsigned int register_spacing = 0; /* not used if probing */ -char *driver_device = NULL; /* not used if probing */ +int driver_type = -1; // IPMI_MONITORING_DRIVER_TYPE_KCS, etc. or -1 for default +int disable_auto_probe = 0; /* probe for in-band device */ +unsigned int driver_address = 0; /* not used if probing */ +unsigned int register_spacing = 0; /* not used if probing */ +char *driver_device = NULL; /* not used if probing */ /* Out-of-band Communication Configuration */ -int protocol_version = -1; //IPMI_MONITORING_PROTOCOL_VERSION_1_5; /* or -1 for default */ -char *username = "foousername"; -char *password = "foopassword"; -unsigned char *ipmi_k_g = NULL; -unsigned int ipmi_k_g_len = 0; -int privilege_level = -1; // IPMI_MONITORING_PRIVILEGE_LEVEL_USER; /* or -1 for default */ -int authentication_type = -1; // IPMI_MONITORING_AUTHENTICATION_TYPE_MD5; /* or -1 for default */ -int cipher_suite_id = 0; /* or -1 for default */ +int protocol_version = -1; // IPMI_MONITORING_PROTOCOL_VERSION_1_5, etc. or -1 for default +char *username = ""; +char *password = ""; +unsigned char *k_g = NULL; +unsigned int k_g_len = 0; +int privilege_level = -1; // IPMI_MONITORING_PRIVILEGE_LEVEL_USER, etc. or -1 for default +int authentication_type = -1; // IPMI_MONITORING_AUTHENTICATION_TYPE_MD5, etc. or -1 for default +int cipher_suite_id = -1; /* 0 or -1 for default */ int session_timeout = 0; /* 0 for default */ int retransmission_timeout = 0; /* 0 for default */ /* Workarounds - specify workaround flags if necessary */ unsigned int workaround_flags = 0; -/* Initialize w/ record id numbers to only monitor specific record ids */ -unsigned int record_ids[] = {0}; -unsigned int record_ids_length = 0; - -/* Initialize w/ sensor types to only monitor specific sensor types - * see ipmi_monitoring.h sensor types list. - */ -unsigned int sensor_types[] = {0}; -unsigned int sensor_types_length = 0; - /* Set to an appropriate alternate if desired */ char *sdr_cache_directory = "/tmp"; +char *sdr_sensors_cache_format = ".netdata-freeipmi-sensors-%H-on-%L.sdr"; +char *sdr_sel_cache_format = ".netdata-freeipmi-sel-%H-on-%L.sdr"; char *sensor_config_file = NULL; +char *sel_config_file = NULL; -/* Set to 1 or 0 to enable these sensor reading flags - * - See ipmi_monitoring.h for descriptions of these flags. - */ -int reread_sdr_cache = 0; -int ignore_non_interpretable_sensors = 0; -int bridge_sensors = 0; -int interpret_oem_data = 0; -int shared_sensors = 0; -int discrete_reading = 1; -int ignore_scanning_disabled = 0; -int assume_bmc_owner = 0; -int entity_sensor_names = 0; +// controlled via command line options +unsigned int global_sel_flags = IPMI_MONITORING_SEL_FLAGS_REREAD_SDR_CACHE; +unsigned int global_sensor_reading_flags = IPMI_MONITORING_SENSOR_READING_FLAGS_DISCRETE_READING|IPMI_MONITORING_SENSOR_READING_FLAGS_REREAD_SDR_CACHE; +bool remove_reread_sdr_after_first_use = true; /* Initialization flags * @@ -106,26 +128,10 @@ int entity_sensor_names = 0; */ unsigned int ipmimonitoring_init_flags = 0; -int errnum; - -// ---------------------------------------------------------------------------- -// SEL only variables - -/* Initialize w/ date range to only monitoring specific date range */ -char *date_begin = NULL; /* use MM/DD/YYYY format */ -char *date_end = NULL; /* use MM/DD/YYYY format */ - -int assume_system_event_record = 0; - -char *sel_config_file = NULL; - - // ---------------------------------------------------------------------------- // functions common to sensors and SEL -static void -_init_ipmi_config (struct ipmi_monitoring_ipmi_config *ipmi_config) -{ +static void initialize_ipmi_config (struct ipmi_monitoring_ipmi_config *ipmi_config) { fatal_assert(ipmi_config); ipmi_config->driver_type = driver_type; @@ -137,8 +143,8 @@ _init_ipmi_config (struct ipmi_monitoring_ipmi_config *ipmi_config) ipmi_config->protocol_version = protocol_version; ipmi_config->username = username; ipmi_config->password = password; - ipmi_config->k_g = ipmi_k_g; - ipmi_config->k_g_len = ipmi_k_g_len; + ipmi_config->k_g = k_g; + ipmi_config->k_g_len = k_g_len; ipmi_config->privilege_level = privilege_level; ipmi_config->authentication_type = authentication_type; ipmi_config->cipher_suite_id = cipher_suite_id; @@ -148,414 +154,566 @@ _init_ipmi_config (struct ipmi_monitoring_ipmi_config *ipmi_config) ipmi_config->workaround_flags = workaround_flags; } -#ifdef NETDATA_COMMENTED -static const char * -_get_sensor_type_string (int sensor_type) -{ - switch (sensor_type) - { +static const char *netdata_ipmi_get_sensor_type_string (int sensor_type, const char **component) { + switch (sensor_type) { case IPMI_MONITORING_SENSOR_TYPE_RESERVED: return ("Reserved"); + case IPMI_MONITORING_SENSOR_TYPE_TEMPERATURE: return ("Temperature"); + case IPMI_MONITORING_SENSOR_TYPE_VOLTAGE: return ("Voltage"); + case IPMI_MONITORING_SENSOR_TYPE_CURRENT: return ("Current"); + case IPMI_MONITORING_SENSOR_TYPE_FAN: return ("Fan"); + case IPMI_MONITORING_SENSOR_TYPE_PHYSICAL_SECURITY: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Physical Security"); + case IPMI_MONITORING_SENSOR_TYPE_PLATFORM_SECURITY_VIOLATION_ATTEMPT: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Platform Security Violation Attempt"); + case IPMI_MONITORING_SENSOR_TYPE_PROCESSOR: + *component = NETDATA_SENSOR_COMPONENT_PROCESSOR; return ("Processor"); + case IPMI_MONITORING_SENSOR_TYPE_POWER_SUPPLY: + *component = NETDATA_SENSOR_COMPONENT_POWER_SUPPLY; return ("Power Supply"); + case IPMI_MONITORING_SENSOR_TYPE_POWER_UNIT: + *component = NETDATA_SENSOR_COMPONENT_POWER_SUPPLY; return ("Power Unit"); + case IPMI_MONITORING_SENSOR_TYPE_COOLING_DEVICE: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Cooling Device"); + case IPMI_MONITORING_SENSOR_TYPE_OTHER_UNITS_BASED_SENSOR: return ("Other Units Based Sensor"); + case IPMI_MONITORING_SENSOR_TYPE_MEMORY: + *component = NETDATA_SENSOR_COMPONENT_MEMORY; return ("Memory"); + case IPMI_MONITORING_SENSOR_TYPE_DRIVE_SLOT: + *component = NETDATA_SENSOR_COMPONENT_STORAGE; return ("Drive Slot"); + case IPMI_MONITORING_SENSOR_TYPE_POST_MEMORY_RESIZE: + *component = NETDATA_SENSOR_COMPONENT_MEMORY; return ("POST Memory Resize"); + case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_FIRMWARE_PROGRESS: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("System Firmware Progress"); + case IPMI_MONITORING_SENSOR_TYPE_EVENT_LOGGING_DISABLED: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Event Logging Disabled"); + case IPMI_MONITORING_SENSOR_TYPE_WATCHDOG1: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Watchdog 1"); + case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_EVENT: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("System Event"); + case IPMI_MONITORING_SENSOR_TYPE_CRITICAL_INTERRUPT: return ("Critical Interrupt"); + case IPMI_MONITORING_SENSOR_TYPE_BUTTON_SWITCH: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Button/Switch"); + case IPMI_MONITORING_SENSOR_TYPE_MODULE_BOARD: return ("Module/Board"); + case IPMI_MONITORING_SENSOR_TYPE_MICROCONTROLLER_COPROCESSOR: + *component = NETDATA_SENSOR_COMPONENT_PROCESSOR; return ("Microcontroller/Coprocessor"); + case IPMI_MONITORING_SENSOR_TYPE_ADD_IN_CARD: return ("Add In Card"); + case IPMI_MONITORING_SENSOR_TYPE_CHASSIS: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Chassis"); + case IPMI_MONITORING_SENSOR_TYPE_CHIP_SET: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Chip Set"); + case IPMI_MONITORING_SENSOR_TYPE_OTHER_FRU: return ("Other Fru"); + case IPMI_MONITORING_SENSOR_TYPE_CABLE_INTERCONNECT: return ("Cable/Interconnect"); + case IPMI_MONITORING_SENSOR_TYPE_TERMINATOR: return ("Terminator"); + case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_BOOT_INITIATED: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("System Boot Initiated"); + case IPMI_MONITORING_SENSOR_TYPE_BOOT_ERROR: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Boot Error"); + case IPMI_MONITORING_SENSOR_TYPE_OS_BOOT: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("OS Boot"); + case IPMI_MONITORING_SENSOR_TYPE_OS_CRITICAL_STOP: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("OS Critical Stop"); + case IPMI_MONITORING_SENSOR_TYPE_SLOT_CONNECTOR: return ("Slot/Connector"); + case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_ACPI_POWER_STATE: return ("System ACPI Power State"); + case IPMI_MONITORING_SENSOR_TYPE_WATCHDOG2: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Watchdog 2"); + case IPMI_MONITORING_SENSOR_TYPE_PLATFORM_ALERT: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Platform Alert"); + case IPMI_MONITORING_SENSOR_TYPE_ENTITY_PRESENCE: return ("Entity Presence"); + case IPMI_MONITORING_SENSOR_TYPE_MONITOR_ASIC_IC: return ("Monitor ASIC/IC"); + case IPMI_MONITORING_SENSOR_TYPE_LAN: + *component = NETDATA_SENSOR_COMPONENT_NETWORK; return ("LAN"); + case IPMI_MONITORING_SENSOR_TYPE_MANAGEMENT_SUBSYSTEM_HEALTH: + *component = NETDATA_SENSOR_COMPONENT_SYSTEM; return ("Management Subsystem Health"); + case IPMI_MONITORING_SENSOR_TYPE_BATTERY: return ("Battery"); + case IPMI_MONITORING_SENSOR_TYPE_SESSION_AUDIT: return ("Session Audit"); + case IPMI_MONITORING_SENSOR_TYPE_VERSION_CHANGE: return ("Version Change"); + case IPMI_MONITORING_SENSOR_TYPE_FRU_STATE: return ("FRU State"); - } - - return ("Unrecognized"); -} -#endif // NETDATA_COMMENTED + case IPMI_MONITORING_SENSOR_TYPE_UNKNOWN: + return ("Unknown"); -// ---------------------------------------------------------------------------- -// BEGIN NETDATA CODE + default: + if(sensor_type >= IPMI_MONITORING_SENSOR_TYPE_OEM_MIN && sensor_type <= IPMI_MONITORING_SENSOR_TYPE_OEM_MAX) + return ("OEM"); -static int debug = 0; + return ("Unrecognized"); + } +} -static int netdata_update_every = 5; // this is the minimum update frequency -static int netdata_priority = 90000; -static int netdata_do_sel = 1; +#define netdata_ipmi_get_value_int(var, func, ctx) do { \ + (var) = func(ctx); \ + if( (var) < 0) { \ + collector_error("%s(): call to " #func " failed: %s", \ + __FUNCTION__, ipmi_monitoring_ctx_errormsg(ctx)); \ + goto cleanup; \ + } \ + timing_step(TIMING_STEP_FREEIPMI_READ_ ## var); \ +} while(0) + +#define netdata_ipmi_get_value_ptr(var, func, ctx) do { \ + (var) = func(ctx); \ + if(!(var)) { \ + collector_error("%s(): call to " #func " failed: %s", \ + __FUNCTION__, ipmi_monitoring_ctx_errormsg(ctx)); \ + goto cleanup; \ + } \ + timing_step(TIMING_STEP_FREEIPMI_READ_ ## var); \ +} while(0) + +#define netdata_ipmi_get_value_no_check(var, func, ctx) do { \ + (var) = func(ctx); \ + timing_step(TIMING_STEP_FREEIPMI_READ_ ## var); \ +} while(0) + +static int netdata_read_ipmi_sensors(struct ipmi_monitoring_ipmi_config *ipmi_config, struct netdata_ipmi_state *state) { + timing_init(); -static size_t netdata_sensors_updated = 0; -static size_t netdata_sensors_collected = 0; -static size_t netdata_sel_events = 0; -static size_t netdata_sensors_states_nominal = 0; -static size_t netdata_sensors_states_warning = 0; -static size_t netdata_sensors_states_critical = 0; + ipmi_monitoring_ctx_t ctx = NULL; + unsigned int sensor_reading_flags = global_sensor_reading_flags; + int i; + int sensor_count; + int rv = -1; -struct sensor { - int record_id; - int sensor_number; - int sensor_type; - int sensor_state; - int sensor_units; - char *sensor_name; + if (!(ctx = ipmi_monitoring_ctx_create ())) { + collector_error("ipmi_monitoring_ctx_create()"); + goto cleanup; + } - int sensor_reading_type; - union { - uint8_t bool_value; - uint32_t uint32_value; - double double_value; - } sensor_reading; + timing_step(TIMING_STEP_FREEIPMI_CTX_CREATE); - int sent; - int ignore; - int exposed; - int updated; - struct sensor *next; -} *sensors_root = NULL; + if (sdr_cache_directory) { + if (ipmi_monitoring_ctx_sdr_cache_directory (ctx, sdr_cache_directory) < 0) { + collector_error("ipmi_monitoring_ctx_sdr_cache_directory(): %s\n", ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } + if (sdr_sensors_cache_format) { + if (ipmi_monitoring_ctx_sdr_cache_filenames(ctx, sdr_sensors_cache_format) < 0) { + collector_error("ipmi_monitoring_ctx_sdr_cache_filenames(): %s\n", ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } -static void netdata_mark_as_not_updated() { - struct sensor *sn; - for(sn = sensors_root; sn ;sn = sn->next) - sn->updated = sn->sent = 0; + timing_step(TIMING_STEP_FREEIPMI_DSR_CACHE_DIR); - netdata_sensors_updated = 0; - netdata_sensors_collected = 0; - netdata_sel_events = 0; + // Must call otherwise only default interpretations ever used + // sensor_config_file can be NULL + if (ipmi_monitoring_ctx_sensor_config_file (ctx, sensor_config_file) < 0) { + collector_error( "ipmi_monitoring_ctx_sensor_config_file(): %s\n", ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } - netdata_sensors_states_nominal = 0; - netdata_sensors_states_warning = 0; - netdata_sensors_states_critical = 0; -} + timing_step(TIMING_STEP_FREEIPMI_SENSOR_CONFIG_FILE); + + if ((sensor_count = ipmi_monitoring_sensor_readings_by_record_id (ctx, + hostname, + ipmi_config, + sensor_reading_flags, + NULL, + 0, + NULL, + NULL)) < 0) { + collector_error( "ipmi_monitoring_sensor_readings_by_record_id(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } -static void send_chart_to_netdata_for_units(int units) { - struct sensor *sn, *sn_stored; - int dupfound, multiplier; + timing_step(TIMING_STEP_FREEIPMI_SENSOR_READINGS_BY_X); - switch(units) { - case IPMI_MONITORING_SENSOR_UNITS_CELSIUS: - printf("CHART ipmi.temperatures_c '' 'System Celsius Temperatures read by IPMI' 'Celsius' 'temperatures' 'ipmi.temperatures_c' 'line' %d %d\n" - , netdata_priority + 10 - , netdata_update_every - ); - break; + for (i = 0; i < sensor_count; i++, ipmi_monitoring_sensor_iterator_next (ctx)) { + int record_id, sensor_number, sensor_type, sensor_state, sensor_units, + sensor_bitmask_type, sensor_bitmask, event_reading_type_code, sensor_reading_type; - case IPMI_MONITORING_SENSOR_UNITS_FAHRENHEIT: - printf("CHART ipmi.temperatures_f '' 'System Fahrenheit Temperatures read by IPMI' 'Fahrenheit' 'temperatures' 'ipmi.temperatures_f' 'line' %d %d\n" - , netdata_priority + 11 - , netdata_update_every - ); - break; + char **sensor_bitmask_strings = NULL; + char *sensor_name = NULL; + void *sensor_reading; - case IPMI_MONITORING_SENSOR_UNITS_VOLTS: - printf("CHART ipmi.volts '' 'System Voltages read by IPMI' 'Volts' 'voltages' 'ipmi.voltages' 'line' %d %d\n" - , netdata_priority + 12 - , netdata_update_every - ); - break; + netdata_ipmi_get_value_int(record_id, ipmi_monitoring_sensor_read_record_id, ctx); + netdata_ipmi_get_value_int(sensor_number, ipmi_monitoring_sensor_read_sensor_number, ctx); + netdata_ipmi_get_value_int(sensor_type, ipmi_monitoring_sensor_read_sensor_type, ctx); + netdata_ipmi_get_value_ptr(sensor_name, ipmi_monitoring_sensor_read_sensor_name, ctx); + netdata_ipmi_get_value_int(sensor_state, ipmi_monitoring_sensor_read_sensor_state, ctx); + netdata_ipmi_get_value_int(sensor_units, ipmi_monitoring_sensor_read_sensor_units, ctx); + netdata_ipmi_get_value_int(sensor_bitmask_type, ipmi_monitoring_sensor_read_sensor_bitmask_type, ctx); + netdata_ipmi_get_value_int(sensor_bitmask, ipmi_monitoring_sensor_read_sensor_bitmask, ctx); + // it's ok for this to be NULL, i.e. sensor_bitmask == IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN + netdata_ipmi_get_value_no_check(sensor_bitmask_strings, ipmi_monitoring_sensor_read_sensor_bitmask_strings, ctx); + netdata_ipmi_get_value_int(sensor_reading_type, ipmi_monitoring_sensor_read_sensor_reading_type, ctx); + // whatever we read from the sensor, it is ok + netdata_ipmi_get_value_no_check(sensor_reading, ipmi_monitoring_sensor_read_sensor_reading, ctx); + netdata_ipmi_get_value_int(event_reading_type_code, ipmi_monitoring_sensor_read_event_reading_type_code, ctx); + + netdata_update_ipmi_sensor_reading( + record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type, sensor_name, + sensor_reading, event_reading_type_code, sensor_bitmask_type, sensor_bitmask, sensor_bitmask_strings, + state + ); - case IPMI_MONITORING_SENSOR_UNITS_AMPS: - printf("CHART ipmi.amps '' 'System Current read by IPMI' 'Amps' 'current' 'ipmi.amps' 'line' %d %d\n" - , netdata_priority + 13 - , netdata_update_every - ); - break; +#ifdef NETDATA_COMMENTED + /* It is possible you may want to monitor specific event + * conditions that may occur. If that is the case, you may want + * to check out what specific bitmask type and bitmask events + * occurred. See ipmi_monitoring_bitmasks.h for a list of + * bitmasks and types. + */ - case IPMI_MONITORING_SENSOR_UNITS_RPM: - printf("CHART ipmi.rpm '' 'System Fans read by IPMI' 'RPM' 'fans' 'ipmi.rpm' 'line' %d %d\n" - , netdata_priority + 14 - , netdata_update_every - ); - break; + if (sensor_bitmask_type != IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN) + printf (", %Xh", sensor_bitmask); + else + printf (", N/A"); - case IPMI_MONITORING_SENSOR_UNITS_WATTS: - printf("CHART ipmi.watts '' 'System Power read by IPMI' 'Watts' 'power' 'ipmi.watts' 'line' %d %d\n" - , netdata_priority + 5 - , netdata_update_every - ); - break; + if (sensor_bitmask_type != IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN + && sensor_bitmask_strings) + { + unsigned int i = 0; - case IPMI_MONITORING_SENSOR_UNITS_PERCENT: - printf("CHART ipmi.percent '' 'System Metrics read by IPMI' '%%' 'other' 'ipmi.percent' 'line' %d %d\n" - , netdata_priority + 15 - , netdata_update_every - ); - break; + printf (","); - default: - for(sn = sensors_root; sn; sn = sn->next) - if(sn->sensor_units == units) - sn->ignore = 1; - return; - } + while (sensor_bitmask_strings[i]) + { + printf (" "); - for(sn = sensors_root; sn; sn = sn->next) { - dupfound = 0; - if(sn->sensor_units == units && sn->updated && !sn->ignore) { - sn->exposed = 1; - multiplier = 1; - - switch(sn->sensor_reading_type) { - case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE: - multiplier = 1000; - // fallthrough - case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL: - case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32: - for (sn_stored = sensors_root; sn_stored; sn_stored = sn_stored->next) { - if (sn_stored == sn) continue; - // If the name is a duplicate, append the sensor number - if ( !strcmp(sn_stored->sensor_name, sn->sensor_name) ) { - dupfound = 1; - printf("DIMENSION i%d_n%d_r%d '%s i%d' absolute 1 %d\n" - , sn->sensor_number - , sn->record_id - , sn->sensor_reading_type - , sn->sensor_name - , sn->sensor_number - , multiplier - ); - break; - } - } - // No duplicate name was found, display it just with Name - if (!dupfound) { - // display without ID - printf("DIMENSION i%d_n%d_r%d '%s' absolute 1 %d\n" - , sn->sensor_number - , sn->record_id - , sn->sensor_reading_type - , sn->sensor_name - , multiplier - ); - } - break; + printf ("'%s'", + sensor_bitmask_strings[i]); - default: - sn->ignore = 1; - break; + i++; } } + else + printf (", N/A"); + + printf ("\n"); +#endif // NETDATA_COMMENTED } -} -static void send_metrics_to_netdata_for_units(int units) { - struct sensor *sn; + rv = 0; - switch(units) { - case IPMI_MONITORING_SENSOR_UNITS_CELSIUS: - printf("BEGIN ipmi.temperatures_c\n"); - break; +cleanup: + if (ctx) + ipmi_monitoring_ctx_destroy (ctx); - case IPMI_MONITORING_SENSOR_UNITS_FAHRENHEIT: - printf("BEGIN ipmi.temperatures_f\n"); - break; + timing_report(); - case IPMI_MONITORING_SENSOR_UNITS_VOLTS: - printf("BEGIN ipmi.volts\n"); - break; + if(remove_reread_sdr_after_first_use) + global_sensor_reading_flags &= ~(IPMI_MONITORING_SENSOR_READING_FLAGS_REREAD_SDR_CACHE); - case IPMI_MONITORING_SENSOR_UNITS_AMPS: - printf("BEGIN ipmi.amps\n"); - break; + return (rv); +} - case IPMI_MONITORING_SENSOR_UNITS_RPM: - printf("BEGIN ipmi.rpm\n"); - break; - case IPMI_MONITORING_SENSOR_UNITS_WATTS: - printf("BEGIN ipmi.watts\n"); - break; +static int netdata_get_ipmi_sel_events_count(struct ipmi_monitoring_ipmi_config *ipmi_config, struct netdata_ipmi_state *state) { + timing_init(); - case IPMI_MONITORING_SENSOR_UNITS_PERCENT: - printf("BEGIN ipmi.percent\n"); - break; + ipmi_monitoring_ctx_t ctx = NULL; + unsigned int sel_flags = global_sel_flags; + int sel_count; + int rv = -1; - default: - for(sn = sensors_root; sn; sn = sn->next) - if(sn->sensor_units == units) - sn->ignore = 1; - return; + if (!(ctx = ipmi_monitoring_ctx_create ())) { + collector_error("ipmi_monitoring_ctx_create()"); + goto cleanup; } - for(sn = sensors_root; sn; sn = sn->next) { - if(sn->sensor_units == units && sn->updated && !sn->sent && !sn->ignore) { - netdata_sensors_updated++; + if (sdr_cache_directory) { + if (ipmi_monitoring_ctx_sdr_cache_directory (ctx, sdr_cache_directory) < 0) { + collector_error( "ipmi_monitoring_ctx_sdr_cache_directory(): %s", ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } + if (sdr_sel_cache_format) { + if (ipmi_monitoring_ctx_sdr_cache_filenames(ctx, sdr_sel_cache_format) < 0) { + collector_error("ipmi_monitoring_ctx_sdr_cache_filenames(): %s\n", ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } + } - sn->sent = 1; + // Must call otherwise only default interpretations ever used + // sel_config_file can be NULL + if (ipmi_monitoring_ctx_sel_config_file (ctx, sel_config_file) < 0) { + collector_error( "ipmi_monitoring_ctx_sel_config_file(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } - switch(sn->sensor_reading_type) { - case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL: - printf("SET i%d_n%d_r%d = %u\n" - , sn->sensor_number - , sn->record_id - , sn->sensor_reading_type - , sn->sensor_reading.bool_value - ); - break; + if ((sel_count = ipmi_monitoring_sel_by_record_id (ctx, + hostname, + ipmi_config, + sel_flags, + NULL, + 0, + NULL, + NULL)) < 0) { + collector_error( "ipmi_monitoring_sel_by_record_id(): %s", + ipmi_monitoring_ctx_errormsg (ctx)); + goto cleanup; + } - case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32: - printf("SET i%d_n%d_r%d = %u\n" - , sn->sensor_number - , sn->record_id - , sn->sensor_reading_type - , sn->sensor_reading.uint32_value - ); - break; + netdata_update_ipmi_sel_events_count(state, sel_count); - case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE: - printf("SET i%d_n%d_r%d = %lld\n" - , sn->sensor_number - , sn->record_id - , sn->sensor_reading_type - , (long long int)(sn->sensor_reading.double_value * 1000) - ); - break; + rv = 0; - default: - sn->ignore = 1; - break; - } - } - } +cleanup: + if (ctx) + ipmi_monitoring_ctx_destroy (ctx); + + timing_report(); - printf("END\n"); + if(remove_reread_sdr_after_first_use) + global_sel_flags &= ~(IPMI_MONITORING_SEL_FLAGS_REREAD_SDR_CACHE); + + return (rv); } -static void send_metrics_to_netdata() { - static int sel_chart_generated = 0, sensors_states_chart_generated = 0; - struct sensor *sn; +// ---------------------------------------------------------------------------- +// copied from freeipmi codebase commit 8dea6dec4012d0899901e595f2c868a05e1cefed +// added netdata_ in-front to not overwrite library functions + +// FROM: common/miscutil/network.c +static int netdata_host_is_localhost (const char *host) { + /* Ordered by my assumption of most popular */ + if (!strcasecmp (host, "localhost") + || !strcmp (host, "127.0.0.1") + || !strcasecmp (host, "ipv6-localhost") + || !strcmp (host, "::1") + || !strcasecmp (host, "ip6-localhost") + || !strcmp (host, "0:0:0:0:0:0:0:1")) + return (1); - if(netdata_do_sel && !sel_chart_generated) { - sel_chart_generated = 1; - printf("CHART ipmi.events '' 'IPMI Events' 'events' 'events' ipmi.sel area %d %d\n" - , netdata_priority + 2 - , netdata_update_every - ); - printf("DIMENSION events '' absolute 1 1\n"); - } + return (0); +} - if(!sensors_states_chart_generated) { - sensors_states_chart_generated = 1; - printf("CHART ipmi.sensors_states '' 'IPMI Sensors State' 'sensors' 'states' ipmi.sensors_states line %d %d\n" - , netdata_priority + 1 - , netdata_update_every - ); - printf("DIMENSION nominal '' absolute 1 1\n"); - printf("DIMENSION critical '' absolute 1 1\n"); - printf("DIMENSION warning '' absolute 1 1\n"); - } +// FROM: common/parsecommon/parse-common.h +#define IPMI_PARSE_DEVICE_LAN_STR "lan" +#define IPMI_PARSE_DEVICE_LAN_2_0_STR "lan_2_0" +#define IPMI_PARSE_DEVICE_LAN_2_0_STR2 "lan20" +#define IPMI_PARSE_DEVICE_LAN_2_0_STR3 "lan_20" +#define IPMI_PARSE_DEVICE_LAN_2_0_STR4 "lan2_0" +#define IPMI_PARSE_DEVICE_LAN_2_0_STR5 "lanplus" +#define IPMI_PARSE_DEVICE_KCS_STR "kcs" +#define IPMI_PARSE_DEVICE_SSIF_STR "ssif" +#define IPMI_PARSE_DEVICE_OPENIPMI_STR "openipmi" +#define IPMI_PARSE_DEVICE_OPENIPMI_STR2 "open" +#define IPMI_PARSE_DEVICE_SUNBMC_STR "sunbmc" +#define IPMI_PARSE_DEVICE_SUNBMC_STR2 "bmc" +#define IPMI_PARSE_DEVICE_INTELDCMI_STR "inteldcmi" - // generate the CHART/DIMENSION lines, if we have to - for(sn = sensors_root; sn; sn = sn->next) - if(sn->updated && !sn->exposed && !sn->ignore) - send_chart_to_netdata_for_units(sn->sensor_units); +// FROM: common/parsecommon/parse-common.c +// changed the return values to match ipmi_monitoring.h +static int netdata_parse_outofband_driver_type (const char *str) { + if (strcasecmp (str, IPMI_PARSE_DEVICE_LAN_STR) == 0) + return (IPMI_MONITORING_PROTOCOL_VERSION_1_5); - if(netdata_do_sel) { - printf( - "BEGIN ipmi.events\n" - "SET events = %zu\n" - "END\n" - , netdata_sel_events - ); - } + /* support "lanplus" for those that might be used to ipmitool. + * support typo variants to ease. + */ + else if (strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR) == 0 + || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR2) == 0 + || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR3) == 0 + || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR4) == 0 + || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR5) == 0) + return (IPMI_MONITORING_PROTOCOL_VERSION_2_0); - printf( - "BEGIN ipmi.sensors_states\n" - "SET nominal = %zu\n" - "SET warning = %zu\n" - "SET critical = %zu\n" - "END\n" - , netdata_sensors_states_nominal - , netdata_sensors_states_warning - , netdata_sensors_states_critical - ); - - // send metrics to netdata - for(sn = sensors_root; sn; sn = sn->next) - if(sn->updated && sn->exposed && !sn->sent && !sn->ignore) - send_metrics_to_netdata_for_units(sn->sensor_units); + return (-1); +} +// FROM: common/parsecommon/parse-common.c +// changed the return values to match ipmi_monitoring.h +static int netdata_parse_inband_driver_type (const char *str) { + if (strcasecmp (str, IPMI_PARSE_DEVICE_KCS_STR) == 0) + return (IPMI_MONITORING_DRIVER_TYPE_KCS); + else if (strcasecmp (str, IPMI_PARSE_DEVICE_SSIF_STR) == 0) + return (IPMI_MONITORING_DRIVER_TYPE_SSIF); + /* support "open" for those that might be used to + * ipmitool. + */ + else if (strcasecmp (str, IPMI_PARSE_DEVICE_OPENIPMI_STR) == 0 + || strcasecmp (str, IPMI_PARSE_DEVICE_OPENIPMI_STR2) == 0) + return (IPMI_MONITORING_DRIVER_TYPE_OPENIPMI); + /* support "bmc" for those that might be used to + * ipmitool. + */ + else if (strcasecmp (str, IPMI_PARSE_DEVICE_SUNBMC_STR) == 0 + || strcasecmp (str, IPMI_PARSE_DEVICE_SUNBMC_STR2) == 0) + return (IPMI_MONITORING_DRIVER_TYPE_SUNBMC); + +#ifdef IPMI_MONITORING_DRIVER_TYPE_INTELDCMI + else if (strcasecmp (str, IPMI_PARSE_DEVICE_INTELDCMI_STR) == 0) + return (IPMI_MONITORING_DRIVER_TYPE_INTELDCMI); +#endif // IPMI_MONITORING_DRIVER_TYPE_INTELDCMI + + return (-1); } +// ---------------------------------------------------------------------------- +// BEGIN NETDATA CODE + +typedef enum __attribute__((packed)) { + IPMI_COLLECT_TYPE_SENSORS = (1 << 0), + IPMI_COLLECT_TYPE_SEL = (1 << 1), +} IPMI_COLLECTION_TYPE; + +struct sensor { + int sensor_type; + int sensor_state; + int sensor_units; + char *sensor_name; + + int sensor_reading_type; + union { + uint8_t bool_value; + uint32_t uint32_value; + double double_value; + } sensor_reading; + + // netdata provided + const char *context; + const char *title; + const char *units; + const char *family; + const char *chart_type; + const char *dimension; + int priority; + + const char *type; + const char *component; + + int multiplier; + bool do_metric; + bool do_state; + bool metric_chart_sent; + bool state_chart_sent; + usec_t last_collected_metric_ut; + usec_t last_collected_state_ut; +}; + +typedef enum __attribute__((packed)) { + ICS_INIT, + ICS_INIT_FAILED, + ICS_RUNNING, + ICS_FAILED, +} IPMI_COLLECTOR_STATUS; + +struct netdata_ipmi_state { + bool debug; + + struct { + IPMI_COLLECTOR_STATUS status; + usec_t last_iteration_ut; + size_t collected; + usec_t now_ut; + usec_t freq_ut; + int priority; + DICTIONARY *dict; + } sensors; + + struct { + IPMI_COLLECTOR_STATUS status; + usec_t last_iteration_ut; + size_t events; + usec_t now_ut; + usec_t freq_ut; + int priority; + } sel; + + struct { + usec_t now_ut; + } updates; +}; + +// ---------------------------------------------------------------------------- +// excluded record ids maintenance (both for sensor data and state) + static int *excluded_record_ids = NULL; size_t excluded_record_ids_length = 0; -static void excluded_record_ids_parse(const char *s) { +static void excluded_record_ids_parse(const char *s, bool debug) { if(!s) return; while(*s) { @@ -567,18 +725,14 @@ static void excluded_record_ids_parse(const char *s) { s = e; if(n != 0) { - excluded_record_ids = realloc(excluded_record_ids, (excluded_record_ids_length + 1) * sizeof(int)); - if(!excluded_record_ids) { - fprintf(stderr, "freeipmi.plugin: failed to allocate memory. Exiting."); - exit(1); - } + excluded_record_ids = reallocz(excluded_record_ids, (excluded_record_ids_length + 1) * sizeof(int)); excluded_record_ids[excluded_record_ids_length++] = (int)n; } } } if(debug) { - fprintf(stderr, "freeipmi.plugin: excluded record ids:"); + fprintf(stderr, "%s: excluded record ids:", program_name); size_t i; for(i = 0; i < excluded_record_ids_length; i++) { fprintf(stderr, " %d", excluded_record_ids[i]); @@ -590,7 +744,7 @@ static void excluded_record_ids_parse(const char *s) { static int *excluded_status_record_ids = NULL; size_t excluded_status_record_ids_length = 0; -static void excluded_status_record_ids_parse(const char *s) { +static void excluded_status_record_ids_parse(const char *s, bool debug) { if(!s) return; while(*s) { @@ -602,18 +756,14 @@ static void excluded_status_record_ids_parse(const char *s) { s = e; if(n != 0) { - excluded_status_record_ids = realloc(excluded_status_record_ids, (excluded_status_record_ids_length + 1) * sizeof(int)); - if(!excluded_status_record_ids) { - fprintf(stderr, "freeipmi.plugin: failed to allocate memory. Exiting."); - exit(1); - } + excluded_status_record_ids = reallocz(excluded_status_record_ids, (excluded_status_record_ids_length + 1) * sizeof(int)); excluded_status_record_ids[excluded_status_record_ids_length++] = (int)n; } } } if(debug) { - fprintf(stderr, "freeipmi.plugin: excluded status record ids:"); + fprintf(stderr, "%s: excluded status record ids:", program_name); size_t i; for(i = 0; i < excluded_status_record_ids_length; i++) { fprintf(stderr, " %d", excluded_status_record_ids[i]); @@ -645,959 +795,649 @@ static int excluded_status_record_ids_check(int record_id) { return 0; } -static void netdata_get_sensor( - int record_id - , int sensor_number - , int sensor_type - , int sensor_state - , int sensor_units - , int sensor_reading_type - , char *sensor_name - , void *sensor_reading -) { - // find the sensor record - struct sensor *sn; - for(sn = sensors_root; sn ;sn = sn->next) - if( sn->record_id == record_id && - sn->sensor_number == sensor_number && - sn->sensor_reading_type == sensor_reading_type && - sn->sensor_units == sensor_units && - !strcmp(sn->sensor_name, sensor_name) - ) - break; +// ---------------------------------------------------------------------------- +// data collection functions - if(!sn) { - // not found, create it - // check if it is excluded - if(excluded_record_ids_check(record_id)) { - if(debug) fprintf(stderr, "Sensor '%s' is excluded by excluded_record_ids_check()\n", sensor_name); - return; - } +struct { + const char *search; + SIMPLE_PATTERN *pattern; + const char *label; +} sensors_component_patterns[] = { - if(debug) fprintf(stderr, "Allocating new sensor data record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type %d\n", sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type); + // The order is important! + // They are evaluated top to bottom + // The first the matches is used - sn = calloc(1, sizeof(struct sensor)); - if(!sn) { - fatal("cannot allocate %zu bytes of memory.", sizeof(struct sensor)); - } + { + .search = "*DIMM*|*_DIM*|*VTT*|*VDDQ*|*ECC*|*MEM*CRC*|*MEM*BD*", + .label = NETDATA_SENSOR_COMPONENT_MEMORY_MODULE, + }, + { + .search = "*CPU*|SOC_*|*VDDCR*|P*_VDD*|*_DTS|*VCORE*|*PROC*", + .label = NETDATA_SENSOR_COMPONENT_PROCESSOR, + }, + { + .search = "IPU*", + .label = NETDATA_SENSOR_COMPONENT_IPU, + }, + { + .search = "M2_*|*SSD*|*HSC*|*HDD*|*NVME*", + .label = NETDATA_SENSOR_COMPONENT_STORAGE, + }, + { + .search = "MB_*|*PCH*|*VBAT*|*I/O*BD*|*IO*BD*", + .label = NETDATA_SENSOR_COMPONENT_MOTHERBOARD, + }, + { + .search = "Watchdog|SEL|SYS_*|*CHASSIS*", + .label = NETDATA_SENSOR_COMPONENT_SYSTEM, + }, + { + .search = "PS*|P_*|*PSU*|*PWR*|*TERMV*|*D2D*", + .label = NETDATA_SENSOR_COMPONENT_POWER_SUPPLY, + }, - sn->record_id = record_id; - sn->sensor_number = sensor_number; - sn->sensor_type = sensor_type; - sn->sensor_state = sensor_state; - sn->sensor_units = sensor_units; - sn->sensor_reading_type = sensor_reading_type; - sn->sensor_name = strdup(sensor_name); - if(!sn->sensor_name) { - fatal("cannot allocate %zu bytes of memory.", strlen(sensor_name)); + // fallback components + { + .search = "VR_P*|*VRMP*", + .label = NETDATA_SENSOR_COMPONENT_PROCESSOR, + }, + { + .search = "*VSB*|*PS*", + .label = NETDATA_SENSOR_COMPONENT_POWER_SUPPLY, + }, + { + .search = "*MEM*|*MEM*RAID*", + .label = NETDATA_SENSOR_COMPONENT_MEMORY, + }, + { + .search = "*RAID*", // there is also "Memory RAID", so keep this after memory + .label = NETDATA_SENSOR_COMPONENT_STORAGE, + }, + { + .search = "*PERIPHERAL*|*USB*", + .label = NETDATA_SENSOR_COMPONENT_PERIPHERAL, + }, + { + .search = "*FAN*|*12V*|*VCC*|*PCI*|*CHIPSET*|*AMP*|*BD*", + .label = NETDATA_SENSOR_COMPONENT_SYSTEM, + }, + + // terminator + { + .search = NULL, + .label = NULL, } +}; - sn->next = sensors_root; - sensors_root = sn; - } - else { - if(debug) fprintf(stderr, "Reusing sensor record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type %d\n", sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type); +static const char *netdata_sensor_name_to_component(const char *sensor_name) { + for(int i = 0; sensors_component_patterns[i].search ;i++) { + if(!sensors_component_patterns[i].pattern) + sensors_component_patterns[i].pattern = simple_pattern_create(sensors_component_patterns[i].search, "|", SIMPLE_PATTERN_EXACT, false); + + if(simple_pattern_matches(sensors_component_patterns[i].pattern, sensor_name)) + return sensors_component_patterns[i].label; } - switch(sensor_reading_type) { + return "Other"; +} + +const char *netdata_collect_type_to_string(IPMI_COLLECTION_TYPE type) { + if((type & (IPMI_COLLECT_TYPE_SENSORS|IPMI_COLLECT_TYPE_SEL)) == (IPMI_COLLECT_TYPE_SENSORS|IPMI_COLLECT_TYPE_SEL)) + return "sensors,sel"; + if(type & IPMI_COLLECT_TYPE_SEL) + return "sel"; + if(type & IPMI_COLLECT_TYPE_SENSORS) + return "sensors"; + + return "unknown"; +} + +static void netdata_sensor_set_value(struct sensor *sn, void *sensor_reading, struct netdata_ipmi_state *state __maybe_unused) { + switch(sn->sensor_reading_type) { case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL: sn->sensor_reading.bool_value = *((uint8_t *)sensor_reading); - sn->updated = 1; - netdata_sensors_collected++; break; case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32: sn->sensor_reading.uint32_value = *((uint32_t *)sensor_reading); - sn->updated = 1; - netdata_sensors_collected++; break; case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE: sn->sensor_reading.double_value = *((double *)sensor_reading); - sn->updated = 1; - netdata_sensors_collected++; break; default: - if(debug) fprintf(stderr, "Unknown reading type - Ignoring sensor record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type %d\n", sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type); - sn->ignore = 1; + case IPMI_MONITORING_SENSOR_READING_TYPE_UNKNOWN: + sn->do_metric = false; break; } +} - // check if it is excluded - if(excluded_status_record_ids_check(record_id)) { - if(debug) fprintf(stderr, "Sensor '%s' is excluded for status check, by excluded_status_record_ids_check()\n", sensor_name); +static void netdata_update_ipmi_sensor_reading( + int record_id + , int sensor_number + , int sensor_type + , int sensor_state + , int sensor_units + , int sensor_reading_type + , char *sensor_name + , void *sensor_reading + , int event_reading_type_code __maybe_unused + , int sensor_bitmask_type __maybe_unused + , int sensor_bitmask __maybe_unused + , char **sensor_bitmask_strings __maybe_unused + , struct netdata_ipmi_state *state +) { + if(unlikely(sensor_state == IPMI_MONITORING_STATE_UNKNOWN && + sensor_type == IPMI_MONITORING_SENSOR_TYPE_UNKNOWN && + sensor_units == IPMI_MONITORING_SENSOR_UNITS_UNKNOWN && + sensor_reading_type == IPMI_MONITORING_SENSOR_READING_TYPE_UNKNOWN && + (!sensor_name || !*sensor_name))) + // we can't do anything about this sensor - everything is unknown return; - } - switch(sensor_state) { - case IPMI_MONITORING_STATE_NOMINAL: - netdata_sensors_states_nominal++; - break; + if(unlikely(!sensor_name || !*sensor_name)) + sensor_name = "UNNAMED"; - case IPMI_MONITORING_STATE_WARNING: - netdata_sensors_states_warning++; - break; + state->sensors.collected++; - case IPMI_MONITORING_STATE_CRITICAL: - netdata_sensors_states_critical++; - break; + char key[SENSORS_DICT_KEY_SIZE + 1]; + snprintfz(key, SENSORS_DICT_KEY_SIZE, "i%d_n%d_t%d_u%d_%s", + record_id, sensor_number, sensor_reading_type, sensor_units, sensor_name); - default: - break; - } -} + // find the sensor record + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(state->sensors.dict, key); + if(likely(item)) { + // recurring collection -static void netdata_get_sel( - int record_id - , int record_type_class - , int sel_state -) { - (void)record_id; - (void)record_type_class; - (void)sel_state; + if(state->debug) + fprintf(stderr, "%s: reusing sensor record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type %d\n", + program_name, sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type); - netdata_sel_events++; -} + struct sensor *sn = dictionary_acquired_item_value(item); + if(sensor_reading) { + netdata_sensor_set_value(sn, sensor_reading, state); + sn->last_collected_metric_ut = state->sensors.now_ut; + } -// END NETDATA CODE -// ---------------------------------------------------------------------------- + sn->sensor_state = sensor_state; + sn->last_collected_state_ut = state->sensors.now_ut; -static int -_ipmimonitoring_sensors (struct ipmi_monitoring_ipmi_config *ipmi_config) -{ - ipmi_monitoring_ctx_t ctx = NULL; - unsigned int sensor_reading_flags = 0; - int i; - int sensor_count; - int rv = -1; + dictionary_acquired_item_release(state->sensors.dict, item); - if (!(ctx = ipmi_monitoring_ctx_create ())) { - collector_error("ipmi_monitoring_ctx_create()"); - goto cleanup; + return; } - if (sdr_cache_directory) - { - if (ipmi_monitoring_ctx_sdr_cache_directory (ctx, - sdr_cache_directory) < 0) - { - collector_error("ipmi_monitoring_ctx_sdr_cache_directory(): %s\n", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - } + if(state->debug) + fprintf(stderr, "Allocating new sensor data record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type %d\n", + sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type); - /* Must call otherwise only default interpretations ever used */ - if (sensor_config_file) - { - if (ipmi_monitoring_ctx_sensor_config_file (ctx, - sensor_config_file) < 0) - { - collector_error( "ipmi_monitoring_ctx_sensor_config_file(): %s\n", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + // check if it is excluded + bool excluded_metric = excluded_record_ids_check(record_id); + bool excluded_state = excluded_status_record_ids_check(record_id); + + if(excluded_metric) { + if(state->debug) + fprintf(stderr, "Sensor '%s' is excluded by excluded_record_ids_check()\n", sensor_name); } - else - { - if (ipmi_monitoring_ctx_sensor_config_file (ctx, NULL) < 0) - { - collector_error( "ipmi_monitoring_ctx_sensor_config_file(): %s\n", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + + if(excluded_state) { + if(state->debug) + fprintf(stderr, "Sensor '%s' is excluded for status check, by excluded_status_record_ids_check()\n", sensor_name); } - if (reread_sdr_cache) - sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_REREAD_SDR_CACHE; + struct sensor t = { + .sensor_type = sensor_type, + .sensor_state = sensor_state, + .sensor_units = sensor_units, + .sensor_reading_type = sensor_reading_type, + .sensor_name = strdupz(sensor_name), + .component = netdata_sensor_name_to_component(sensor_name), + .do_state = !excluded_state, + .do_metric = !excluded_metric, + }; - if (ignore_non_interpretable_sensors) - sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_IGNORE_NON_INTERPRETABLE_SENSORS; + t.type = netdata_ipmi_get_sensor_type_string(t.sensor_type, &t.component); - if (bridge_sensors) - sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_BRIDGE_SENSORS; + switch(t.sensor_units) { + case IPMI_MONITORING_SENSOR_UNITS_CELSIUS: + t.dimension = "temperature"; + t.context = "ipmi.sensor_temperature_c"; + t.title = "IPMI Sensor Temperature Celsius"; + t.units = "Celsius"; + t.family = "temperatures"; + t.chart_type = "line"; + t.priority = state->sensors.priority + 10; + break; - if (interpret_oem_data) - sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_INTERPRET_OEM_DATA; + case IPMI_MONITORING_SENSOR_UNITS_FAHRENHEIT: + t.dimension = "temperature"; + t.context = "ipmi.sensor_temperature_f"; + t.title = "IPMI Sensor Temperature Fahrenheit"; + t.units = "Fahrenheit"; + t.family = "temperatures"; + t.chart_type = "line"; + t.priority = state->sensors.priority + 20; + break; - if (shared_sensors) - sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_SHARED_SENSORS; + case IPMI_MONITORING_SENSOR_UNITS_VOLTS: + t.dimension = "voltage"; + t.context = "ipmi.sensor_voltage"; + t.title = "IPMI Sensor Voltage"; + t.units = "Volts"; + t.family = "voltages"; + t.chart_type = "line"; + t.priority = state->sensors.priority + 30; + break; - if (discrete_reading) - sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_DISCRETE_READING; + case IPMI_MONITORING_SENSOR_UNITS_AMPS: + t.dimension = "ampere"; + t.context = "ipmi.sensor_ampere"; + t.title = "IPMI Sensor Current"; + t.units = "Amps"; + t.family = "current"; + t.chart_type = "line"; + t.priority = state->sensors.priority + 40; + break; - if (ignore_scanning_disabled) - sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_IGNORE_SCANNING_DISABLED; + case IPMI_MONITORING_SENSOR_UNITS_RPM: + t.dimension = "rotations"; + t.context = "ipmi.sensor_fan_speed"; + t.title = "IPMI Sensor Fans Speed"; + t.units = "RPM"; + t.family = "fans"; + t.chart_type = "line"; + t.priority = state->sensors.priority + 50; + break; - if (assume_bmc_owner) - sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_ASSUME_BMC_OWNER; + case IPMI_MONITORING_SENSOR_UNITS_WATTS: + t.dimension = "power"; + t.context = "ipmi.sensor_power"; + t.title = "IPMI Sensor Power"; + t.units = "Watts"; + t.family = "power"; + t.chart_type = "line"; + t.priority = state->sensors.priority + 60; + break; -#ifdef IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES - if (entity_sensor_names) - sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES; -#endif // IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES + case IPMI_MONITORING_SENSOR_UNITS_PERCENT: + t.dimension = "percentage"; + t.context = "ipmi.sensor_reading_percent"; + t.title = "IPMI Sensor Reading Percentage"; + t.units = "%%"; + t.family = "other"; + t.chart_type = "line"; + t.priority = state->sensors.priority + 70; + break; - if (!record_ids_length && !sensor_types_length) - { - if ((sensor_count = ipmi_monitoring_sensor_readings_by_record_id (ctx, - hostname, - ipmi_config, - sensor_reading_flags, - NULL, - 0, - NULL, - NULL)) < 0) - { - collector_error( "ipmi_monitoring_sensor_readings_by_record_id(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - } - else if (record_ids_length) - { - if ((sensor_count = ipmi_monitoring_sensor_readings_by_record_id (ctx, - hostname, - ipmi_config, - sensor_reading_flags, - record_ids, - record_ids_length, - NULL, - NULL)) < 0) - { - collector_error( "ipmi_monitoring_sensor_readings_by_record_id(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - } - else - { - if ((sensor_count = ipmi_monitoring_sensor_readings_by_sensor_type (ctx, - hostname, - ipmi_config, - sensor_reading_flags, - sensor_types, - sensor_types_length, - NULL, - NULL)) < 0) - { - collector_error( "ipmi_monitoring_sensor_readings_by_sensor_type(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + default: + t.priority = state->sensors.priority + 80; + t.do_metric = false; + break; } -#ifdef NETDATA_COMMENTED - printf ("%s, %s, %s, %s, %s, %s, %s, %s, %s, %s\n", - "Record ID", - "Sensor Name", - "Sensor Number", - "Sensor Type", - "Sensor State", - "Sensor Reading", - "Sensor Units", - "Sensor Event/Reading Type Code", - "Sensor Event Bitmask", - "Sensor Event String"); -#endif // NETDATA_COMMENTED - - for (i = 0; i < sensor_count; i++, ipmi_monitoring_sensor_iterator_next (ctx)) - { - int record_id, sensor_number, sensor_type, sensor_state, sensor_units, - sensor_reading_type; - -#ifdef NETDATA_COMMENTED - int sensor_bitmask_type, sensor_bitmask, event_reading_type_code; - char **sensor_bitmask_strings = NULL; - const char *sensor_type_str; - const char *sensor_state_str; -#endif // NETDATA_COMMENTED + switch(sensor_reading_type) { + case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE: + t.multiplier = 1000; + break; - char *sensor_name = NULL; - void *sensor_reading; + case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL: + case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32: + t.multiplier = 1; + break; - if ((record_id = ipmi_monitoring_sensor_read_record_id (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sensor_read_record_id(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + default: + t.do_metric = false; + break; + } - if ((sensor_number = ipmi_monitoring_sensor_read_sensor_number (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sensor_read_sensor_number(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + if(sensor_reading) { + netdata_sensor_set_value(&t, sensor_reading, state); + t.last_collected_metric_ut = state->sensors.now_ut; + } + t.last_collected_state_ut = state->sensors.now_ut; - if ((sensor_type = ipmi_monitoring_sensor_read_sensor_type (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sensor_read_sensor_type(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + dictionary_set(state->sensors.dict, key, &t, sizeof(t)); +} - if (!(sensor_name = ipmi_monitoring_sensor_read_sensor_name (ctx))) - { - collector_error( "ipmi_monitoring_sensor_read_sensor_name(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } +static void netdata_update_ipmi_sel_events_count(struct netdata_ipmi_state *state, uint32_t events) { + state->sel.events = events; +} - if ((sensor_state = ipmi_monitoring_sensor_read_sensor_state (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sensor_read_sensor_state(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } +int netdata_ipmi_collect_data(struct ipmi_monitoring_ipmi_config *ipmi_config, IPMI_COLLECTION_TYPE type, struct netdata_ipmi_state *state) { + errno = 0; - if ((sensor_units = ipmi_monitoring_sensor_read_sensor_units (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sensor_read_sensor_units(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + if(type & IPMI_COLLECT_TYPE_SENSORS) { + state->sensors.collected = 0; + state->sensors.now_ut = now_monotonic_usec(); -#ifdef NETDATA_COMMENTED - if ((sensor_bitmask_type = ipmi_monitoring_sensor_read_sensor_bitmask_type (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sensor_read_sensor_bitmask_type(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - if ((sensor_bitmask = ipmi_monitoring_sensor_read_sensor_bitmask (ctx)) < 0) - { - collector_error("ipmi_monitoring_sensor_read_sensor_bitmask(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + if (netdata_read_ipmi_sensors(ipmi_config, state) < 0) return -1; + } - /* it's ok for this to be NULL, i.e. sensor_bitmask == - * IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN - */ - sensor_bitmask_strings = ipmi_monitoring_sensor_read_sensor_bitmask_strings (ctx); - - - -#endif // NETDATA_COMMENTED + if(type & IPMI_COLLECT_TYPE_SEL) { + state->sel.events = 0; + state->sel.now_ut = now_monotonic_usec(); + if(netdata_get_ipmi_sel_events_count(ipmi_config, state) < 0) return -2; + } - if ((sensor_reading_type = ipmi_monitoring_sensor_read_sensor_reading_type (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sensor_read_sensor_reading_type(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + return 0; +} - sensor_reading = ipmi_monitoring_sensor_read_sensor_reading (ctx); +int netdata_ipmi_detect_speed_secs(struct ipmi_monitoring_ipmi_config *ipmi_config, IPMI_COLLECTION_TYPE type, struct netdata_ipmi_state *state) { + int i, checks = SPEED_TEST_ITERATIONS, successful = 0; + usec_t total = 0; -#ifdef NETDATA_COMMENTED - if ((event_reading_type_code = ipmi_monitoring_sensor_read_event_reading_type_code (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sensor_read_event_reading_type_code(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } -#endif // NETDATA_COMMENTED + for(i = 0 ; i < checks ; i++) { + if(unlikely(state->debug)) + fprintf(stderr, "%s: checking %s data collection speed iteration %d of %d\n", + program_name, netdata_collect_type_to_string(type), i + 1, checks); - netdata_get_sensor( - record_id - , sensor_number - , sensor_type - , sensor_state - , sensor_units - , sensor_reading_type - , sensor_name - , sensor_reading - ); + // measure the time a data collection needs + usec_t start = now_realtime_usec(); -#ifdef NETDATA_COMMENTED - if (!strlen (sensor_name)) - sensor_name = "N/A"; - - sensor_type_str = _get_sensor_type_string (sensor_type); - - printf ("%d, %s, %d, %s", - record_id, - sensor_name, - sensor_number, - sensor_type_str); - - if (sensor_state == IPMI_MONITORING_STATE_NOMINAL) - sensor_state_str = "Nominal"; - else if (sensor_state == IPMI_MONITORING_STATE_WARNING) - sensor_state_str = "Warning"; - else if (sensor_state == IPMI_MONITORING_STATE_CRITICAL) - sensor_state_str = "Critical"; - else - sensor_state_str = "N/A"; + if(netdata_ipmi_collect_data(ipmi_config, type, state) < 0) + continue; - printf (", %s", sensor_state_str); + usec_t end = now_realtime_usec(); - if (sensor_reading) - { - const char *sensor_units_str; - - if (sensor_reading_type == IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL) - printf (", %s", - (*((uint8_t *)sensor_reading) ? "true" : "false")); - else if (sensor_reading_type == IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32) - printf (", %u", - *((uint32_t *)sensor_reading)); - else if (sensor_reading_type == IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE) - printf (", %.2f", - *((double *)sensor_reading)); - else - printf (", N/A"); - - if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_CELSIUS) - sensor_units_str = "C"; - else if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_FAHRENHEIT) - sensor_units_str = "F"; - else if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_VOLTS) - sensor_units_str = "V"; - else if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_AMPS) - sensor_units_str = "A"; - else if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_RPM) - sensor_units_str = "RPM"; - else if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_WATTS) - sensor_units_str = "W"; - else if (sensor_units == IPMI_MONITORING_SENSOR_UNITS_PERCENT) - sensor_units_str = "%"; - else - sensor_units_str = "N/A"; - - printf (", %s", sensor_units_str); - } - else - printf (", N/A, N/A"); + successful++; - printf (", %Xh", event_reading_type_code); + if(unlikely(state->debug)) + fprintf(stderr, "%s: %s data collection speed was %llu usec\n", + program_name, netdata_collect_type_to_string(type), end - start); - /* It is possible you may want to monitor specific event - * conditions that may occur. If that is the case, you may want - * to check out what specific bitmask type and bitmask events - * occurred. See ipmi_monitoring_bitmasks.h for a list of - * bitmasks and types. - */ + // add it to our total + total += end - start; - if (sensor_bitmask_type != IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN) - printf (", %Xh", sensor_bitmask); - else - printf (", N/A"); + // wait the same time + // to avoid flooding the IPMI processor with requests + sleep_usec(end - start); + } - if (sensor_bitmask_type != IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN - && sensor_bitmask_strings) - { - unsigned int i = 0; + if(!successful) + return 0; - printf (","); + // so, we assume it needed 2x the time + // we find the average in microseconds + // and we round-up to the closest second - while (sensor_bitmask_strings[i]) - { - printf (" "); + return (int)(( total * 2 / successful / USEC_PER_SEC ) + 1); +} - printf ("'%s'", - sensor_bitmask_strings[i]); +// ---------------------------------------------------------------------------- +// data collection threads - i++; - } - } - else - printf (", N/A"); +struct ipmi_collection_thread { + struct ipmi_monitoring_ipmi_config ipmi_config; + int freq_s; + bool debug; + IPMI_COLLECTION_TYPE type; + SPINLOCK spinlock; + struct netdata_ipmi_state state; +}; - printf ("\n"); -#endif // NETDATA_COMMENTED - } +void *netdata_ipmi_collection_thread(void *ptr) { + struct ipmi_collection_thread *t = ptr; - rv = 0; - cleanup: - if (ctx) - ipmi_monitoring_ctx_destroy (ctx); - return (rv); -} + if(t->debug) fprintf(stderr, "%s: calling initialize_ipmi_config() for %s\n", + program_name, netdata_collect_type_to_string(t->type)); + initialize_ipmi_config(&t->ipmi_config); -static int -_ipmimonitoring_sel (struct ipmi_monitoring_ipmi_config *ipmi_config) -{ - ipmi_monitoring_ctx_t ctx = NULL; - unsigned int sel_flags = 0; - int i; - int sel_count; - int rv = -1; + if(t->debug) fprintf(stderr, "%s: detecting IPMI minimum update frequency for %s...\n", + program_name, netdata_collect_type_to_string(t->type)); - if (!(ctx = ipmi_monitoring_ctx_create ())) - { - collector_error("ipmi_monitoring_ctx_create()"); - goto cleanup; - } - - if (sdr_cache_directory) - { - if (ipmi_monitoring_ctx_sdr_cache_directory (ctx, - sdr_cache_directory) < 0) - { - collector_error( "ipmi_monitoring_ctx_sdr_cache_directory(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; + int freq_s = netdata_ipmi_detect_speed_secs(&t->ipmi_config, t->type, &t->state); + if(!freq_s) { + if(t->type & IPMI_COLLECT_TYPE_SENSORS) { + t->state.sensors.status = ICS_INIT_FAILED; + t->state.sensors.last_iteration_ut = 0; } - } - /* Must call otherwise only default interpretations ever used */ - if (sel_config_file) - { - if (ipmi_monitoring_ctx_sel_config_file (ctx, - sel_config_file) < 0) - { - collector_error( "ipmi_monitoring_ctx_sel_config_file(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - } - else - { - if (ipmi_monitoring_ctx_sel_config_file (ctx, NULL) < 0) - { - collector_error( "ipmi_monitoring_ctx_sel_config_file(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; + if(t->type & IPMI_COLLECT_TYPE_SEL) { + t->state.sel.status = ICS_INIT_FAILED; + t->state.sel.last_iteration_ut = 0; } - } - if (reread_sdr_cache) - sel_flags |= IPMI_MONITORING_SEL_FLAGS_REREAD_SDR_CACHE; - - if (interpret_oem_data) - sel_flags |= IPMI_MONITORING_SEL_FLAGS_INTERPRET_OEM_DATA; - - if (assume_system_event_record) - sel_flags |= IPMI_MONITORING_SEL_FLAGS_ASSUME_SYSTEM_EVENT_RECORD; - -#ifdef IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES - if (entity_sensor_names) - sel_flags |= IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES; -#endif // IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES - - if (record_ids_length) - { - if ((sel_count = ipmi_monitoring_sel_by_record_id (ctx, - hostname, - ipmi_config, - sel_flags, - record_ids, - record_ids_length, - NULL, - NULL)) < 0) - { - collector_error( "ipmi_monitoring_sel_by_record_id(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - } - else if (sensor_types_length) - { - if ((sel_count = ipmi_monitoring_sel_by_sensor_type (ctx, - hostname, - ipmi_config, - sel_flags, - sensor_types, - sensor_types_length, - NULL, - NULL)) < 0) - { - collector_error( "ipmi_monitoring_sel_by_sensor_type(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - } - else if (date_begin - || date_end) - { - if ((sel_count = ipmi_monitoring_sel_by_date_range (ctx, - hostname, - ipmi_config, - sel_flags, - date_begin, - date_end, - NULL, - NULL)) < 0) - { - collector_error( "ipmi_monitoring_sel_by_sensor_type(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + return ptr; } - else - { - if ((sel_count = ipmi_monitoring_sel_by_record_id (ctx, - hostname, - ipmi_config, - sel_flags, - NULL, - 0, - NULL, - NULL)) < 0) - { - collector_error( "ipmi_monitoring_sel_by_record_id(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - } - -#ifdef NETDATA_COMMENTED - printf ("%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s\n", - "Record ID", - "Record Type", - "SEL State", - "Timestamp", - "Sensor Name", - "Sensor Type", - "Event Direction", - "Event Type Code", - "Event Data", - "Event Offset", - "Event Offset String"); -#endif // NETDATA_COMMENTED - - for (i = 0; i < sel_count; i++, ipmi_monitoring_sel_iterator_next (ctx)) - { - int record_id, record_type, sel_state, record_type_class; -#ifdef NETDATA_COMMENTED - int sensor_type, sensor_number, event_direction, - event_offset_type, event_offset, event_type_code, manufacturer_id; - unsigned int timestamp, event_data1, event_data2, event_data3; - char *event_offset_string = NULL; - const char *sensor_type_str; - const char *event_direction_str; - const char *sel_state_str; - char *sensor_name = NULL; - unsigned char oem_data[64]; - int oem_data_len; - unsigned int j; -#endif // NETDATA_COMMENTED - - if ((record_id = ipmi_monitoring_sel_read_record_id (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_record_id(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; + else { + if(t->type & IPMI_COLLECT_TYPE_SENSORS) { + t->state.sensors.status = ICS_RUNNING; } - if ((record_type = ipmi_monitoring_sel_read_record_type (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_record_type(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; + if(t->type & IPMI_COLLECT_TYPE_SEL) { + t->state.sel.status = ICS_RUNNING; } + } - if ((record_type_class = ipmi_monitoring_sel_read_record_type_class (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_record_type_class(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + t->freq_s = freq_s = MAX(t->freq_s, freq_s); - if ((sel_state = ipmi_monitoring_sel_read_sel_state (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_sel_state(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + if(t->debug) { + fprintf(stderr, "%s: IPMI minimum update frequency of %s was calculated to %d seconds.\n", + program_name, netdata_collect_type_to_string(t->type), t->freq_s); - netdata_get_sel( - record_id - , record_type_class - , sel_state - ); + fprintf(stderr, "%s: starting data collection of %s\n", + program_name, netdata_collect_type_to_string(t->type)); + } -#ifdef NETDATA_COMMENTED - if (sel_state == IPMI_MONITORING_STATE_NOMINAL) - sel_state_str = "Nominal"; - else if (sel_state == IPMI_MONITORING_STATE_WARNING) - sel_state_str = "Warning"; - else if (sel_state == IPMI_MONITORING_STATE_CRITICAL) - sel_state_str = "Critical"; - else - sel_state_str = "N/A"; + size_t iteration = 0, failures = 0; + usec_t step = t->freq_s * USEC_PER_SEC; - printf ("%d, %d, %s", - record_id, - record_type, - sel_state_str); + heartbeat_t hb; + heartbeat_init(&hb); + while(++iteration) { + heartbeat_next(&hb, step); - if (record_type_class == IPMI_MONITORING_SEL_RECORD_TYPE_CLASS_SYSTEM_EVENT_RECORD - || record_type_class == IPMI_MONITORING_SEL_RECORD_TYPE_CLASS_TIMESTAMPED_OEM_RECORD) - { + if(t->debug) + fprintf(stderr, "%s: calling netdata_ipmi_collect_data() for %s\n", + program_name, netdata_collect_type_to_string(t->type)); - if (ipmi_monitoring_sel_read_timestamp (ctx, ×tamp) < 0) - { - collector_error( "ipmi_monitoring_sel_read_timestamp(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + struct netdata_ipmi_state tmp_state = t->state; - /* XXX: This should be converted to a nice date output using - * your favorite timestamp -> string conversion functions. - */ - printf (", %u", timestamp); + if(t->type & IPMI_COLLECT_TYPE_SENSORS) { + tmp_state.sensors.last_iteration_ut = now_monotonic_usec(); + tmp_state.sensors.freq_ut = t->freq_s * USEC_PER_SEC; } - else - printf (", N/A"); - - if (record_type_class == IPMI_MONITORING_SEL_RECORD_TYPE_CLASS_SYSTEM_EVENT_RECORD) - { - /* If you are integrating ipmimonitoring SEL into a monitoring application, - * you may wish to count the number of times a specific error occurred - * and report that to the monitoring application. - * - * In this particular case, you'll probably want to check out - * what sensor type each SEL event is reporting, the - * event offset type, and the specific event offset that occurred. - * - * See ipmi_monitoring_offsets.h for a list of event offsets - * and types. - */ - - if (!(sensor_name = ipmi_monitoring_sel_read_sensor_name (ctx))) - { - collector_error( "ipmi_monitoring_sel_read_sensor_name(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - - if ((sensor_type = ipmi_monitoring_sel_read_sensor_type (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_sensor_type(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - - if ((sensor_number = ipmi_monitoring_sel_read_sensor_number (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_sensor_number(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - - if ((event_direction = ipmi_monitoring_sel_read_event_direction (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_event_direction(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - - if ((event_type_code = ipmi_monitoring_sel_read_event_type_code (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_event_type_code(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - if (ipmi_monitoring_sel_read_event_data (ctx, - &event_data1, - &event_data2, - &event_data3) < 0) - { - collector_error( "ipmi_monitoring_sel_read_event_data(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + if(t->type & IPMI_COLLECT_TYPE_SEL) { + tmp_state.sel.last_iteration_ut = now_monotonic_usec(); + tmp_state.sel.freq_ut = t->freq_s * USEC_PER_SEC; + } - if ((event_offset_type = ipmi_monitoring_sel_read_event_offset_type (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_event_offset_type(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + if(netdata_ipmi_collect_data(&t->ipmi_config, t->type, &tmp_state) != 0) + failures++; + else + failures = 0; - if ((event_offset = ipmi_monitoring_sel_read_event_offset (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_event_offset(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } + if(failures > 10) { + collector_error("%s() failed to collect %s data for %zu consecutive times, having made %zu iterations.", + __FUNCTION__, netdata_collect_type_to_string(t->type), failures, iteration); - if (!(event_offset_string = ipmi_monitoring_sel_read_event_offset_string (ctx))) - { - collector_error( "ipmi_monitoring_sel_read_event_offset_string(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; + if(t->type & IPMI_COLLECT_TYPE_SENSORS) { + t->state.sensors.status = ICS_FAILED; + t->state.sensors.last_iteration_ut = 0; } - if (!strlen (sensor_name)) - sensor_name = "N/A"; - - sensor_type_str = _get_sensor_type_string (sensor_type); - - if (event_direction == IPMI_MONITORING_SEL_EVENT_DIRECTION_ASSERTION) - event_direction_str = "Assertion"; - else - event_direction_str = "Deassertion"; - - printf (", %s, %s, %d, %s, %Xh, %Xh-%Xh-%Xh", - sensor_name, - sensor_type_str, - sensor_number, - event_direction_str, - event_type_code, - event_data1, - event_data2, - event_data3); - - if (event_offset_type != IPMI_MONITORING_EVENT_OFFSET_TYPE_UNKNOWN) - printf (", %Xh", event_offset); - else - printf (", N/A"); - - if (event_offset_type != IPMI_MONITORING_EVENT_OFFSET_TYPE_UNKNOWN) - printf (", %s", event_offset_string); - else - printf (", N/A"); - } - else if (record_type_class == IPMI_MONITORING_SEL_RECORD_TYPE_CLASS_TIMESTAMPED_OEM_RECORD - || record_type_class == IPMI_MONITORING_SEL_RECORD_TYPE_CLASS_NON_TIMESTAMPED_OEM_RECORD) - { - if (record_type_class == IPMI_MONITORING_SEL_RECORD_TYPE_CLASS_TIMESTAMPED_OEM_RECORD) - { - if ((manufacturer_id = ipmi_monitoring_sel_read_manufacturer_id (ctx)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_manufacturer_id(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; - } - - printf (", Manufacturer ID = %Xh", manufacturer_id); - } - - if ((oem_data_len = ipmi_monitoring_sel_read_oem_data (ctx, oem_data, 1024)) < 0) - { - collector_error( "ipmi_monitoring_sel_read_oem_data(): %s", - ipmi_monitoring_ctx_errormsg (ctx)); - goto cleanup; + if(t->type & IPMI_COLLECT_TYPE_SEL) { + t->state.sel.status = ICS_FAILED; + t->state.sel.last_iteration_ut = 0; } - printf (", OEM Data = "); - - for (j = 0; j < oem_data_len; j++) - printf ("%02Xh ", oem_data[j]); + break; } - else - printf (", N/A, N/A, N/A, N/A, N/A, N/A, N/A"); - printf ("\n"); -#endif // NETDATA_COMMENTED + spinlock_lock(&t->spinlock); + t->state = tmp_state; + spinlock_unlock(&t->spinlock); } - rv = 0; - cleanup: - if (ctx) - ipmi_monitoring_ctx_destroy (ctx); - return (rv); + return ptr; } // ---------------------------------------------------------------------------- -// MAIN PROGRAM FOR NETDATA PLUGIN - -int ipmi_collect_data(struct ipmi_monitoring_ipmi_config *ipmi_config) { - errno = 0; +// sending data to netdata - if (_ipmimonitoring_sensors(ipmi_config) < 0) return -1; +static inline bool is_sensor_updated(usec_t last_collected_ut, usec_t now_ut, usec_t freq) { + return (now_ut - last_collected_ut < freq * 2) ? true : false; +} - if(netdata_do_sel) { - if(_ipmimonitoring_sel(ipmi_config) < 0) return -2; +static size_t send_ipmi_sensor_metrics_to_netdata(struct netdata_ipmi_state *state) { + if(state->sensors.status != ICS_RUNNING) { + if(unlikely(state->debug)) + fprintf(stderr, "%s: %s() sensors state is not RUNNING\n", + program_name, __FUNCTION__ ); + return 0; } - return 0; -} + size_t total_sensors_sent = 0; + int update_every = (int)(state->sensors.freq_ut / USEC_PER_SEC); + struct sensor *sn; -int ipmi_detect_speed_secs(struct ipmi_monitoring_ipmi_config *ipmi_config) { - int i, checks = 10; - unsigned long long total = 0; + // generate the CHART/DIMENSION lines, if we have to + dfe_start_reentrant(state->sensors.dict, sn) { + if(unlikely(!sn->do_metric && !sn->do_state)) + continue; - for(i = 0 ; i < checks ; i++) { - if(debug) fprintf(stderr, "freeipmi.plugin: checking data collection speed iteration %d of %d\n", i+1, checks); + bool did_metric = false, did_state = false; - // measure the time a data collection needs - unsigned long long start = now_realtime_usec(); - if(ipmi_collect_data(ipmi_config) < 0) - fatal("freeipmi.plugin: data collection failed."); + if(likely(sn->do_metric)) { + if(unlikely(!is_sensor_updated(sn->last_collected_metric_ut, state->updates.now_ut, state->sensors.freq_ut))) { + if(unlikely(state->debug)) + fprintf(stderr, "%s: %s() sensor '%s' metric is not UPDATED (last updated %llu, now %llu, freq %llu\n", + program_name, __FUNCTION__, sn->sensor_name, sn->last_collected_metric_ut, state->updates.now_ut, state->sensors.freq_ut); + } + else { + if (unlikely(!sn->metric_chart_sent)) { + sn->metric_chart_sent = true; - unsigned long long end = now_realtime_usec(); + printf("CHART '%s_%s' '' '%s' '%s' '%s' '%s' '%s' %d %d '' '%s' '%s'\n", + sn->context, sn_dfe.name, sn->title, sn->units, sn->family, sn->context, + sn->chart_type, sn->priority + 1, update_every, program_name, "sensors"); - if(debug) fprintf(stderr, "freeipmi.plugin: data collection speed was %llu usec\n", end - start); + printf("CLABEL 'sensor' '%s' 1\n", sn->sensor_name); + printf("CLABEL 'type' '%s' 1\n", sn->type); + printf("CLABEL 'component' '%s' 1\n", sn->component); + printf("CLABEL_COMMIT\n"); - // add it to our total - total += end - start; + printf("DIMENSION '%s' '' absolute 1 %d\n", sn->dimension, sn->multiplier); + } - // wait the same time - // to avoid flooding the IPMI processor with requests - sleep_usec(end - start); - } + printf("BEGIN '%s_%s'\n", sn->context, sn_dfe.name); + + switch (sn->sensor_reading_type) { + case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32: + printf("SET '%s' = %u\n", sn->dimension, sn->sensor_reading.uint32_value + ); + break; + + case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE: + printf("SET '%s' = %lld\n", sn->dimension, + (long long int) (sn->sensor_reading.double_value * sn->multiplier) + ); + break; + + case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL: + printf("SET '%s' = %u\n", sn->dimension, sn->sensor_reading.bool_value + ); + break; + + default: + case IPMI_MONITORING_SENSOR_READING_TYPE_UNKNOWN: + // this should never happen because we also do the same check at netdata_get_sensor() + sn->do_metric = false; + break; + } - // so, we assume it needed 2x the time - // we find the average in microseconds - // and we round-up to the closest second + printf("END\n"); + did_metric = true; + } + } - return (int)(( total * 2 / checks / 1000000 ) + 1); -} + if(likely(sn->do_state)) { + if(unlikely(!is_sensor_updated(sn->last_collected_state_ut, state->updates.now_ut, state->sensors.freq_ut))) { + if (unlikely(state->debug)) + fprintf(stderr, "%s: %s() sensor '%s' state is not UPDATED (last updated %llu, now %llu, freq %llu\n", + program_name, __FUNCTION__, sn->sensor_name, sn->last_collected_state_ut, state->updates.now_ut, state->sensors.freq_ut); + } + else { + if (unlikely(!sn->state_chart_sent)) { + sn->state_chart_sent = true; + + printf("CHART 'ipmi.sensor_state_%s' '' 'IPMI Sensor State' 'state' 'states' 'ipmi.sensor_state' 'line' %d %d '' '%s' '%s'\n", + sn_dfe.name, sn->priority, update_every, program_name, "sensors"); + + printf("CLABEL 'sensor' '%s' 1\n", sn->sensor_name); + printf("CLABEL 'type' '%s' 1\n", sn->type); + printf("CLABEL 'component' '%s' 1\n", sn->component); + printf("CLABEL_COMMIT\n"); + + printf("DIMENSION 'nominal' '' absolute 1 1\n"); + printf("DIMENSION 'warning' '' absolute 1 1\n"); + printf("DIMENSION 'critical' '' absolute 1 1\n"); + printf("DIMENSION 'unknown' '' absolute 1 1\n"); + } -int parse_inband_driver_type (const char *str) -{ - fatal_assert(str); + printf("BEGIN 'ipmi.sensor_state_%s'\n", sn_dfe.name); + printf("SET 'nominal' = %lld\n", sn->sensor_state == IPMI_MONITORING_STATE_NOMINAL ? 1LL : 0LL); + printf("SET 'warning' = %lld\n", sn->sensor_state == IPMI_MONITORING_STATE_WARNING ? 1LL : 0LL); + printf("SET 'critical' = %lld\n", sn->sensor_state == IPMI_MONITORING_STATE_CRITICAL ? 1LL : 0LL); + printf("SET 'unknown' = %lld\n", sn->sensor_state == IPMI_MONITORING_STATE_UNKNOWN ? 1LL : 0LL); + printf("END\n"); + did_state = true; + } + } - if (strcasecmp (str, IPMI_PARSE_DEVICE_KCS_STR) == 0) - return (IPMI_MONITORING_DRIVER_TYPE_KCS); - else if (strcasecmp (str, IPMI_PARSE_DEVICE_SSIF_STR) == 0) - return (IPMI_MONITORING_DRIVER_TYPE_SSIF); - /* support "open" for those that might be used to - * ipmitool. - */ - else if (strcasecmp (str, IPMI_PARSE_DEVICE_OPENIPMI_STR) == 0 - || strcasecmp (str, IPMI_PARSE_DEVICE_OPENIPMI_STR2) == 0) - return (IPMI_MONITORING_DRIVER_TYPE_OPENIPMI); - /* support "bmc" for those that might be used to - * ipmitool. - */ - else if (strcasecmp (str, IPMI_PARSE_DEVICE_SUNBMC_STR) == 0 - || strcasecmp (str, IPMI_PARSE_DEVICE_SUNBMC_STR2) == 0) - return (IPMI_MONITORING_DRIVER_TYPE_SUNBMC); + if(likely(did_metric || did_state)) + total_sensors_sent++; + } + dfe_done(sn); - return (-1); + return total_sensors_sent; } -int parse_outofband_driver_type (const char *str) -{ - fatal_assert(str); +static size_t send_ipmi_sel_metrics_to_netdata(struct netdata_ipmi_state *state) { + static bool sel_chart_generated = false; + + if(likely(state->sel.status == ICS_RUNNING)) { + if(unlikely(!sel_chart_generated)) { + sel_chart_generated = true; + printf("CHART ipmi.events '' 'IPMI Events' 'events' 'events' ipmi.sel area %d %d '' '%s' '%s'\n" + , state->sel.priority + 2 + , (int)(state->sel.freq_ut / USEC_PER_SEC) + , program_name + , "sel" + ); + printf("DIMENSION events '' absolute 1 1\n"); + } - if (strcasecmp (str, IPMI_PARSE_DEVICE_LAN_STR) == 0) - return (IPMI_MONITORING_PROTOCOL_VERSION_1_5); - /* support "lanplus" for those that might be used to ipmitool. - * support typo variants to ease. - */ - else if (strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR) == 0 - || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR2) == 0 - || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR3) == 0 - || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR4) == 0 - || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR5) == 0) - return (IPMI_MONITORING_PROTOCOL_VERSION_2_0); + printf( + "BEGIN ipmi.events\n" + "SET events = %zu\n" + "END\n" + , state->sel.events + ); + } - return (-1); + return state->sel.events; } -int host_is_local(const char *host) -{ - if (host && (!strcmp(host, "localhost") || !strcmp(host, "127.0.0.1") || !strcmp(host, "::1"))) - return (1); - - return (0); -} +// ---------------------------------------------------------------------------- +// main, command line arguments parsing int main (int argc, char **argv) { + bool netdata_do_sel = IPMI_ENABLE_SEL_BY_DEFAULT; + stderror = stderr; clocks_init(); + int update_every = IPMI_SENSORS_MIN_UPDATE_EVERY; // this is the minimum update frequency + int update_every_sel = IPMI_SEL_MIN_UPDATE_EVERY; // this is the minimum update frequency for SEL events + bool debug = false; + // ------------------------------------------------------------------------ // initialization of netdata plugin @@ -1610,40 +1450,85 @@ int main (int argc, char **argv) { error_log_errors_per_period = 100; error_log_throttle_period = 3600; - // ------------------------------------------------------------------------ // parse command line parameters - int i, freq = 0; + int i, freq_s = 0; for(i = 1; i < argc ; i++) { - if(isdigit(*argv[i]) && !freq) { + if(isdigit(*argv[i]) && !freq_s) { int n = str2i(argv[i]); if(n > 0 && n < 86400) { - freq = n; + freq_s = n; continue; } } else if(strcmp("version", argv[i]) == 0 || strcmp("-version", argv[i]) == 0 || strcmp("--version", argv[i]) == 0 || strcmp("-v", argv[i]) == 0 || strcmp("-V", argv[i]) == 0) { - printf("freeipmi.plugin %s\n", VERSION); + printf("%s %s\n", program_name, VERSION); exit(0); } else if(strcmp("debug", argv[i]) == 0) { - debug = 1; + debug = true; continue; } else if(strcmp("sel", argv[i]) == 0) { - netdata_do_sel = 1; + netdata_do_sel = true; continue; } else if(strcmp("no-sel", argv[i]) == 0) { - netdata_do_sel = 0; + netdata_do_sel = false; continue; } + else if(strcmp("reread-sdr-cache", argv[i]) == 0) { + global_sel_flags |= IPMI_MONITORING_SEL_FLAGS_REREAD_SDR_CACHE; + global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_REREAD_SDR_CACHE; + remove_reread_sdr_after_first_use = false; + if (debug) fprintf(stderr, "%s: reread-sdr-cache enabled for both sensors and SEL\n", program_name); + } + else if(strcmp("interpret-oem-data", argv[i]) == 0) { + global_sel_flags |= IPMI_MONITORING_SEL_FLAGS_INTERPRET_OEM_DATA; + global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_INTERPRET_OEM_DATA; + if (debug) fprintf(stderr, "%s: interpret-oem-data enabled for both sensors and SEL\n", program_name); + } + else if(strcmp("assume-system-event-record", argv[i]) == 0) { + global_sel_flags |= IPMI_MONITORING_SEL_FLAGS_ASSUME_SYSTEM_EVENT_RECORD; + if (debug) fprintf(stderr, "%s: assume-system-event-record enabled\n", program_name); + } + else if(strcmp("ignore-non-interpretable-sensors", argv[i]) == 0) { + global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_IGNORE_NON_INTERPRETABLE_SENSORS; + if (debug) fprintf(stderr, "%s: ignore-non-interpretable-sensors enabled\n", program_name); + } + else if(strcmp("bridge-sensors", argv[i]) == 0) { + global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_BRIDGE_SENSORS; + if (debug) fprintf(stderr, "%s: bridge-sensors enabled\n", program_name); + } + else if(strcmp("shared-sensors", argv[i]) == 0) { + global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_SHARED_SENSORS; + if (debug) fprintf(stderr, "%s: shared-sensors enabled\n", program_name); + } + else if(strcmp("no-discrete-reading", argv[i]) == 0) { + global_sensor_reading_flags &= ~(IPMI_MONITORING_SENSOR_READING_FLAGS_DISCRETE_READING); + if (debug) fprintf(stderr, "%s: discrete-reading disabled\n", program_name); + } + else if(strcmp("ignore-scanning-disabled", argv[i]) == 0) { + global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_IGNORE_SCANNING_DISABLED; + if (debug) fprintf(stderr, "%s: ignore-scanning-disabled enabled\n", program_name); + } + else if(strcmp("assume-bmc-owner", argv[i]) == 0) { + global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_ASSUME_BMC_OWNER; + if (debug) fprintf(stderr, "%s: assume-bmc-owner enabled\n", program_name); + } +#if defined(IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES) && defined(IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES) + else if(strcmp("entity-sensor-names", argv[i]) == 0) { + global_sel_flags |= IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES; + global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES; + if (debug) fprintf(stderr, "%s: entity-sensor-names enabled for both sensors and SEL\n", program_name); + } +#endif else if(strcmp("-h", argv[i]) == 0 || strcmp("--help", argv[i]) == 0) { fprintf(stderr, "\n" - " netdata freeipmi.plugin %s\n" - " Copyright (C) 2016-2017 Costa Tsaousis <costa@tsaousis.gr>\n" + " netdata %s %s\n" + " Copyright (C) 2023 Netdata Inc.\n" " Released under GNU General Public License v3 or later.\n" " All rights reserved.\n" "\n" @@ -1661,16 +1546,53 @@ int main (int argc, char **argv) { " no-sel enable/disable SEL collection\n" " default: %s\n" "\n" + " reread-sdr-cache re-read SDR cache on every iteration\n" + " default: disabled\n" + "\n" + " interpret-oem-data attempt to parse OEM data\n" + " default: disabled\n" + "\n" + " assume-system-event-record \n" + " tread illegal SEL events records as normal\n" + " default: disabled\n" + "\n" + " ignore-non-interpretable-sensors \n" + " do not read sensors that cannot be interpreted\n" + " default: disabled\n" + "\n" + " bridge-sensors bridge sensors not owned by the BMC\n" + " default: disabled\n" + "\n" + " shared-sensors enable shared sensors, if found\n" + " default: disabled\n" + "\n" + " no-discrete-reading do not read sensors that their event/reading type code is invalid\n" + " default: enabled\n" + "\n" + " ignore-scanning-disabled \n" + " Ignore the scanning bit and read sensors no matter what\n" + " default: disabled\n" + "\n" + " assume-bmc-owner assume the BMC is the sensor owner no matter what\n" + " (usually bridging is required too)\n" + " default: disabled\n" + "\n" +#if defined(IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES) && defined(IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES) + " entity-sensor-names sensor names prefixed with entity id and instance\n" + " default: disabled\n" + "\n" +#endif " hostname HOST\n" " username USER\n" " password PASS connect to remote IPMI host\n" " default: local IPMI processor\n" "\n" + " no-auth-code-check\n" " noauthcodecheck don't check the authentication codes returned\n" "\n" " driver-type IPMIDRIVER\n" " Specify the driver type to use instead of doing an auto selection. \n" - " The currently available outofband drivers are LAN and LAN_2_0,\n" + " The currently available outofband drivers are LAN and LAN_2_0,\n" " which perform IPMI 1.5 and IPMI 2.0 respectively. \n" " The currently available inband drivers are KCS, SSIF, OPENIPMI and SUNBMC.\n" "\n" @@ -1680,6 +1602,9 @@ int main (int argc, char **argv) { " sensor-config-file FILE filename to read sensor configuration\n" " default: %s\n" "\n" + " sel-config-file FILE filename to read sel configuration\n" + " default: %s\n" + "\n" " ignore N1,N2,N3,... sensor IDs to ignore\n" " default: none\n" "\n" @@ -1700,11 +1625,12 @@ int main (int argc, char **argv) { " For more information:\n" " https://github.com/netdata/netdata/tree/master/collectors/freeipmi.plugin\n" "\n" - , VERSION - , netdata_update_every + , program_name, VERSION + , update_every , netdata_do_sel?"enabled":"disabled" , sdr_cache_directory?sdr_cache_directory:"system default" , sensor_config_file?sensor_config_file:"system default" + , sel_config_file?sel_config_file:"system default" ); exit(1); } @@ -1713,7 +1639,7 @@ int main (int argc, char **argv) { char *s = argv[i]; // mask it be hidden from the process tree while(*s) *s++ = 'x'; - if(debug) fprintf(stderr, "freeipmi.plugin: hostname set to '%s'\n", hostname); + if(debug) fprintf(stderr, "%s: hostname set to '%s'\n", program_name, hostname); continue; } else if(i < argc && strcmp("username", argv[i]) == 0) { @@ -1721,7 +1647,7 @@ int main (int argc, char **argv) { char *s = argv[i]; // mask it be hidden from the process tree while(*s) *s++ = 'x'; - if(debug) fprintf(stderr, "freeipmi.plugin: username set to '%s'\n", username); + if(debug) fprintf(stderr, "%s: username set to '%s'\n", program_name, username); continue; } else if(i < argc && strcmp("password", argv[i]) == 0) { @@ -1729,149 +1655,247 @@ int main (int argc, char **argv) { char *s = argv[i]; // mask it be hidden from the process tree while(*s) *s++ = 'x'; - if(debug) fprintf(stderr, "freeipmi.plugin: password set to '%s'\n", password); + if(debug) fprintf(stderr, "%s: password set to '%s'\n", program_name, password); continue; } else if(strcmp("driver-type", argv[i]) == 0) { if (hostname) { - protocol_version=parse_outofband_driver_type(argv[++i]); - if(debug) fprintf(stderr, "freeipmi.plugin: outband protocol version set to '%d'\n", protocol_version); + protocol_version = netdata_parse_outofband_driver_type(argv[++i]); + if(debug) fprintf(stderr, "%s: outband protocol version set to '%d'\n", + program_name, protocol_version); } else { - driver_type=parse_inband_driver_type(argv[++i]); - if(debug) fprintf(stderr, "freeipmi.plugin: inband driver type set to '%d'\n", driver_type); + driver_type = netdata_parse_inband_driver_type(argv[++i]); + if(debug) fprintf(stderr, "%s: inband driver type set to '%d'\n", + program_name, driver_type); } continue; - } else if (i < argc && strcmp("noauthcodecheck", argv[i]) == 0) { - if (!hostname || host_is_local(hostname)) { + } else if (i < argc && (strcmp("noauthcodecheck", argv[i]) == 0 || strcmp("no-auth-code-check", argv[i]) == 0)) { + if (!hostname || netdata_host_is_localhost(hostname)) { if (debug) - fprintf( - stderr, - "freeipmi.plugin: noauthcodecheck workaround flag is ignored for inband configuration\n"); - } else if (protocol_version < 0 || protocol_version == IPMI_MONITORING_PROTOCOL_VERSION_1_5) { + fprintf(stderr, "%s: noauthcodecheck workaround flag is ignored for inband configuration\n", + program_name); + + } + else if (protocol_version < 0 || protocol_version == IPMI_MONITORING_PROTOCOL_VERSION_1_5) { workaround_flags |= IPMI_MONITORING_WORKAROUND_FLAGS_PROTOCOL_VERSION_1_5_NO_AUTH_CODE_CHECK; + if (debug) - fprintf(stderr, "freeipmi.plugin: noauthcodecheck workaround flag enabled\n"); - } else { + fprintf(stderr, "%s: noauthcodecheck workaround flag enabled\n", program_name); + } + else { if (debug) - fprintf( - stderr, - "freeipmi.plugin: noauthcodecheck workaround flag is ignored for protocol version 2.0\n"); + fprintf(stderr, "%s: noauthcodecheck workaround flag is ignored for protocol version 2.0\n", + program_name); } continue; } else if(i < argc && strcmp("sdr-cache-dir", argv[i]) == 0) { sdr_cache_directory = argv[++i]; - if(debug) fprintf(stderr, "freeipmi.plugin: SDR cache directory set to '%s'\n", sdr_cache_directory); + + if(debug) + fprintf(stderr, "%s: SDR cache directory set to '%s'\n", program_name, sdr_cache_directory); + continue; } else if(i < argc && strcmp("sensor-config-file", argv[i]) == 0) { sensor_config_file = argv[++i]; - if(debug) fprintf(stderr, "freeipmi.plugin: sensor config file set to '%s'\n", sensor_config_file); + if(debug) fprintf(stderr, "%s: sensor config file set to '%s'\n", program_name, sensor_config_file); + continue; + } + else if(i < argc && strcmp("sel-config-file", argv[i]) == 0) { + sel_config_file = argv[++i]; + if(debug) fprintf(stderr, "%s: sel config file set to '%s'\n", program_name, sel_config_file); continue; } else if(i < argc && strcmp("ignore", argv[i]) == 0) { - excluded_record_ids_parse(argv[++i]); + excluded_record_ids_parse(argv[++i], debug); continue; } else if(i < argc && strcmp("ignore-status", argv[i]) == 0) { - excluded_status_record_ids_parse(argv[++i]); + excluded_status_record_ids_parse(argv[++i], debug); continue; } - collector_error("freeipmi.plugin: ignoring parameter '%s'", argv[i]); + collector_error("%s(): ignoring parameter '%s'", __FUNCTION__, argv[i]); } errno = 0; - if(freq >= netdata_update_every) - netdata_update_every = freq; - - else if(freq) - collector_error("update frequency %d seconds is too small for IPMI. Using %d.", freq, netdata_update_every); + if(freq_s && freq_s < update_every) + collector_error("%s(): update frequency %d seconds is too small for IPMI. Using %d.", + __FUNCTION__, freq_s, update_every); + update_every = freq_s = MAX(freq_s, update_every); + update_every_sel = MAX(update_every, update_every_sel); // ------------------------------------------------------------------------ // initialize IPMI - struct ipmi_monitoring_ipmi_config ipmi_config; - - if(debug) fprintf(stderr, "freeipmi.plugin: calling _init_ipmi_config()\n"); - - _init_ipmi_config(&ipmi_config); - if(debug) { - fprintf(stderr, "freeipmi.plugin: calling ipmi_monitoring_init()\n"); - ipmimonitoring_init_flags|=IPMI_MONITORING_FLAGS_DEBUG|IPMI_MONITORING_FLAGS_DEBUG_IPMI_PACKETS; + fprintf(stderr, "%s: calling ipmi_monitoring_init()\n", program_name); + ipmimonitoring_init_flags |= IPMI_MONITORING_FLAGS_DEBUG|IPMI_MONITORING_FLAGS_DEBUG_IPMI_PACKETS; } - if(ipmi_monitoring_init(ipmimonitoring_init_flags, &errnum) < 0) - fatal("ipmi_monitoring_init: %s", ipmi_monitoring_ctx_strerror(errnum)); - - if(debug) fprintf(stderr, "freeipmi.plugin: detecting IPMI minimum update frequency...\n"); - freq = ipmi_detect_speed_secs(&ipmi_config); - if(debug) fprintf(stderr, "freeipmi.plugin: IPMI minimum update frequency was calculated to %d seconds.\n", freq); - - if(freq > netdata_update_every) { - collector_info("enforcing minimum data collection frequency, calculated to %d seconds.", freq); - netdata_update_every = freq; - } + int rc; + if(ipmi_monitoring_init(ipmimonitoring_init_flags, &rc) < 0) + fatal("ipmi_monitoring_init: %s", ipmi_monitoring_ctx_strerror(rc)); + // ------------------------------------------------------------------------ + // create the data collection threads + + struct ipmi_collection_thread sensors_data = { + .type = IPMI_COLLECT_TYPE_SENSORS, + .freq_s = update_every, + .spinlock = NETDATA_SPINLOCK_INITIALIZER, + .debug = debug, + .state = { + .debug = debug, + .sensors = { + .status = ICS_INIT, + .last_iteration_ut = now_monotonic_usec(), + .freq_ut = update_every * USEC_PER_SEC, + .priority = IPMI_SENSORS_DASHBOARD_PRIORITY, + .dict = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE|DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct sensor)), + }, + }, + }, sel_data = { + .type = IPMI_COLLECT_TYPE_SEL, + .freq_s = update_every_sel, + .spinlock = NETDATA_SPINLOCK_INITIALIZER, + .debug = debug, + .state = { + .debug = debug, + .sel = { + .status = ICS_INIT, + .last_iteration_ut = now_monotonic_usec(), + .freq_ut = update_every_sel * USEC_PER_SEC, + .priority = IPMI_SEL_DASHBOARD_PRIORITY, + }, + }, + }; + + netdata_thread_t sensors_thread = 0, sel_thread = 0; + + netdata_thread_create(&sensors_thread, "IPMI[sensors]", NETDATA_THREAD_OPTION_DONT_LOG, netdata_ipmi_collection_thread, &sensors_data); + + if(netdata_do_sel) + netdata_thread_create(&sel_thread, "IPMI[sel]", NETDATA_THREAD_OPTION_DONT_LOG, netdata_ipmi_collection_thread, &sel_data); // ------------------------------------------------------------------------ // the main loop - if(debug) fprintf(stderr, "freeipmi.plugin: starting data collection\n"); + if(debug) fprintf(stderr, "%s: starting data collection\n", program_name); time_t started_t = now_monotonic_sec(); size_t iteration = 0; - usec_t step = netdata_update_every * USEC_PER_SEC; + usec_t step = 100 * USEC_PER_MS; + bool global_chart_created = false; + bool tty = isatty(fileno(stderr)) == 1; heartbeat_t hb; heartbeat_init(&hb); for(iteration = 0; 1 ; iteration++) { usec_t dt = heartbeat_next(&hb, step); - if (iteration) { - if (iteration == 1) { - fprintf( - stdout, - "CHART netdata.freeipmi_availability_status '' 'Plugin availability status' 'status' plugins netdata.plugin_availability_status line 146000 %d\n" - "DIMENSION available '' absolute 1 1\n", - netdata_update_every); + if(!tty) + fprintf(stdout, "\n"); // keepalive to avoid parser read timeout (2 minutes) during ipmi_detect_speed_secs() + + struct netdata_ipmi_state state = {0 }; + + spinlock_lock(&sensors_data.spinlock); + state.sensors = sensors_data.state.sensors; + spinlock_unlock(&sensors_data.spinlock); + + spinlock_lock(&sel_data.spinlock); + state.sel = sel_data.state.sel; + spinlock_unlock(&sel_data.spinlock); + + switch(state.sensors.status) { + case ICS_RUNNING: + step = update_every * USEC_PER_SEC; + if(state.sensors.last_iteration_ut < now_monotonic_usec() - IPMI_RESTART_IF_SENSORS_DONT_ITERATE_EVERY_SECONDS * USEC_PER_SEC) { + collector_error("%s(): sensors have not be collected for %zu seconds. Exiting to restart.", + __FUNCTION__, (size_t)((now_monotonic_usec() - state.sensors.last_iteration_ut) / USEC_PER_SEC)); + + fprintf(stdout, "EXIT\n"); + fflush(stdout); + exit(0); + } + break; + + case ICS_INIT: + continue; + + case ICS_INIT_FAILED: + collector_error("%s(): sensors failed to initialize. Calling DISABLE.", __FUNCTION__); + fprintf(stdout, "DISABLE\n"); + fflush(stdout); + exit(0); + + case ICS_FAILED: + collector_error("%s(): sensors fails repeatedly to collect metrics. Exiting to restart.", __FUNCTION__); + fprintf(stdout, "EXIT\n"); + fflush(stdout); + exit(0); + } + + if(netdata_do_sel) { + switch (state.sensors.status) { + case ICS_RUNNING: + case ICS_INIT: + break; + + case ICS_INIT_FAILED: + case ICS_FAILED: + collector_error("%s(): SEL fails to collect events. Disabling SEL collection.", __FUNCTION__); + netdata_do_sel = false; + break; } - fprintf( - stdout, - "BEGIN netdata.freeipmi_availability_status\n" - "SET available = 1\n" - "END\n"); } - if(debug && iteration) - fprintf(stderr, "freeipmi.plugin: iteration %zu, dt %llu usec, sensors collected %zu, sensors sent to netdata %zu \n" + if(unlikely(debug)) + fprintf(stderr, "%s: calling send_ipmi_sensor_metrics_to_netdata()\n", program_name); + + state.updates.now_ut = now_monotonic_usec(); + send_ipmi_sensor_metrics_to_netdata(&state); + + if(netdata_do_sel) + send_ipmi_sel_metrics_to_netdata(&state); + + if(unlikely(debug)) + fprintf(stderr, "%s: iteration %zu, dt %llu usec, sensors ever collected %zu, sensors last collected %zu \n" + , program_name , iteration , dt - , netdata_sensors_collected - , netdata_sensors_updated + , dictionary_entries(state.sensors.dict) + , state.sensors.collected ); - netdata_mark_as_not_updated(); + if (!global_chart_created) { + global_chart_created = true; - if(debug) fprintf(stderr, "freeipmi.plugin: calling ipmi_collect_data()\n"); - if(ipmi_collect_data(&ipmi_config) < 0) - fatal("data collection failed."); + fprintf(stdout, + "CHART netdata.freeipmi_availability_status '' 'Plugin availability status' 'status' " + "plugins netdata.plugin_availability_status line 146000 %d '' '%s' '%s'\n" + "DIMENSION available '' absolute 1 1\n", + update_every, program_name, ""); + } - if(debug) fprintf(stderr, "freeipmi.plugin: calling send_metrics_to_netdata()\n"); - send_metrics_to_netdata(); - fflush(stdout); + fprintf(stdout, + "BEGIN netdata.freeipmi_availability_status\n" + "SET available = 1\n" + "END\n"); // restart check (14400 seconds) - if (now_monotonic_sec() - started_t > 14400) { + if (now_monotonic_sec() - started_t > IPMI_RESTART_EVERY_SECONDS) { + collector_error("%s(): reached my lifetime expectancy. Exiting to restart.", __FUNCTION__); fprintf(stdout, "EXIT\n"); fflush(stdout); exit(0); } + + fflush(stdout); } } - |