diff options
Diffstat (limited to 'schema')
23 files changed, 947 insertions, 0 deletions
diff --git a/schema/mysql/get_sla_ok_percent.sql b/schema/mysql/get_sla_ok_percent.sql new file mode 100644 index 0000000..a546326 --- /dev/null +++ b/schema/mysql/get_sla_ok_percent.sql @@ -0,0 +1,333 @@ +DROP FUNCTION IF EXISTS idoreports_get_sla_ok_percent; + +DELIMITER // + +CREATE FUNCTION idoreports_get_sla_ok_percent ( + id BIGINT UNSIGNED, + start DATETIME, + end DATETIME, + sla_timeperiod_object_id BIGINT UNSIGNED +) RETURNS DECIMAL(7, 4) + READS SQL DATA +BEGIN + DECLARE result DECIMAL(7, 4); + + -- We use user-defined @-vars, this allows for easier sub-queries testing + SET + -- First, set our parameters: + @id = id, + @start = start, + @end = end, + @sla_timeperiod_object_id = sla_timeperiod_object_id, + + -- Then fetch our object type id: + @type_id = (SELECT objecttype_id FROM icinga_objects WHERE object_id = id), + + -- Next, reset inline vars: + @next_type = NULL, + @last_ts = NULL, + @last_type = NULL, + @add_duration = 0, + @last_state = NULL, + @cnt_tp = null, + @cnt_dt = NULL, + + -- And finally reset all eventual result variables: + @sla_ok_seconds = NULL, + @sla_ok_percent = NULL, + @problem_seconds = NULL, + @problem_percent = NULL, + @problem_in_downtime_seconds = NULL, + @problem_in_downtime_percent = NULL, + @total_seconds = NULL + ; + + + IF @type_id NOT IN (1, 2) THEN + RETURN NULL; + END IF; + +SELECT CASE WHEN @last_state IS NULL THEN NULL ELSE CAST(sla_ok_seconds / total_time * 100 AS DECIMAL(7, 4)) END INTO result FROM ( +SELECT + @sla_ok_seconds := SUM( + CASE + WHEN in_downtime + out_of_slatime > 0 THEN 1 + WHEN is_problem THEN 0 + ELSE 1 + END * duration + ) AS sla_ok_seconds, + @sla_ok_percent := CAST(100 * SUM( + CASE + WHEN in_downtime + out_of_slatime > 0 THEN 1 + WHEN is_problem THEN 0 + ELSE 1 + END * duration / (UNIX_TIMESTAMP(@end) - UNIX_TIMESTAMP(@start)) + ) AS DECIMAL(7, 4)) AS sla_ok_percent, + @problem_seconds := SUM(is_problem * duration) AS problem_seconds, + @problem_percent := CAST( + SUM(is_problem * duration) / SUM(duration) * 100 AS DECIMAL(7, 4) + ) AS problem_percent, + @problem_in_downtime_seconds := SUM( + is_problem * in_downtime * duration + ) AS problem_in_downtime_seconds, + @problem_in_downtime_percent := CAST(100 * SUM( + is_problem * in_downtime * duration + / (UNIX_TIMESTAMP(@end) - UNIX_TIMESTAMP(@start)) + ) AS DECIMAL(7, 4)) AS problem_in_downtime_percent, + @total_seconds := SUM(duration) AS total_time +FROM ( + -- ----------------------------------------------------------------- -- +-- SLA relevant events, re-modelled with duration -- +-- -- +-- This declares and clears the following variables: -- +-- * @last_state -- +-- * @add_duration -- +-- * @next_type -- +-- * @cnt_dt -- +-- * @cnt_tp -- +-- * @type_id -- +-- * @next_type -- +-- * @start (used) -- +-- -- +-- Columns: -- +-- *** -- +-- ----------------------------------------------------------------- -- + +SELECT + state_time, + UNIX_TIMESTAMP(state_time), + CAST(COALESCE(@last_ts, UNIX_TIMESTAMP(@start)) AS UNSIGNED), + CAST(UNIX_TIMESTAMP(state_time) + - CAST(COALESCE(@last_ts, UNIX_TIMESTAMP(@start)) AS UNSIGNED) + + CAST(COALESCE(@add_duration, 0) AS UNSIGNED) AS UNSIGNED) AS duration, + + -- @add_duration is used as long as we haven't seen a state + @add_duration AS add_duration, + + @next_type AS current_type, + @next_type := type AS next_type, + + -- current_state is the state from the last state change until now: + @last_state AS current_state, + + CASE WHEN @last_state IS NULL THEN NULL ELSE + CASE WHEN @type_id = 1 + THEN CASE WHEN @last_state > 0 THEN 1 ELSE 0 END + ELSE CASE WHEN @last_state > 1 THEN 1 ELSE 0 END + END + END AS is_problem, + + CASE WHEN COALESCE(@cnt_dt, 0) > 0 THEN 1 ELSE 0 END AS in_downtime, + CASE WHEN COALESCE(@cnt_tp, 0) > 0 THEN 1 ELSE 0 END AS out_of_slatime, + + COALESCE(@cnt_dt, 0) AS dt_depth, + COALESCE(@cnt_tp, 0) AS tp_depth, + + CASE type + WHEN 'dt_start' THEN @cnt_dt := COALESCE(@cnt_dt, 0) + 1 + WHEN 'dt_end' THEN @cnt_dt := GREATEST(@cnt_dt - 1, 0) + ELSE COALESCE(@cnt_dt, 0) + END AS next_dt_depth, + + CASE type + WHEN 'sla_end' THEN @cnt_tp := COALESCE(@cnt_tp, 0) + 1 + WHEN 'sla_start' THEN @cnt_tp := GREATEST(@cnt_tp - 1, 0) + ELSE COALESCE(@cnt_tp, 0) + END AS next_tp_depth, + + -- next_state is the state from now on, so it replaces @last_state: + CASE + -- Set our next @last_state if we have a hard state change + WHEN type IN ('hard_state', 'former_state', 'current_state') THEN @last_state := state + -- ...or if there is a soft_state and no @last_state has been seen before + WHEN type = 'soft_state' THEN + -- If we don't have a @last_state... + CASE WHEN @last_state IS NULL + -- ...use and set our own last_hard_state (last_state is the inner query alias)... + THEN @last_state := last_state + -- ...and return @last_state otherwise, as soft states shall have no + -- impact on availability + ELSE @last_state END + + WHEN type IN ('dt_start', 'sla_end') THEN @last_state + WHEN type IN ('dt_end', 'sla_start') THEN @last_state + END AS next_state, + + -- Our start_time is either the last end_time or @start... + @last_ts AS start_time, + + -- ...end when setting the new end_time we remember it in @last_ts: + CASE + WHEN type = 'fake_end' THEN state_time + ELSE @last_ts := UNIX_TIMESTAMP(state_time) + END AS end_time + +FROM ( +-- ----------------------------------------------------------------- -- +-- SLA relevant events -- +-- -- +-- Variables: -- +-- * @id The IDO object_id -- +-- * @start Start of the chosen time period. Currently DATE, should -- +-- be UNIX_TIMESTAMP -- +-- * @end Related end of the chosen time period -- +-- * @sla_timeperiod_object_id Time period object ID in case SLA -- +-- times should be respected -- +-- -- +-- Columns: -- +-- state_time, type, state, last_state -- +-- ----------------------------------------------------------------- -- + +-- START fetching statehistory events +SELECT + state_time, + CASE state_type WHEN 1 THEN 'hard_state' ELSE 'soft_state' END AS type, + state, + -- Workaround for a nasty Icinga issue. In case a hard state is reached + -- before max_check_attempts, the last_hard_state value is wrong. As of + -- this we are stepping through all single events, even soft ones. Of + -- course soft states do not have an influence on the availability: + CASE state_type WHEN 1 THEN last_state ELSE last_hard_state END AS last_state +FROM icinga_statehistory +WHERE object_id = @id + AND state_time >= @start + AND state_time <= @end +-- STOP fetching statehistory events + +-- START fetching last state BEFORE the given interval as an event +UNION SELECT * FROM ( + SELECT + @start AS state_time, + 'former_state' AS type, + CASE state_type WHEN 1 THEN state ELSE last_hard_state END AS state, + CASE state_type WHEN 1 THEN last_state ELSE last_hard_state END AS last_state + FROM icinga_statehistory h + WHERE object_id = @id + AND state_time < @start + ORDER BY h.state_time DESC + LIMIT 1 +) formerstate +-- END fetching last state BEFORE the given interval as an event + +-- START ADDING a fake end +UNION SELECT + @end AS state_time, + 'fake_end' AS type, + NULL AS state, + NULL AS last_state +FROM DUAL +-- END ADDING a fake end + +-- START fetching current host state as an event +-- TODO: This is not 100% correct. state should be fine, last_state sometimes isn't. +UNION SELECT + GREATEST( + @start, + CASE state_type WHEN 1 THEN last_state_change ELSE last_hard_state_change END + ) AS state_time, + 'current_state' AS type, + CASE state_type WHEN 1 THEN current_state ELSE last_hard_state END AS state, + last_hard_state AS last_state +FROM icinga_hoststatus +WHERE CASE state_type WHEN 1 THEN last_state_change ELSE last_hard_state_change END < @start + AND host_object_id = @id + AND CASE state_type WHEN 1 THEN last_state_change ELSE last_hard_state_change END <= @end + AND status_update_time > @start +-- END fetching current host state as an event + +-- START fetching current service state as an event +-- ++ , only if older than @start +UNION SELECT + GREATEST( + @start, + CASE state_type WHEN 1 THEN last_state_change ELSE last_hard_state_change END + ) AS state_time, + 'current_state' AS type, + CASE state_type WHEN 1 THEN current_state ELSE last_hard_state END AS state, + last_hard_state AS last_state +FROM icinga_servicestatus +WHERE CASE state_type WHEN 1 THEN last_state_change ELSE last_hard_state_change END < @start + AND service_object_id = @id + -- AND CASE state_type WHEN 1 THEN last_state_change ELSE last_hard_state_change END <= @end + AND status_update_time > @start +-- END fetching current service state as an event + +-- START adding add all related downtime start times +-- TODO: Handling downtimes still being active would be nice. +-- But pay attention: they could be completely outdated +UNION SELECT + GREATEST(actual_start_time, @start) AS state_time, + 'dt_start' AS type, + NULL AS state, + NULL AS last_state +FROM icinga_downtimehistory +WHERE object_id = @id + AND actual_start_time < @end + AND actual_end_time > @start +-- STOP adding add all related downtime start times + +-- START adding add all related downtime end times +UNION SELECT + LEAST(actual_end_time, @end) AS state_time, + 'dt_end' AS type, + NULL AS state, + NULL AS last_state +FROM icinga_downtimehistory +WHERE object_id = @id + AND actual_start_time < @end + AND actual_end_time > @start +-- STOP adding add all related downtime end times + +-- START fetching SLA time period start times --- +UNION ALL +SELECT + start_time AS state_time, + 'sla_start' AS type, + NULL AS state, + NULL AS last_state +FROM icinga_outofsla_periods +WHERE timeperiod_object_id = @sla_timeperiod_object_id + AND start_time >= @start + AND start_time <= @end +-- STOP fetching SLA time period start times --- + +-- START fetching SLA time period end times --- +UNION ALL SELECT + end_time AS state_time, + 'sla_end' AS type, + NULL AS state, + NULL AS last_state + FROM icinga_outofsla_periods +WHERE timeperiod_object_id = @sla_timeperiod_object_id + AND end_time >= @start + AND end_time <= @end +-- STOP fetching SLA time period end times --- + +ORDER BY state_time ASC, + CASE type + -- Order is important. current_state and former_state + -- are potential candidates for the initial state of the chosen period. + -- the last one wins, and preferably we have a state change before the + -- chosen period. Otherwise we assume that the first state change after + -- that period knows about the former state. Last fallback is the + WHEN 'current_state' THEN 0 + WHEN 'former_state' THEN 2 + WHEN 'soft_state' THEN 3 + WHEN 'hard_state' THEN 4 + WHEN 'sla_end' THEN 5 + WHEN 'sla_start' THEN 6 + WHEN 'dt_start' THEN 7 + WHEN 'dt_end' THEN 8 + ELSE 9 + END ASC + +) events + +) intervals + +) sladetails; + + RETURN result; +END// + +DELIMITER ; diff --git a/schema/mysql/slaperiods.sql b/schema/mysql/slaperiods.sql new file mode 100644 index 0000000..32bc55e --- /dev/null +++ b/schema/mysql/slaperiods.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS icinga_sla_periods; +CREATE TABLE icinga_sla_periods ( + timeperiod_object_id BIGINT(20) UNSIGNED NOT NULL, + start_time timestamp NOT NULL, + end_time timestamp NULL DEFAULT NULL, + PRIMARY KEY tp_start (timeperiod_object_id, start_time), + UNIQUE KEY tp_end (timeperiod_object_id, end_time) +) ENGINE InnoDB; + +DROP TABLE IF EXISTS icinga_outofsla_periods; +CREATE TABLE icinga_outofsla_periods ( + timeperiod_object_id BIGINT(20) UNSIGNED NOT NULL, + start_time timestamp NOT NULL, + end_time timestamp NULL DEFAULT NULL, + PRIMARY KEY tp_start (timeperiod_object_id, start_time), + UNIQUE KEY tp_end (timeperiod_object_id, end_time) +) ENGINE InnoDB; diff --git a/schema/postgresql/get_sla_ok_percent.sql b/schema/postgresql/get_sla_ok_percent.sql new file mode 100644 index 0000000..03eee16 --- /dev/null +++ b/schema/postgresql/get_sla_ok_percent.sql @@ -0,0 +1,235 @@ +DROP FUNCTION IF EXISTS idoreports_get_sla_ok_percent(BIGINT, TIMESTAMPTZ, TIMESTAMPTZ, INT); + +CREATE OR REPLACE FUNCTION idoreports_get_sla_ok_percent( + id BIGINT, + starttime TIMESTAMP WITHOUT TIME ZONE, + endtime TIMESTAMP WITHOUT TIME ZONE, + sla_id INTEGER DEFAULT NULL +) RETURNS float + LANGUAGE SQL +AS +$$ + +WITH + crit AS ( + SELECT + CASE objecttype_id + WHEN 1 THEN 0 + WHEN 2 THEN 1 + END AS value + FROM + icinga_objects + WHERE + object_id = id + ), + before AS ( + -- low border, last event before the range we are looking for: + SELECT + down, + state_time_ AS state_time, + state + FROM + ( + ( + SELECT + 1 AS prio, + state > crit.value AS down, + GREATEST(state_time, starttime) AS state_time_, + state + FROM + icinga_statehistory, + crit + WHERE + object_id = id + AND state_time < starttime + AND state_type = 1 + ORDER BY + state_time DESC + LIMIT 1 + ) + UNION ALL + ( + SELECT + 2 AS prio, + state > crit.value AS down, + GREATEST(state_time, starttime) AS state_time_, + state + FROM + icinga_statehistory, + crit + WHERE + object_id = id + AND state_time < starttime + ORDER BY + state_time DESC + LIMIT 1 + ) + ) ranked + ORDER BY + prio + LIMIT 1 + ), + all_hard_events AS ( + -- the actual range we're looking for: + SELECT + state > crit.value AS down, + state_time, + state + FROM + icinga_statehistory, + crit + WHERE + object_id = id + AND state_time >= starttime + AND state_time <= endtime + AND state_type = 1 + ), + after AS ( + -- the "younger" of the current host/service state and the first recorded event + ( + SELECT + state > crit_value AS down, + LEAST(state_time, endtime) AS state_time, + state + + FROM + ( + ( + SELECT + state_time, + state, + crit.value AS crit_value + FROM + icinga_statehistory, + crit + WHERE + object_id = id + AND state_time > endtime + AND state_type = 1 + ORDER BY + state_time + LIMIT 1 + ) + UNION ALL + ( + SELECT + status_update_time, + current_state, + crit.value AS crit_value + FROM + icinga_hoststatus, + crit + WHERE + host_object_id = id + AND state_type = 1 + ) + UNION ALL + ( + SELECT + status_update_time, + current_state, + crit.value AS crit_value + FROM + icinga_servicestatus, + crit + WHERE + service_object_id = id + AND state_type = 1 + ) + ) AS after_searched_period + ORDER BY + state_time + LIMIT 1 + ) + ), + allevents AS ( + TABLE before + UNION ALL + TABLE all_hard_events + UNION ALL + TABLE after + ), + downtimes AS ( + ( + SELECT + tsrange(actual_start_time, actual_end_time) AS downtime + FROM + icinga_downtimehistory + WHERE + object_id = id + ) + UNION ALL + ( + SELECT + tsrange(start_time, end_time) AS downtime + FROM + icinga_outofsla_periods + WHERE + timeperiod_object_id = sla_id + ) + ), + enriched AS ( + SELECT + down, + tsrange(state_time, COALESCE(lead(state_time) OVER w, endtime), '(]') AS timeframe + --,lead(state_time) OVER w - state_time AS dauer + FROM + ( + SELECT + state > crit.value AS down, + lead(state, 1, state) OVER w > crit.value AS next_down, + lag(state, 1, state) OVER w > crit.value AS prev_down, + state_time, + state + FROM + allevents, + crit WINDOW w AS (ORDER BY state_time) + ) alle WINDOW w AS (ORDER BY state_time) + ), + relevant AS ( + SELECT + down, + timeframe * tsrange(starttime, endtime, '(]') AS timeframe + FROM + enriched + WHERE + timeframe && tsrange(starttime, endtime, '(]') + ), + covered AS ( + SELECT + upper(covered_by_downtime) - lower(covered_by_downtime) AS dauer + FROM ( + SELECT + timeframe * downtime AS covered_by_downtime + FROM + relevant + LEFT JOIN downtimes ON timeframe && downtime + WHERE + down + ) AS foo + ), + relevant_down AS ( + SELECT *, + upper(timeframe) - lower(timeframe) AS dauer + FROM + relevant + WHERE + down + ), + final_result AS ( + SELECT + sum(dauer) - ( + SELECT sum(dauer) FROM covered + ) AS total_downtime, + endtime - starttime AS considered, + COALESCE(extract('epoch' from sum(dauer)), 0) AS down_secs, + extract('epoch' from endtime - starttime) AS considered_secs + FROM + relevant_down + ) + +SELECT + 100.0 - down_secs / considered_secs * 100.0 AS availability +FROM + final_result ; +$$; diff --git a/schema/postgresql/slaperiods.sql b/schema/postgresql/slaperiods.sql new file mode 100644 index 0000000..013c4f7 --- /dev/null +++ b/schema/postgresql/slaperiods.sql @@ -0,0 +1,5 @@ +CREATE TABLE icinga_outofsla_periods ( + timeperiod_object_id numeric NOT NULL, + start_time timestamp without time zone NOT NULL, + end_time timestamp without time zone NOT NULL +); diff --git a/schema/postgresql/t/00-create-db.sql b/schema/postgresql/t/00-create-db.sql new file mode 100644 index 0000000..5e0e78a --- /dev/null +++ b/schema/postgresql/t/00-create-db.sql @@ -0,0 +1,4 @@ +--create database icinga2; +CREATE EXTENSION IF NOT EXISTS pgtap; +SELECT plan(1); +SELECT is(count(*) , 1::bigint,'Extension pg_tap installed') FROM pg_extension WHERE extname = 'pgtap'; diff --git a/schema/postgresql/t/01-install.t b/schema/postgresql/t/01-install.t new file mode 100644 index 0000000..03ae35d --- /dev/null +++ b/schema/postgresql/t/01-install.t @@ -0,0 +1,2 @@ +SELECT plan(1); +\i ../install_all.psql diff --git a/schema/postgresql/t/01-statehistory.sql b/schema/postgresql/t/01-statehistory.sql new file mode 100644 index 0000000..1a1a5e2 --- /dev/null +++ b/schema/postgresql/t/01-statehistory.sql @@ -0,0 +1,7 @@ +SELECT plan(0); +CREATE TABLE icinga_statehistory ( + state_time timestamp WITHOUT time zone, + object_id numeric DEFAULT '0'::numeric, + state smallint DEFAULT '0'::smallint, + state_type smallint DEFAULT '0'::smallint +); diff --git a/schema/postgresql/t/02-icinga_objects.sql b/schema/postgresql/t/02-icinga_objects.sql new file mode 100644 index 0000000..74d622b --- /dev/null +++ b/schema/postgresql/t/02-icinga_objects.sql @@ -0,0 +1,5 @@ +SELECT plan(0); +CREATE TABLE icinga_objects ( + object_id numeric DEFAULT '0'::numeric, + objecttype_id smallint DEFAULT '1'::smallint +); diff --git a/schema/postgresql/t/02-servicestatus.sql b/schema/postgresql/t/02-servicestatus.sql new file mode 100644 index 0000000..d94bc0c --- /dev/null +++ b/schema/postgresql/t/02-servicestatus.sql @@ -0,0 +1,7 @@ +SELECT plan(0); +CREATE TABLE icinga_servicestatus ( + service_object_id numeric DEFAULT '0'::numeric, + status_update_time timestamp WITHOUT time zone, + current_state smallint DEFAULT '0'::smallint, + state_type smallint DEFAULT '0'::smallint +); diff --git a/schema/postgresql/t/03-hoststatus.sql b/schema/postgresql/t/03-hoststatus.sql new file mode 100644 index 0000000..3f6b4e8 --- /dev/null +++ b/schema/postgresql/t/03-hoststatus.sql @@ -0,0 +1,7 @@ +SELECT plan(0); +CREATE TABLE icinga_hoststatus ( + host_object_id numeric DEFAULT '0'::numeric, + status_update_time timestamp WITHOUT time zone, + current_state smallint DEFAULT '0'::smallint, + state_type smallint DEFAULT '0'::smallint +); diff --git a/schema/postgresql/t/04-icinga_downtimehistory.sql b/schema/postgresql/t/04-icinga_downtimehistory.sql new file mode 100644 index 0000000..7a9b8b6 --- /dev/null +++ b/schema/postgresql/t/04-icinga_downtimehistory.sql @@ -0,0 +1,10 @@ +SELECT plan(0); +CREATE TABLE icinga_downtimehistory ( + object_id numeric DEFAULT '0'::numeric, + entry_time timestamp WITHOUT time zone, + scheduled_start_time timestamp WITHOUT time zone, + scheduled_end_time timestamp WITHOUT time zone, + was_started smallint DEFAULT '0'::smallint, + actual_start_time timestamp WITHOUT time zone, + actual_end_time timestamp WITHOUT time zone +); diff --git a/schema/postgresql/t/04-icinga_outofsla_periods.sql b/schema/postgresql/t/04-icinga_outofsla_periods.sql new file mode 100644 index 0000000..13bd13b --- /dev/null +++ b/schema/postgresql/t/04-icinga_outofsla_periods.sql @@ -0,0 +1,6 @@ +SELECT plan(0); +CREATE TABLE icinga_outofsla_periods ( + timeperiod_object_id numeric NOT NULL, + start_time timestamp WITHOUT time zone NOT NULL, + end_time timestamp WITHOUT time zone NOT NULL +); diff --git a/schema/postgresql/t/05-fill-downtimes.sql b/schema/postgresql/t/05-fill-downtimes.sql new file mode 100644 index 0000000..f0c77f4 --- /dev/null +++ b/schema/postgresql/t/05-fill-downtimes.sql @@ -0,0 +1,5 @@ +-- Objects get a number and a type (1=host, 2=server) +SELECT plan(1); +INSERT INTO icinga_downtimehistory (object_id,actual_start_time,actual_end_time) VALUES (7,'2019-04-15 11:45:00','2019-04-15 11:50:00'); +INSERT INTO icinga_downtimehistory (object_id,actual_start_time,actual_end_time) VALUES (7,'2019-04-15 12:00:00','2019-04-15 12:05:00'); +SELECT is(count(*), 2::bigint, 'icinga_downtimehistory has correct # of rows') FROM icinga_downtimehistory; diff --git a/schema/postgresql/t/05-fill-hoststatus.sql b/schema/postgresql/t/05-fill-hoststatus.sql new file mode 100644 index 0000000..caca8fb --- /dev/null +++ b/schema/postgresql/t/05-fill-hoststatus.sql @@ -0,0 +1,17 @@ +-- Rows are: hostid,timestamp, status (0 up, 1 down), state_type (always use 1) +SELECT plan(1); +COPY icinga_hoststatus FROM STDIN; +1 2019-02-10 12:00:00+01 0 1 +1 2019-03-10 15:00:00+01 1 1 +3 2019-03-10 16:15:00+01 1 1 +4 2020-03-01 00:00:00+01 1 1 +5 2020-04-01 14:00:00+01 0 1 +6 2019-04-01 13:51:17+01 0 1 +7 2019-04-14 11:00:00+01 0 1 +7 2019-04-15 11:00:00+01 1 1 +7 2019-04-15 12:52:00+01 0 1 +7 2019-04-15 12:55:00+01 1 1 +7 2019-04-15 12:57:00+01 0 1 +\. + +SELECT is(count(*), 11::bigint, 'icinga_hoststatus has correct # of rows') FROM icinga_hoststatus; diff --git a/schema/postgresql/t/05-fill-icinga_objects.sql b/schema/postgresql/t/05-fill-icinga_objects.sql new file mode 100644 index 0000000..1b11eef --- /dev/null +++ b/schema/postgresql/t/05-fill-icinga_objects.sql @@ -0,0 +1,10 @@ +-- Objects get a number and a type (1=host, 2=server) +SELECT plan(1); +INSERT INTO icinga_objects VALUES (1,1); +INSERT INTO icinga_objects VALUES (2,2); +INSERT INTO icinga_objects VALUES (3,1); +INSERT INTO icinga_objects VALUES (4,1); +INSERT INTO icinga_objects VALUES (5,1); +INSERT INTO icinga_objects VALUES (6,1); +INSERT INTO icinga_objects VALUES (7,1); +SELECT is(count(*), 7::bigint, 'icinga_objects has correct # of rows') FROM icinga_objects; diff --git a/schema/postgresql/t/05-fill-servicestatus.sql b/schema/postgresql/t/05-fill-servicestatus.sql new file mode 100644 index 0000000..5ae413f --- /dev/null +++ b/schema/postgresql/t/05-fill-servicestatus.sql @@ -0,0 +1,7 @@ +SELECT plan(1); +COPY icinga_servicestatus FROM STDIN; +2 2019-02-10 12:00:00+01 0 1 +2 2019-03-10 15:00:00+01 2 1 +\. +SELECT is(count(*), 2::bigint, 'icinga_servicestatus has 2 rows') FROM icinga_servicestatus; + diff --git a/schema/postgresql/t/05-fill-statehistory.sql b/schema/postgresql/t/05-fill-statehistory.sql new file mode 100644 index 0000000..f66afbd --- /dev/null +++ b/schema/postgresql/t/05-fill-statehistory.sql @@ -0,0 +1,28 @@ +-- Rows are: state_time, object_id, state, state_type +SELECT plan(1); +COPY icinga_statehistory FROM STDIN; +2019-02-01 00:00:00+01 1 0 1 +2019-02-05 11:00:00+01 1 3 0 +2019-02-05 12:00:00+01 1 3 1 +2019-02-05 13:00:00+01 1 0 0 +2019-02-05 14:00:00+01 1 0 1 +2019-03-01 00:00:00+01 1 0 1 +2019-03-05 11:00:00+01 1 3 0 +2019-03-05 12:00:00+01 1 3 1 +2019-02-01 00:00:00+01 2 0 1 +2019-02-05 11:00:00+01 2 3 0 +2019-02-05 12:00:00+01 2 3 1 +2019-02-05 13:00:00+01 2 0 0 +2019-02-05 14:00:00+01 2 0 1 +2019-03-01 00:00:00+01 2 0 1 +2019-03-05 11:00:00+01 2 3 0 +2019-03-05 12:00:00+01 2 3 1 +2019-03-10 16:05:00+01 3 1 0 +2019-03-10 16:10:00+01 3 1 1 +2020-03-01 14:00:00+01 4 0 1 +2020-04-01 00:00:00+01 5 1 1 +2019-03-31 21:50:48+01 6 1 1 +2019-03-27 15:15:42+01 6 0 1 +2019-03-27 01:00:00+01 7 0 1 +\. +SELECT is(count(*), 23::bigint, 'icinga_statehistory has correct # of rows rows') FROM icinga_statehistory; diff --git a/schema/postgresql/t/06-get_sla_ok_percent.sql b/schema/postgresql/t/06-get_sla_ok_percent.sql new file mode 100644 index 0000000..4ce9e39 --- /dev/null +++ b/schema/postgresql/t/06-get_sla_ok_percent.sql @@ -0,0 +1,3 @@ +SELECT plan(1); +\i get_sla_ok_percent.sql +SELECT is(COUNT(*),1::bigint) FROM pg_catalog.pg_proc WHERE proname = 'idoreports_get_sla_ok_percent'; diff --git a/schema/postgresql/t/07-test-func.sql b/schema/postgresql/t/07-test-func.sql new file mode 100644 index 0000000..5c370e3 --- /dev/null +++ b/schema/postgresql/t/07-test-func.sql @@ -0,0 +1,33 @@ +SELECT plan(27); +SELECT is(idoreports_get_sla_ok_percent(1,'2019-02-05 12:00', '2019-02-05 14:00')::float , 0.0::float,'Host 1 was down 2 out of 2 hours'); +SELECT is(idoreports_get_sla_ok_percent(1,'2019-02-05 10:00', '2019-02-05 14:00')::float , 50.0::float,'Host 1 was down 2 out of 4 hours'); +SELECT is(idoreports_get_sla_ok_percent(1,'2019-02-05 10:00', '2019-02-05 18:00')::float , 75.0::float,'Host 1 was down 2 out of 8 hours'); +SELECT is(idoreports_get_sla_ok_percent(1,'2019-02-04 10:00', '2019-02-04 18:00')::float , 100.0::float,'Host 1 was not down before 02/05 12:00'); +SELECT is(idoreports_get_sla_ok_percent(1,'2019-02-06 10:00', '2019-02-08 18:00')::float , 100.0::float,'Host 1 was not down after 02/05 14:00'); +SELECT is(idoreports_get_sla_ok_percent(1,'2019-02-04 13:00', '2019-02-05 13:00')::float , 95.83333333333333::float,'Host 1 was down for the last hour of checked timeframe'); +SELECT is(idoreports_get_sla_ok_percent(1,'2019-02-05 13:00', '2019-02-06 13:00')::float , 95.83333333333333::float,'Host 1 was down for the first hour of checked timeframe'); +SELECT is(idoreports_get_sla_ok_percent(1,'2019-03-05 11:00', '2019-03-05 13:00')::float , 50.0::float,'Host 1 was down 1 out of 2 hours'); +SELECT is(idoreports_get_sla_ok_percent(1,'2019-03-05 12:00', '2019-03-05 13:00')::float , 0.0::float,'Host 1 was down during that period'); +SELECT is(idoreports_get_sla_ok_percent(1,'2019-03-05 13:00', '2019-03-05 14:00')::float , 0.0::float,'Host 1 was down during that period'); + +SELECT is(idoreports_get_sla_ok_percent(2,'2019-02-05 12:00', '2019-02-05 14:00')::float , 0.0::float,'Service 2 was down 2 out of 2 hours'); +SELECT is(idoreports_get_sla_ok_percent(2,'2019-02-05 10:00', '2019-02-05 14:00')::float , 50.0::float,'Service 2 was down 2 out of 4 hours'); +SELECT is(idoreports_get_sla_ok_percent(2,'2019-02-05 10:00', '2019-02-05 18:00')::float , 75.0::float,'Service 2 was down 2 out of 8 hours'); +SELECT is(idoreports_get_sla_ok_percent(2,'2019-02-04 10:00', '2019-02-04 18:00')::float , 100.0::float,'Service 2 was not down before 02/05 12:00'); +SELECT is(idoreports_get_sla_ok_percent(2,'2019-02-06 10:00', '2019-02-08 18:00')::float , 100.0::float,'Service 2 was not down after 02/05 14:00'); +SELECT is(idoreports_get_sla_ok_percent(2,'2019-02-04 13:00', '2019-02-05 13:00')::float , 95.83333333333333::float,'Service 2 was down for the last hour of checked timeframe'); +SELECT is(idoreports_get_sla_ok_percent(2,'2019-02-05 13:00', '2019-02-06 13:00')::float , 95.83333333333333::float,'Service 2 was down for the first hour of checked timeframe'); +SELECT is(idoreports_get_sla_ok_percent(2,'2019-03-05 11:00', '2019-03-05 13:00')::float , 50.0::float,'Service 2 was down 1 out of 2 hours'); +SELECT is(idoreports_get_sla_ok_percent(2,'2019-03-05 12:00', '2019-03-05 13:00')::float , 0.0::float,'Service 2 was down during that period'); +SELECT is(idoreports_get_sla_ok_percent(2,'2019-03-05 13:00', '2019-03-05 14:00')::float , 0.0::float,'Service 2 was down during that period'); + +SELECT is(idoreports_get_sla_ok_percent(3,'2019-03-10 17:00', '2019-03-11 00:00')::float , 0.0::float,'Host 3 was considered down for the rest of the day'); + +SELECT is(idoreports_get_sla_ok_percent(4,'2020-03-01 12:00', '2020-03-01 16:00')::float , 50.0::float,'Host 4 was considered down for 2 hours in a 4 hours time range starting with UP'); +SELECT is(idoreports_get_sla_ok_percent(5,'2020-04-01 12:00', '2020-04-01 16:00')::float , 50.0::float,'Host 5 was considered down for 2 hours in a 4 hours time range starting with DOWN'); + +SELECT is(idoreports_get_sla_ok_percent(6,'2019-04-01 11:43:16','2019-04-01 15:43:16')::float , 46.65972222222222::float,'Host 6 was down until recently'); + +SELECT is(idoreports_get_sla_ok_percent(7,'2019-04-15 10:00:00','2019-04-15 15:00:00')::float , 95.0::float,'Host 7 had a planned downtime but went down before that started'); +SELECT is(idoreports_get_sla_ok_percent(7,'2019-04-15 11:45:01','2019-04-15 11:49:59')::float , 100.0::float,'Host 7 had a planned downtime'); +SELECT is(idoreports_get_sla_ok_percent(7,'2019-04-15 00:00:00','2019-04-16 00:00:00')::float , 80.0::float,'Host 7 had two planned downtimes which were correctly assessed'); diff --git a/schema/postgresql/t/README.md b/schema/postgresql/t/README.md new file mode 100644 index 0000000..5467be9 --- /dev/null +++ b/schema/postgresql/t/README.md @@ -0,0 +1,18 @@ +Tests for the idoreports_get_sla_ok_percent() function +====================================================== + +These are pg_tap tests. You need pg_tap installed for the PG version you want to test on, e.g. "postgresql-12-pgtap" for Debian/Ubuntu. + +I used these to find the cause for some seamingly strange NULL results. +Which were basically due to a badly chosen ALIAS ("state_time" on line 20, which is now "state_time_"). + +I run these tests on an Ubuntu/Debian system with "pg_virtualenv" like this: + +``` +pg_virtualenv -s pg_prove t/0*.sql +``` +or simply +``` +t/testme.sh +``` + diff --git a/schema/postgresql/t/get_sla_ok_percent.sql b/schema/postgresql/t/get_sla_ok_percent.sql new file mode 120000 index 0000000..69aa266 --- /dev/null +++ b/schema/postgresql/t/get_sla_ok_percent.sql @@ -0,0 +1 @@ +../get_sla_ok_percent.sql
\ No newline at end of file diff --git a/schema/postgresql/t/plain_sql_zum_debuggen.sql b/schema/postgresql/t/plain_sql_zum_debuggen.sql new file mode 100644 index 0000000..7adf80e --- /dev/null +++ b/schema/postgresql/t/plain_sql_zum_debuggen.sql @@ -0,0 +1,185 @@ +--SELECT is(idoreports_get_sla_ok_percent(4,'2020-03-01 12:00', '2020-03-01 16:00')::float , 50.0::float,'Host 4 was considered down for 2 hours in a 4 hours time range starting with UP'); +--SELECT is(idoreports_get_sla_ok_percent(5,'2020-04-01 12:00', '2020-04-01 16:00')::float , 50.0::float,'Host 5 was considered down for 2 hours in a 4 hours time range starting with DOWN'); +\set id 5 +\set start '2020-04-01 12:00' +\set end '2020-04-01 16:00' +\set sla_id null + +--'2019-02-19 00:00:00','2019-02-20 10:00:00' +--12347 + +WITH crit AS ( + SELECT CASE objecttype_id + WHEN 1 THEN 0 + WHEN 2 THEN 1 + END + AS value + FROM icinga_objects + WHERE object_id = :id +), +before AS ( + -- low border, last event before the range we are looking for: + SELECT down, state_time_ AS state_time,state FROM ( + (SELECT 1 AS prio + ,state > crit.value AS down + ,GREATEST(state_time,:'start') AS state_time_ + ,state + FROM icinga_statehistory,crit + WHERE + object_id = :id + AND state_time < :'start' + AND state_type = 1 + ORDER BY state_time DESC + LIMIT 1) + UNION ALL + (SELECT 2 AS prio + ,state > crit.value AS down + ,GREATEST(state_time,:'start') AS state_time_ + ,state + FROM icinga_statehistory,crit + WHERE + object_id = :id + AND state_time < :'start' + ORDER BY state_time DESC + LIMIT 1) + + ) ranked ORDER BY prio + LIMIT 1 +) SELECT * FROM before; +,all_hard_events AS ( + -- the actual range we're looking for: + SELECT state > crit.value AS down + ,state_time + ,state + FROM icinga_statehistory,crit + WHERE + object_id = :id + AND state_time >= :'start' + AND state_time <= :'end' + AND state_type = 1 +), + +after AS ( + -- the "younger" of the current host/service state and the first recorded event + (SELECT state > crit_value AS down + ,LEAST(state_time,:'end') AS state_time + ,state + + FROM ( + (SELECT state_time + ,state + ,crit.value crit_value + FROM icinga_statehistory,crit + WHERE + object_id = :id + AND state_time > :'end' + AND state_type = 1 + ORDER BY state_time ASC + LIMIT 1) + + UNION ALL + + SELECT status_update_time + ,current_state + ,crit.value crit_value + FROM icinga_hoststatus,crit + WHERE host_object_id = :id + AND state_type = 1 + + UNION ALL + + SELECT status_update_time + ,current_state + ,crit.value crit_value + FROM icinga_servicestatus,crit + WHERE service_object_id = :id + AND state_type = 1 + ) AS after_searched_period + ORDER BY state_time ASC LIMIT 1) +) +, allevents AS ( + TABLE before + UNION ALL + TABLE all_hard_events + UNION ALL + TABLE after +) --SELECT * FROM allevents; +, downtimes AS ( + SELECT tsrange( + --GREATEST(actual_start_time, :'start') + --, LEAST(actual_end_time, :'end') + actual_start_time + , actual_end_time + ) AS downtime + FROM icinga_downtimehistory + WHERE object_id = :id +-- AND actual_start_time <= :'end' +-- AND COALESCE(actual_end_time,:'start') >= :'start' + + UNION ALL + + SELECT tsrange( + --GREATEST(start_time, :'start') + --, LEAST(end_time, :'end') + start_time + , end_time + ) AS downtime + FROM icinga_outofsla_periods + WHERE timeperiod_object_id = :sla_id + +) --SELECT * FROM allevents; +, enriched AS ( + SELECT down + ,tsrange(state_time, COALESCE(lead(state_time) OVER w, :'end'),'(]') AS timeframe + --,lead(state_time) OVER w - state_time AS dauer + FROM ( + SELECT state > crit.value AS down + , lead(state,1,state) OVER w > crit.value AS next_down + , lag(state,1,state) OVER w > crit.value AS prev_down + , state_time + , state + FROM allevents,crit + WINDOW w AS (ORDER BY state_time) + ) alle + --WHERE down != next_down OR down != prev_down + WINDOW w AS (ORDER BY state_time) +) +, relevant AS ( + SELECT down + ,timeframe * tsrange(:'start',:'end','(]') AS timeframe + FROM enriched + WHERE timeframe && tsrange(:'start',:'end','(]') +) SELECT * FROM relevant; + +, relevant_down AS ( + SELECT timeframe + ,down + ,timeframe * downtime AS covered + ,COALESCE( + timeframe - downtime + ,timeframe + ) AS not_covered + FROM relevant + LEFT JOIN downtimes + ON timeframe && downtime + WHERE down +) -- SELECT * FROM relevant_down; + +, effective_downtimes AS ( + SELECT not_covered + , upper(not_covered) - lower(not_covered) AS dauer + FROM relevant_down +) --SELECT * FROM effective_downtimes; + +, final_result AS ( + SELECT sum(dauer) AS total_downtime + , timestamp :'end' - timestamp :'start' AS considered + , COALESCE(extract ('epoch' from sum(dauer)),0) AS down_secs + , extract ('epoch' from timestamp :'end' - timestamp :'start' ) AS considered_secs + FROM effective_downtimes +) --SELECT * FROM final_result; + +SELECT :'start' AS starttime, :'end' AS endtime,* +, 100.0 - down_secs / considered_secs * 100.0 AS availability +FROM final_result +; diff --git a/schema/postgresql/t/testme.sh b/schema/postgresql/t/testme.sh new file mode 100755 index 0000000..ce61abc --- /dev/null +++ b/schema/postgresql/t/testme.sh @@ -0,0 +1,2 @@ +#!/bin/sh +pg_virtualenv -s pg_prove $(dirname $0)/0*.sql |