summaryrefslogtreecommitdiffstats
path: root/tests/sql/sla_test.go
diff options
context:
space:
mode:
Diffstat (limited to 'tests/sql/sla_test.go')
-rw-r--r--tests/sql/sla_test.go406
1 files changed, 406 insertions, 0 deletions
diff --git a/tests/sql/sla_test.go b/tests/sql/sla_test.go
new file mode 100644
index 0000000..8a89850
--- /dev/null
+++ b/tests/sql/sla_test.go
@@ -0,0 +1,406 @@
+package sql_test
+
+import (
+ "crypto/rand"
+ "database/sql/driver"
+ "fmt"
+ "github.com/go-sql-driver/mysql"
+ "github.com/jmoiron/sqlx"
+ "github.com/lib/pq"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "testing"
+)
+
+func TestSla(t *testing.T) {
+ rdb := getDatabase(t)
+ db, err := sqlx.Open(rdb.Driver(), rdb.DSN())
+ require.NoError(t, err, "connect to database")
+
+ type TestData struct {
+ Name string
+ Events []SlaHistoryEvent
+ Start uint64
+ End uint64
+ Expected float64
+ }
+
+ tests := []TestData{{
+ Name: "EmptyHistory",
+ // Empty history implies no previous problem state, therefore SLA should be 100%
+ Events: nil,
+ Start: 1000,
+ End: 2000,
+ Expected: 100.0,
+ }, {
+ Name: "MultipleStateChanges",
+ // Some flapping, test that all changes are considered.
+ Events: []SlaHistoryEvent{
+ &State{Time: 1000, State: 2, PreviousState: 99}, // -10%
+ &State{Time: 1100, State: 0, PreviousState: 2},
+ &State{Time: 1300, State: 2, PreviousState: 0}, // -10%
+ &State{Time: 1400, State: 0, PreviousState: 2},
+ &State{Time: 1600, State: 2, PreviousState: 0}, // -10%
+ &State{Time: 1700, State: 0, PreviousState: 2},
+ &State{Time: 1900, State: 2, PreviousState: 0}, // -10%
+ },
+ Start: 1000,
+ End: 2000,
+ Expected: 60.0,
+ }, {
+ Name: "OverlappingDowntimesAndProblems",
+ // SLA should be 90%:
+ // 1000..1100: OK, no downtime
+ // 1100..1200: OK, in downtime
+ // 1200..1300: CRITICAL, in downtime
+ // 1300..1400: CRITICAL, no downtime (only period counting for SLA, -10%)
+ // 1400..1500: CRITICAL, in downtime
+ // 1500..1600: OK, in downtime
+ // 1600..2000: OK, no downtime
+ Events: []SlaHistoryEvent{
+ &Downtime{Start: 1100, End: 1300},
+ &Downtime{Start: 1400, End: 1600},
+ &State{Time: 1200, State: 2, PreviousState: 0},
+ &State{Time: 1500, State: 0, PreviousState: 2},
+ },
+ Start: 1000,
+ End: 2000,
+ Expected: 90.0,
+ }, {
+ Name: "CriticalBeforeInterval",
+ // If there is no event within the SLA interval, the last state from before the interval should be used.
+ Events: []SlaHistoryEvent{
+ &State{Time: 0, State: 2, PreviousState: 99},
+ },
+ Start: 1000,
+ End: 2000,
+ Expected: 0.0,
+ }, {
+ Name: "CriticalBeforeIntervalWithDowntime",
+ // State change and downtime start from before the SLA interval should be considered if still relevant.
+ Events: []SlaHistoryEvent{
+ &State{Time: 800, State: 2, PreviousState: 99},
+ &Downtime{Start: 600, End: 1800},
+ },
+ Start: 1000,
+ End: 2000,
+ Expected: 80.0,
+ }, {
+ Name: "CriticalBeforeIntervalWithOverlappingDowntimes",
+ // Test that overlapping downtimes are properly accounted for.
+ Events: []SlaHistoryEvent{
+ &State{Time: 800, State: 2, PreviousState: 99},
+ &Downtime{Start: 600, End: 1000},
+ &Downtime{Start: 800, End: 1200},
+ &Downtime{Start: 1000, End: 1400},
+ // Everything except 1400-1600 is covered by downtimes, -20%
+ &Downtime{Start: 1600, End: 2000},
+ &Downtime{Start: 1800, End: 2200},
+ },
+ Start: 1000,
+ End: 2000,
+ Expected: 80.0,
+ }, {
+ Name: "FallbackToPreviousState",
+ // If there is no state event from before the SLA interval, the previous hard state from the first event
+ // after the beginning of the SLA interval should be used as the initial state.
+ Events: []SlaHistoryEvent{
+ &State{Time: 1200, State: 0, PreviousState: 2},
+ },
+ Start: 1000,
+ End: 2000,
+ Expected: 80.0,
+ }, {
+ Name: "FallbackToCurrentState",
+ // If there are no state history events, the current state of the checkable should be used.
+ Events: []SlaHistoryEvent{
+ &CurrentState{State: 2},
+ },
+ Start: 1000,
+ End: 2000,
+ Expected: 0.0,
+ }, {
+ Name: "PreferInitialStateFromBeforeOverLaterState",
+ // The previous_hard_state should only be used as a fallback when there is no event from before the
+ // SLA interval. Therefore, the latter should be preferred if there is conflicting information.
+ Events: []SlaHistoryEvent{
+ &State{Time: 800, State: 2, PreviousState: 99},
+ &State{Time: 1200, State: 0, PreviousState: 0},
+ },
+ Start: 1000,
+ End: 2000,
+ Expected: 80.0,
+ }, {
+ Name: "PreferInitialStateFromBeforeOverCurrentState",
+ // The current state should only be used as a fallback when there is no state history event.
+ // Therefore, the latter should be preferred if there is conflicting information.
+ Events: []SlaHistoryEvent{
+ &State{Time: 800, State: 2, PreviousState: 99},
+ &CurrentState{State: 0},
+ },
+ Start: 1000,
+ End: 2000,
+ Expected: 0.0,
+ }, {
+ Name: "PreferLaterStateOverCurrentState",
+ // The current state should only be used as a fallback when there is no state history event.
+ // Therefore, the latter should be preferred if there is conflicting information.
+ Events: []SlaHistoryEvent{
+ &State{Time: 1200, State: 0, PreviousState: 2},
+ &CurrentState{State: 2},
+ },
+ Start: 1000,
+ End: 2000,
+ Expected: 80.0,
+ }, {
+ Name: "InitialUnknownReducesTotalTime",
+ Events: []SlaHistoryEvent{
+ &State{Time: 1500, State: 2, PreviousState: 99},
+ &State{Time: 1700, State: 0, PreviousState: 2},
+ &CurrentState{State: 0},
+ },
+ Start: 1000,
+ End: 2000,
+ Expected: 60,
+ }, {
+ Name: "IntermediateUnknownReducesTotalTime",
+ Events: []SlaHistoryEvent{
+ &State{Time: 1000, State: 0, PreviousState: 2},
+ &State{Time: 1100, State: 2, PreviousState: 0},
+ &State{Time: 1600, State: 0, PreviousState: 99},
+ &State{Time: 1800, State: 2, PreviousState: 0},
+ &CurrentState{State: 0},
+ },
+ Start: 1000,
+ End: 2000,
+ Expected: 60,
+ }}
+
+ for _, test := range tests {
+ t.Run(test.Name, func(t *testing.T) {
+ testSla(t, db, test.Events, test.Start, test.End, test.Expected, "unexpected SLA value")
+ })
+ }
+
+ t.Run("Invalid", func(t *testing.T) {
+ m := SlaHistoryMeta{
+ EnvironmentId: make([]byte, 20),
+ EndpointId: make([]byte, 20),
+ ObjectType: "host",
+ HostId: make([]byte, 20),
+ }
+
+ checkErr := func(t *testing.T, err error) {
+ require.Error(t, err, "SLA function should return an error")
+
+ switch d := db.DriverName(); d {
+ case "mysql":
+ var mysqlErr *mysql.MySQLError
+ require.ErrorAs(t, err, &mysqlErr, "SLA function should return a MySQL error")
+ // https://dev.mysql.com/doc/mysql-errors/8.0/en/server-error-reference.html#error_er_signal_exception
+ assert.Equal(t, uint16(1644), mysqlErr.Number, "MySQL error should be ER_SIGNAL_EXCEPTION")
+ assert.Equal(t, "end time must be greater than start time", mysqlErr.Message,
+ "MySQL error should contain custom message")
+
+ case "postgres":
+ var pqErr *pq.Error
+ require.ErrorAs(t, err, &pqErr, "SLA function should return a PostgreSQL error")
+ assert.Equal(t, pq.ErrorCode("P0001"), pqErr.Code, "MySQL error should be ER_SIGNAL_EXCEPTION")
+ assert.Equal(t, "end time must be greater than start time", pqErr.Message,
+ "PostgreSQL error should contain custom message")
+
+ default:
+ panic(fmt.Sprintf("unknown database driver %q", d))
+ }
+ }
+
+ t.Run("ZeroDuration", func(t *testing.T) {
+ _, err := execSqlSlaFunc(db, &m, 1000, 1000)
+ checkErr(t, err)
+ })
+
+ t.Run("NegativeDuration", func(t *testing.T) {
+ _, err := execSqlSlaFunc(db, &m, 2000, 1000)
+ checkErr(t, err)
+ })
+ })
+}
+
+func execSqlSlaFunc(db *sqlx.DB, m *SlaHistoryMeta, start uint64, end uint64) (float64, error) {
+ var result float64
+ err := db.Get(&result, db.Rebind("SELECT get_sla_ok_percent(?, ?, ?, ?)"),
+ m.HostId, m.ServiceId, start, end)
+ return result, err
+}
+
+func testSla(t *testing.T, db *sqlx.DB, events []SlaHistoryEvent, start uint64, end uint64, expected float64, msg string) {
+ t.Run("Host", func(t *testing.T) {
+ testSlaWithObjectType(t, db, events, false, start, end, expected, msg)
+ })
+ t.Run("Service", func(t *testing.T) {
+ testSlaWithObjectType(t, db, events, true, start, end, expected, msg)
+ })
+}
+
+func testSlaWithObjectType(t *testing.T, db *sqlx.DB,
+ events []SlaHistoryEvent, service bool, start uint64, end uint64, expected float64, msg string,
+) {
+ makeId := func() []byte {
+ id := make([]byte, 20)
+ _, err := rand.Read(id)
+ require.NoError(t, err, "generating random id failed")
+ return id
+ }
+
+ meta := SlaHistoryMeta{
+ EnvironmentId: makeId(),
+ EndpointId: makeId(),
+ HostId: makeId(),
+ }
+ if service {
+ meta.ObjectType = "service"
+ meta.ServiceId = makeId()
+ } else {
+ meta.ObjectType = "host"
+ }
+
+ for _, event := range events {
+ err := event.WriteSlaEventToDatabase(db, &meta)
+ require.NoErrorf(t, err, "Inserting SLA history for %#v failed", event)
+ }
+
+ r, err := execSqlSlaFunc(db, &meta, start, end)
+ require.NoError(t, err, "SLA query should not fail")
+ assert.Equal(t, expected, r, msg)
+}
+
+type SlaHistoryMeta struct {
+ EnvironmentId NullableBytes `db:"environment_id"`
+ EndpointId NullableBytes `db:"endpoint_id"`
+ ObjectType string `db:"object_type"`
+ HostId NullableBytes `db:"host_id"`
+ ServiceId NullableBytes `db:"service_id"`
+}
+
+type SlaHistoryEvent interface {
+ WriteSlaEventToDatabase(db *sqlx.DB, m *SlaHistoryMeta) error
+}
+
+type State struct {
+ Time uint64
+ State uint8
+ PreviousState uint8
+}
+
+var _ SlaHistoryEvent = (*State)(nil)
+
+func (s *State) WriteSlaEventToDatabase(db *sqlx.DB, m *SlaHistoryMeta) error {
+ type values struct {
+ *SlaHistoryMeta
+ Id []byte `db:"id"`
+ EventTime uint64 `db:"event_time"`
+ HardState uint8 `db:"hard_state"`
+ PreviousHardState uint8 `db:"previous_hard_state"`
+ }
+
+ id := make([]byte, 20)
+ _, err := rand.Read(id)
+ if err != nil {
+ return err
+ }
+
+ _, err = db.NamedExec("INSERT INTO sla_history_state"+
+ " (id, environment_id, endpoint_id, object_type, host_id, service_id, event_time, hard_state, previous_hard_state)"+
+ " VALUES (:id, :environment_id, :endpoint_id, :object_type, :host_id, :service_id, :event_time, :hard_state, :previous_hard_state)",
+ &values{
+ SlaHistoryMeta: m,
+ Id: id[:],
+ EventTime: s.Time,
+ HardState: s.State,
+ PreviousHardState: s.PreviousState,
+ })
+ return err
+}
+
+type CurrentState struct {
+ State uint8
+}
+
+func (c *CurrentState) WriteSlaEventToDatabase(db *sqlx.DB, m *SlaHistoryMeta) error {
+ type values struct {
+ *SlaHistoryMeta
+ State uint8 `db:"state"`
+ PropertiesChecksum NullableBytes `db:"properties_checksum"`
+ }
+
+ v := values{
+ SlaHistoryMeta: m,
+ State: c.State,
+ PropertiesChecksum: make([]byte, 20),
+ }
+
+ if len(m.ServiceId) == 0 {
+ _, err := db.NamedExec("INSERT INTO host_state"+
+ " (id, host_id, environment_id, properties_checksum, soft_state, previous_soft_state,"+
+ " hard_state, previous_hard_state, check_attempt, severity, last_state_change, next_check, next_update)"+
+ " VALUES (:host_id, :host_id, :environment_id, :properties_checksum, :state, :state, :state, :state, 0, 0, 0, 0, 0)",
+ &v)
+ return err
+ } else {
+ _, err := db.NamedExec("INSERT INTO service_state"+
+ " (id, host_id, service_id, environment_id, properties_checksum, soft_state, previous_soft_state,"+
+ " hard_state, previous_hard_state, check_attempt, severity, last_state_change, next_check, next_update)"+
+ " VALUES (:service_id, :host_id, :service_id, :environment_id, :properties_checksum, :state, :state, :state, :state, 0, 0, 0, 0, 0)",
+ &v)
+ return err
+ }
+}
+
+var _ SlaHistoryEvent = (*CurrentState)(nil)
+
+type Downtime struct {
+ Start uint64
+ End uint64
+}
+
+var _ SlaHistoryEvent = (*Downtime)(nil)
+
+type slaHistoryDowntime struct {
+ *SlaHistoryMeta
+ DowntimeId []byte `db:"downtime_id"`
+ DowntimeStart uint64 `db:"downtime_start"`
+ DowntimeEnd uint64 `db:"downtime_end"`
+}
+
+func (d *Downtime) WriteSlaEventToDatabase(db *sqlx.DB, m *SlaHistoryMeta) error {
+ downtimeId := make([]byte, 20)
+ _, err := rand.Read(downtimeId)
+ if err != nil {
+ return err
+ }
+
+ _, err = db.NamedExec("INSERT INTO sla_history_downtime"+
+ " (environment_id, endpoint_id, object_type, host_id, service_id, downtime_id, downtime_start, downtime_end)"+
+ " VALUES (:environment_id, :endpoint_id, :object_type, :host_id,"+
+ " :service_id, :downtime_id, :downtime_start, :downtime_end)",
+ &slaHistoryDowntime{
+ SlaHistoryMeta: m,
+ DowntimeId: downtimeId[:],
+ DowntimeStart: d.Start,
+ DowntimeEnd: d.End,
+ })
+ return err
+}
+
+// NullableBytes allows writing to binary columns in a database with support for NULL.
+type NullableBytes []byte
+
+// Value implements the database/sql/driver.Valuer interface.
+func (b NullableBytes) Value() (driver.Value, error) {
+ if b != nil {
+ return []byte(b), nil
+ }
+
+ // any(nil) is treated as NULL in contrast to []byte(nil) which is a non-NULL byte sequence of length 0.
+ return nil, nil
+}