summaryrefslogtreecommitdiffstats
path: root/lib/icinga/checkable-notification.cpp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 12:34:54 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 12:34:54 +0000
commit0915b3ef56dfac3113cce55a59a5765dc94976be (patch)
treea8fea11d50b4f083e1bf0f90025ece7f0824784a /lib/icinga/checkable-notification.cpp
parentInitial commit. (diff)
downloadicinga2-0915b3ef56dfac3113cce55a59a5765dc94976be.tar.xz
icinga2-0915b3ef56dfac3113cce55a59a5765dc94976be.zip
Adding upstream version 2.13.6.upstream/2.13.6upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib/icinga/checkable-notification.cpp')
-rw-r--r--lib/icinga/checkable-notification.cpp334
1 files changed, 334 insertions, 0 deletions
diff --git a/lib/icinga/checkable-notification.cpp b/lib/icinga/checkable-notification.cpp
new file mode 100644
index 0000000..779ed4b
--- /dev/null
+++ b/lib/icinga/checkable-notification.cpp
@@ -0,0 +1,334 @@
+/* Icinga 2 | (c) 2012 Icinga GmbH | GPLv2+ */
+
+#include "icinga/checkable.hpp"
+#include "icinga/host.hpp"
+#include "icinga/icingaapplication.hpp"
+#include "icinga/service.hpp"
+#include "base/dictionary.hpp"
+#include "base/objectlock.hpp"
+#include "base/logger.hpp"
+#include "base/exception.hpp"
+#include "base/context.hpp"
+#include "base/convert.hpp"
+#include "base/lazy-init.hpp"
+#include "remote/apilistener.hpp"
+
+using namespace icinga;
+
+boost::signals2::signal<void (const Notification::Ptr&, const Checkable::Ptr&, const std::set<User::Ptr>&,
+ const NotificationType&, const CheckResult::Ptr&, const String&, const String&,
+ const MessageOrigin::Ptr&)> Checkable::OnNotificationSentToAllUsers;
+boost::signals2::signal<void (const Notification::Ptr&, const Checkable::Ptr&, const User::Ptr&,
+ const NotificationType&, const CheckResult::Ptr&, const String&, const String&, const String&,
+ const MessageOrigin::Ptr&)> Checkable::OnNotificationSentToUser;
+
+void Checkable::ResetNotificationNumbers()
+{
+ for (const Notification::Ptr& notification : GetNotifications()) {
+ ObjectLock olock(notification);
+ notification->ResetNotificationNumber();
+ }
+}
+
+void Checkable::SendNotifications(NotificationType type, const CheckResult::Ptr& cr, const String& author, const String& text)
+{
+ String checkableName = GetName();
+
+ CONTEXT("Sending notifications for object '" + checkableName + "'");
+
+ bool force = GetForceNextNotification();
+
+ SetForceNextNotification(false);
+
+ if (!IcingaApplication::GetInstance()->GetEnableNotifications() || !GetEnableNotifications()) {
+ if (!force) {
+ Log(LogInformation, "Checkable")
+ << "Notifications are disabled for checkable '" << checkableName << "'.";
+ return;
+ }
+ }
+
+ std::set<Notification::Ptr> notifications = GetNotifications();
+
+ String notificationTypeName = Notification::NotificationTypeToString(type);
+
+ // Bail early if there are no notifications.
+ if (notifications.empty()) {
+ Log(LogNotice, "Checkable")
+ << "Skipping checkable '" << checkableName << "' which doesn't have any notification objects configured.";
+ return;
+ }
+
+ Log(LogInformation, "Checkable")
+ << "Checkable '" << checkableName << "' has " << notifications.size()
+ << " notification(s). Checking filters for type '" << notificationTypeName << "', sends will be logged.";
+
+ for (const Notification::Ptr& notification : notifications) {
+ // Re-send stashed notifications from cold startup.
+ if (ApiListener::UpdatedObjectAuthority()) {
+ try {
+ if (!notification->IsPaused()) {
+ auto stashedNotifications (notification->GetStashedNotifications());
+
+ if (stashedNotifications->GetLength()) {
+ Log(LogNotice, "Notification")
+ << "Notification '" << notification->GetName() << "': there are some stashed notifications. Stashing notification to preserve order.";
+
+ stashedNotifications->Add(new Dictionary({
+ {"notification_type", type},
+ {"cr", cr},
+ {"force", force},
+ {"reminder", false},
+ {"author", author},
+ {"text", text}
+ }));
+ } else {
+ notification->BeginExecuteNotification(type, cr, force, false, author, text);
+ }
+ } else {
+ Log(LogNotice, "Notification")
+ << "Notification '" << notification->GetName() << "': HA cluster active, this endpoint does not have the authority (paused=true). Skipping.";
+ }
+ } catch (const std::exception& ex) {
+ Log(LogWarning, "Checkable")
+ << "Exception occurred during notification '" << notification->GetName() << "' for checkable '"
+ << GetName() << "': " << DiagnosticInformation(ex, false);
+ }
+ } else {
+ // Cold startup phase. Stash notification for later.
+ Log(LogNotice, "Notification")
+ << "Notification '" << notification->GetName() << "': object authority hasn't been updated, yet. Stashing notification.";
+
+ notification->GetStashedNotifications()->Add(new Dictionary({
+ {"notification_type", type},
+ {"cr", cr},
+ {"force", force},
+ {"reminder", false},
+ {"author", author},
+ {"text", text}
+ }));
+ }
+ }
+}
+
+std::set<Notification::Ptr> Checkable::GetNotifications() const
+{
+ std::unique_lock<std::mutex> lock(m_NotificationMutex);
+ return m_Notifications;
+}
+
+void Checkable::RegisterNotification(const Notification::Ptr& notification)
+{
+ std::unique_lock<std::mutex> lock(m_NotificationMutex);
+ m_Notifications.insert(notification);
+}
+
+void Checkable::UnregisterNotification(const Notification::Ptr& notification)
+{
+ std::unique_lock<std::mutex> lock(m_NotificationMutex);
+ m_Notifications.erase(notification);
+}
+
+void Checkable::FireSuppressedNotifications()
+{
+ if (!IsActive())
+ return;
+
+ if (IsPaused())
+ return;
+
+ if (!GetEnableNotifications())
+ return;
+
+ int suppressed_types (GetSuppressedNotifications());
+ if (!suppressed_types)
+ return;
+
+ int subtract = 0;
+
+ {
+ LazyInit<bool> wasLastParentRecoveryRecent ([this]() {
+ auto cr (GetLastCheckResult());
+
+ if (!cr) {
+ return true;
+ }
+
+ auto threshold (cr->GetExecutionStart());
+ Host::Ptr host;
+ Service::Ptr service;
+ tie(host, service) = GetHostService(this);
+
+ if (service) {
+ ObjectLock oLock (host);
+
+ if (!host->GetProblem() && host->GetLastStateChange() >= threshold) {
+ return true;
+ }
+ }
+
+ for (auto& dep : GetDependencies()) {
+ auto parent (dep->GetParent());
+ ObjectLock oLock (parent);
+
+ if (!parent->GetProblem() && parent->GetLastStateChange() >= threshold) {
+ return true;
+ }
+ }
+
+ return false;
+ });
+
+ if (suppressed_types & (NotificationProblem|NotificationRecovery)) {
+ CheckResult::Ptr cr = GetLastCheckResult();
+ NotificationType type = cr && IsStateOK(cr->GetState()) ? NotificationRecovery : NotificationProblem;
+ bool state_suppressed = NotificationReasonSuppressed(NotificationProblem) || NotificationReasonSuppressed(NotificationRecovery);
+
+ /* Only process (i.e. send or dismiss) suppressed state notifications if the following conditions are met:
+ *
+ * 1. State notifications are not suppressed at the moment. State notifications must only be removed from
+ * the suppressed notifications bitset after the reason for the suppression is gone as these bits are
+ * used as a marker for when to set the state_before_suppression attribute.
+ * 2. The checkable is in a hard state. Soft states represent a state where we are not certain yet about
+ * the actual state and wait with sending notifications. If we want to immediately send a notification,
+ * we might send a recovery notification for something that just started failing or a problem
+ * notification which might be for an intermittent problem that would have never received a
+ * notification if there was no suppression as it still was in a soft state. Both cases aren't ideal so
+ * better wait until we are certain.
+ * 3. The checkable isn't likely checked soon. For example, if a downtime ended, give the checkable a
+ * chance to recover afterwards before sending a notification.
+ * 4. No parent recovered recently. Similar to the previous condition, give the checkable a chance to
+ * recover after one of its dependencies recovered before sending a notification.
+ *
+ * If any of these conditions is not met, processing the suppressed notification is further delayed.
+ */
+ if (!state_suppressed && GetStateType() == StateTypeHard && !IsLikelyToBeCheckedSoon() && !wasLastParentRecoveryRecent.Get()) {
+ if (NotificationReasonApplies(type)) {
+ Checkable::OnNotificationsRequested(this, type, cr, "", "", nullptr);
+ }
+ subtract |= NotificationRecovery|NotificationProblem;
+ }
+ }
+
+ for (auto type : {NotificationFlappingStart, NotificationFlappingEnd}) {
+ if (suppressed_types & type) {
+ bool still_applies = NotificationReasonApplies(type);
+
+ if (still_applies) {
+ if (!NotificationReasonSuppressed(type) && !IsLikelyToBeCheckedSoon() && !wasLastParentRecoveryRecent.Get()) {
+ Checkable::OnNotificationsRequested(this, type, GetLastCheckResult(), "", "", nullptr);
+
+ subtract |= type;
+ }
+ } else {
+ subtract |= type;
+ }
+ }
+ }
+ }
+
+ if (subtract) {
+ ObjectLock olock (this);
+
+ int suppressed_types_before (GetSuppressedNotifications());
+ int suppressed_types_after (suppressed_types_before & ~subtract);
+
+ if (suppressed_types_after != suppressed_types_before) {
+ SetSuppressedNotifications(suppressed_types_after);
+ }
+ }
+}
+
+/**
+ * Re-sends all notifications previously suppressed by e.g. downtimes if the notification reason still applies.
+ */
+void Checkable::FireSuppressedNotificationsTimer(const Timer * const&)
+{
+ for (auto& host : ConfigType::GetObjectsByType<Host>()) {
+ host->FireSuppressedNotifications();
+ }
+
+ for (auto& service : ConfigType::GetObjectsByType<Service>()) {
+ service->FireSuppressedNotifications();
+ }
+}
+
+/**
+ * Returns whether sending a notification of type type right now would represent *this' current state correctly.
+ *
+ * @param type The type of notification to send (or not to send).
+ *
+ * @return Whether to send the notification.
+ */
+bool Checkable::NotificationReasonApplies(NotificationType type)
+{
+ switch (type) {
+ case NotificationProblem:
+ {
+ auto cr (GetLastCheckResult());
+ return cr && !IsStateOK(cr->GetState()) && cr->GetState() != GetStateBeforeSuppression();
+ }
+ case NotificationRecovery:
+ {
+ auto cr (GetLastCheckResult());
+ return cr && IsStateOK(cr->GetState()) && cr->GetState() != GetStateBeforeSuppression();
+ }
+ case NotificationFlappingStart:
+ return IsFlapping();
+ case NotificationFlappingEnd:
+ return !IsFlapping();
+ default:
+ VERIFY(!"Checkable#NotificationReasonStillApplies(): given type not implemented");
+ return false;
+ }
+}
+
+/**
+ * Checks if notifications of a given type should be suppressed for this Checkable at the moment.
+ *
+ * @param type The notification type for which to query the suppression status.
+ *
+ * @return true if no notification of this type should be sent.
+ */
+bool Checkable::NotificationReasonSuppressed(NotificationType type)
+{
+ switch (type) {
+ case NotificationProblem:
+ case NotificationRecovery:
+ return !IsReachable(DependencyNotification) || IsInDowntime() || IsAcknowledged();
+ case NotificationFlappingStart:
+ case NotificationFlappingEnd:
+ return IsInDowntime();
+ default:
+ return false;
+ }
+}
+
+/**
+ * E.g. we're going to re-send a stashed problem notification as *this is still not ok.
+ * But if the next check result recovers *this soon, we would send a recovery notification soon after the problem one.
+ * This is not desired, especially for lots of checkables at once.
+ * Because of that if there's likely to be a check result soon,
+ * we delay the re-sending of the stashed notification until the next check.
+ * That check either doesn't change anything and we finally re-send the stashed problem notification
+ * or recovers *this and we drop the stashed notification.
+ *
+ * @return Whether *this is likely to be checked soon
+ */
+bool Checkable::IsLikelyToBeCheckedSoon()
+{
+ if (!GetEnableActiveChecks()) {
+ return false;
+ }
+
+ // One minute unless the check interval is too short so the next check will always run during the next minute.
+ auto threshold (GetCheckInterval() - 10);
+
+ if (threshold > 60) {
+ threshold = 60;
+ } else if (threshold < 0) {
+ threshold = 0;
+ }
+
+ return GetNextCheck() <= Utility::GetTime() + threshold;
+}