summaryrefslogtreecommitdiffstats
path: root/monitoring/snmp/CEPH-MIB.txt
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--monitoring/snmp/CEPH-MIB.txt337
1 files changed, 337 insertions, 0 deletions
diff --git a/monitoring/snmp/CEPH-MIB.txt b/monitoring/snmp/CEPH-MIB.txt
new file mode 100644
index 000000000..f54cb3610
--- /dev/null
+++ b/monitoring/snmp/CEPH-MIB.txt
@@ -0,0 +1,337 @@
+CEPH-MIB DEFINITIONS ::= BEGIN
+
+IMPORTS
+ MODULE-IDENTITY, NOTIFICATION-TYPE, enterprises
+ FROM SNMPv2-SMI
+ MODULE-COMPLIANCE, NOTIFICATION-GROUP
+ FROM SNMPv2-CONF
+;
+
+-- Linting information:
+--
+-- # smilint -l 6 -i notification-not-reversible ./CEPH-MIB.txt
+--
+-- ignore: notification-not-reversible since our SNMP gateway doesn't use SNMPv1
+--
+
+ceph MODULE-IDENTITY
+ LAST-UPDATED
+ "202111010000Z" -- Nov 01, 2021
+ ORGANIZATION
+ "The Ceph Project
+ https://ceph.io"
+ CONTACT-INFO
+ "Email: <dev@ceph.io>
+
+ Send comments to: <dev@ceph.io>"
+ DESCRIPTION
+ "The MIB module for Ceph. In its current form it only
+ supports Notifications, since Ceph itself doesn't provide
+ any SNMP agent functionality.
+
+ Notifications are provided through a Prometheus/Alertmanager
+ webhook passing alerts to an external gateway service that is
+ responsible for formatting, forwarding and authenticating to
+ the SNMP receiver.
+ "
+ REVISION
+ "202111010000Z" --Nov 01, 2021
+ DESCRIPTION
+ "Latest version including the following updates:
+
+ - MIB restructure to align with linting
+ - names shortened and simplified (less verbose)
+ - Simplified structure due to switch to https://github.com/maxwo/snmp_notifier
+ - objects removed
+ - notifications updated
+ - Added module compliance
+ - Updated to latest prometheus alert rule definitions
+ "
+ ::= { enterprises 50495 }
+
+cephCluster OBJECT IDENTIFIER ::= { ceph 1 }
+cephConformance OBJECT IDENTIFIER ::= { ceph 2 }
+
+-- cephMetadata is a placeholder for possible future expansion via an agent
+-- where we could provide an overview of the cluster's configuration
+cephMetadata OBJECT IDENTIFIER ::= { cephCluster 1 }
+cephNotifications OBJECT IDENTIFIER ::= { cephCluster 2 }
+
+prometheus OBJECT IDENTIFIER ::= { cephNotifications 1 }
+
+--
+-- Notifications: first we define the notification 'branches' for the
+-- different categories of notifications / alerts
+promGeneric OBJECT IDENTIFIER ::= { prometheus 1 }
+promHealthStatus OBJECT IDENTIFIER ::= { prometheus 2 }
+promMon OBJECT IDENTIFIER ::= { prometheus 3 }
+promOsd OBJECT IDENTIFIER ::= { prometheus 4 }
+promMds OBJECT IDENTIFIER ::= { prometheus 5 }
+promMgr OBJECT IDENTIFIER ::= { prometheus 6 }
+promPGs OBJECT IDENTIFIER ::= { prometheus 7 }
+promNode OBJECT IDENTIFIER ::= { prometheus 8 }
+promPool OBJECT IDENTIFIER ::= { prometheus 9 }
+promRados OBJECT IDENTIFIER ::= { prometheus 10 }
+promCephadm OBJECT IDENTIFIER ::= { prometheus 11 }
+promPrometheus OBJECT IDENTIFIER ::= { prometheus 12 }
+
+promGenericNotification NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "Generic alert issued when the Prometheus rule doesn't provide an OID."
+::= { promGeneric 1 }
+
+promGenericDaemonCrash NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "One or more daemons have crashed recently, and are yet to be archived"
+::= { promGeneric 2 }
+
+promHealthStatusError NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "Ceph in health_error state for too long."
+::= { promHealthStatus 1 }
+
+promHealthStatusWarning NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "Ceph in health_warn for too long."
+::= { promHealthStatus 2 }
+
+promMonLowQuorum NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "Monitor count in quorum is low."
+::= { promMon 1 }
+
+promMonDiskSpaceCritical NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "Monitor diskspace is critically low."
+::= { promMon 2 }
+
+promOsdDownHigh NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "A high number of OSDs are down."
+::= { promOsd 1 }
+
+promOsdDown NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "One or more OSDs down."
+::= { promOsd 2 }
+
+promOsdNearFull NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "An OSD is dangerously full."
+::= { promOsd 3 }
+
+promOsdFlapping NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "An OSD was marked down and back up at least once a minute for 5 minutes."
+::= { promOsd 4 }
+
+promOsdHighPgDeviation NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "An OSD deviates by more than 30% from average PG count."
+::= { promOsd 5 }
+
+promOsdFull NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "An OSD has reached its full threshold."
+::= { promOsd 6 }
+
+promOsdHighPredictedFailures NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "Normal self healing unable to cope with the number of devices predicted to fail."
+::= { promOsd 7 }
+
+promOsdHostDown NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "Ceph OSD host is down."
+::= { promOsd 8 }
+
+promMdsDamaged NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "Cephfs filesystem is damaged."
+::= { promMds 1 }
+
+promMdsReadOnly NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "Cephfs filesystem marked as READ-ONLY"
+::= { promMds 2 }
+
+promMdsOffline NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "Cephfs filesystem is unavailable/offline."
+::= { promMds 3 }
+
+promMdsDegraded NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "Cephfs filesystem is in a degraded state."
+::= { promMds 4 }
+
+promMdsNoStandby NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "Cephfs MDS daemon failure, no standby available"
+::= { promMds 5 }
+
+promMgrModuleCrash NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "Ceph mgr module has crashed recently"
+::= { promMgr 1 }
+
+promMgrPrometheusInactive NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "Ceph mgr prometheus module not responding"
+::= { promMgr 2 }
+
+promPGsInactive NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "One or more PGs are inactive for more than 5 minutes."
+::= { promPGs 1 }
+
+promPGsUnclean NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "One or more PGs are not clean for more than 15 minutes."
+::= { promPGs 2 }
+
+promPGsUnavailable NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "One or more PGs are unavailable, blocking I/O to those objects."
+::= { promPGs 3 }
+
+promPGsDamaged NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "One or more PGs are damaged."
+::= { promPGs 4 }
+
+promPGsRecoveryFull NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "PG recovery is impaired due to full OSDs."
+::= { promPGs 5 }
+
+promPGsBackfillFull NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "PG backfill is impaired due to full OSDs."
+::= { promPGs 6 }
+
+promNodeRootVolumeFull NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "Root volume (OSD and MON store) is dangerously full (< 5% free)."
+::= { promNode 1 }
+
+promNodeNetworkPacketDrops NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "A node experiences packet drop > 1 packet/s on an interface."
+::= { promNode 2 }
+
+promNodeNetworkPacketErrors NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "A node experiences packet errors > 1 packet/s on an interface."
+::= { promNode 3 }
+
+promNodeStorageFilling NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "A mountpoint will be full in less than 5 days assuming the average fillup rate of the past 48 hours."
+::= { promNode 4 }
+
+promPoolFull NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "A pool is at 90% capacity or over."
+::= { promPool 1 }
+
+promPoolFilling NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "A pool will be full in less than 5 days assuming the average fillup rate of the past 48 hours."
+::= { promPool 2 }
+
+promRadosUnfound NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "A RADOS object can not be found, even though all OSDs are online."
+::= { promRados 1 }
+
+promCephadmDaemonDown NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "Cephadm has determined that a daemon is down."
+::= { promCephadm 1 }
+
+promCephadmUpgradeFailure NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "Cephadm attempted to upgrade the cluster and encountered a problem."
+::= { promCephadm 2 }
+
+promPrometheusJobMissing NOTIFICATION-TYPE
+ STATUS current
+ DESCRIPTION "The prometheus scrape job is not defined."
+::= { promPrometheus 1 }
+-- ---------------------------------------------------------- --
+-- Ceph MIB - Conformance Information
+-- ---------------------------------------------------------- --
+
+cephAlertGroups OBJECT IDENTIFIER ::= { cephConformance 1 }
+cephCompliances OBJECT IDENTIFIER ::= { cephConformance 2 }
+
+-- ---------------------------------------------------------- --
+-- units of conformance
+-- ---------------------------------------------------------- --
+
+-- ---------------------------------------------------------- --
+-- The Trap Notification Group
+-- ---------------------------------------------------------- --
+
+cephNotificationGroup NOTIFICATION-GROUP
+ NOTIFICATIONS {
+ promGenericNotification,
+ promGenericDaemonCrash,
+ promHealthStatusError,
+ promHealthStatusWarning,
+ promMonLowQuorum,
+ promMonDiskSpaceCritical,
+ promOsdDownHigh,
+ promOsdDown,
+ promOsdNearFull,
+ promOsdFlapping,
+ promOsdHighPgDeviation,
+ promOsdFull,
+ promOsdHighPredictedFailures,
+ promOsdHostDown,
+ promMdsDamaged,
+ promMdsReadOnly,
+ promMdsOffline,
+ promMdsDegraded,
+ promMdsNoStandby,
+ promMgrModuleCrash,
+ promMgrPrometheusInactive,
+ promPGsInactive,
+ promPGsUnclean,
+ promPGsUnavailable,
+ promPGsDamaged,
+ promPGsRecoveryFull,
+ promPGsBackfillFull,
+ promNodeRootVolumeFull,
+ promNodeNetworkPacketDrops,
+ promNodeNetworkPacketErrors,
+ promNodeStorageFilling,
+ promPoolFull,
+ promPoolFilling,
+ promRadosUnfound,
+ promCephadmDaemonDown,
+ promCephadmUpgradeFailure,
+ promPrometheusJobMissing
+ }
+ STATUS current
+ DESCRIPTION
+ "A collection of notifications triggered by the Prometheus
+ rules to convey Ceph cluster state"
+ ::= { cephAlertGroups 2 }
+
+-- ---------------------------------------------------------- --
+-- compliance statements
+-- ---------------------------------------------------------- --
+
+cephCompliance MODULE-COMPLIANCE
+ STATUS current
+ DESCRIPTION
+ "The Compliance statement for the Ceph MIB"
+ MODULE
+ MANDATORY-GROUPS {
+ cephNotificationGroup
+ }
+ ::= { cephCompliances 1 }
+
+END