summaryrefslogtreecommitdiffstats
path: root/src/common/options/mon.yaml.in
diff options
context:
space:
mode:
Diffstat (limited to 'src/common/options/mon.yaml.in')
-rw-r--r--src/common/options/mon.yaml.in1340
1 files changed, 1340 insertions, 0 deletions
diff --git a/src/common/options/mon.yaml.in b/src/common/options/mon.yaml.in
new file mode 100644
index 000000000..1cd655ad4
--- /dev/null
+++ b/src/common/options/mon.yaml.in
@@ -0,0 +1,1340 @@
+# -*- mode: YAML -*-
+---
+
+options:
+- name: osd_crush_update_weight_set
+ type: bool
+ level: advanced
+ desc: update CRUSH weight-set weights when updating weights
+ long_desc: If this setting is true, we will update the weight-set weights when adjusting
+ an item's weight, effectively making changes take effect immediately, and discarding
+ any previous optimization in the weight-set value. Setting this value to false
+ will leave it to the balancer to (slowly, presumably) adjust weights to approach
+ the new target value.
+ default: true
+ with_legacy: true
+- name: osd_pool_erasure_code_stripe_unit
+ type: size
+ level: advanced
+ desc: the amount of data (in bytes) in a data chunk, per stripe
+ fmt_desc: Sets the default size, in bytes, of a chunk of an object
+ stripe for erasure coded pools. Every object of size S
+ will be stored as N stripes, with each data chunk
+ receiving ``stripe unit`` bytes. Each stripe of ``N *
+ stripe unit`` bytes will be encoded/decoded
+ individually. This option can is overridden by the
+ ``stripe_unit`` setting in an erasure code profile.
+ default: 4_K
+ services:
+ - mon
+- name: osd_pool_default_crimson
+ type: bool
+ level: advanced
+ desc: Create pools by default with FLAG_CRIMSON
+ default: false
+ services :
+ - mon
+ flags:
+ - runtime
+- name: mon_max_pool_pg_num
+ type: uint
+ level: advanced
+ default: 64_K
+ fmt_desc: The maximum number of placement groups per pool.
+- name: mon_mgr_digest_period
+ type: int
+ level: dev
+ desc: Period in seconds between monitor-to-manager health/status updates
+ default: 5
+ services:
+ - mon
+- name: mon_down_mkfs_grace
+ type: secs
+ level: advanced
+ desc: Period in seconds that the cluster may have a mon down after cluster creation
+ default: 1_min
+ services:
+ - mon
+- name: mon_mgr_beacon_grace
+ type: secs
+ level: advanced
+ desc: Period in seconds from last beacon to monitor marking a manager daemon as
+ failed
+ default: 30
+ services:
+ - mon
+- name: mon_mgr_inactive_grace
+ type: int
+ level: advanced
+ desc: Period in seconds after cluster creation during which cluster may have no
+ active manager
+ long_desc: This grace period enables the cluster to come up cleanly without raising
+ spurious health check failures about managers that aren't online yet
+ default: 1_min
+ services:
+ - mon
+- name: mon_mgr_mkfs_grace
+ type: int
+ level: advanced
+ desc: Period in seconds that the cluster may have no active manager before this
+ is reported as an ERR rather than a WARN
+ default: 2_min
+ services:
+ - mon
+- name: mon_mgr_proxy_client_bytes_ratio
+ type: float
+ level: dev
+ desc: ratio of mon_client_bytes that can be consumed by proxied mgr commands before
+ we error out to client
+ default: 0.3
+ services:
+ - mon
+- name: mon_cluster_log_to_stderr
+ type: bool
+ level: advanced
+ desc: Make monitor send cluster log messages to stderr (prefixed by channel)
+ default: false
+ services:
+ - mon
+ see_also:
+ - log_stderr_prefix
+ flags:
+ - runtime
+ with_legacy: true
+- name: mon_cluster_log_to_syslog
+ type: str
+ level: advanced
+ desc: Make monitor send cluster log messages to syslog
+ fmt_desc: Determines if the cluster log should be output to the syslog.
+ default: default=false
+ services:
+ - mon
+ flags:
+ - runtime
+ with_legacy: true
+- name: mon_cluster_log_to_syslog_level
+ type: str
+ level: advanced
+ desc: Syslog level for cluster log messages
+ default: info
+ services:
+ - mon
+ see_also:
+ - mon_cluster_log_to_syslog
+ flags:
+ - runtime
+ with_legacy: true
+- name: mon_cluster_log_to_syslog_facility
+ type: str
+ level: advanced
+ desc: Syslog facility for cluster log messages
+ default: daemon
+ services:
+ - mon
+ see_also:
+ - mon_cluster_log_to_syslog
+ flags:
+ - runtime
+ with_legacy: true
+- name: mon_cluster_log_to_file
+ type: bool
+ level: advanced
+ desc: Make monitor send cluster log messages to file
+ default: true
+ services:
+ - mon
+ see_also:
+ - mon_cluster_log_file
+ flags:
+ - runtime
+ with_legacy: true
+- name: mon_cluster_log_file
+ type: str
+ level: advanced
+ desc: File(s) to write cluster log to
+ long_desc: This can either be a simple file name to receive all messages, or a list
+ of key/value pairs where the key is the log channel and the value is the filename,
+ which may include $cluster and $channel metavariables
+ fmt_desc: |
+ The locations of the cluster's log files. There are two channels in
+ Ceph: ``cluster`` and ``audit``. This option represents a mapping
+ from channels to log files, where the log entries of that
+ channel are sent to. The ``default`` entry is a fallback
+ mapping for channels not explicitly specified. So, the following
+ default setting will send cluster log to ``$cluster.log``, and
+ send audit log to ``$cluster.audit.log``, where ``$cluster`` will
+ be replaced with the actual cluster name.
+ default: default=/var/log/ceph/$cluster.$channel.log cluster=/var/log/ceph/$cluster.log
+ services:
+ - mon
+ see_also:
+ - mon_cluster_log_to_file
+ flags:
+ - runtime
+ with_legacy: true
+- name: mon_cluster_log_file_level
+ type: str
+ level: advanced
+ desc: Lowest level to include is cluster log file
+ default: debug
+ services:
+ - mon
+ see_also:
+ - mon_cluster_log_file
+ flags:
+ - runtime
+ with_legacy: true
+- name: mon_cluster_log_to_graylog
+ type: str
+ level: advanced
+ desc: Make monitor send cluster log to graylog
+ default: 'false'
+ services:
+ - mon
+ flags:
+ - runtime
+ with_legacy: true
+- name: mon_cluster_log_to_graylog_host
+ type: str
+ level: advanced
+ desc: Graylog host for cluster log messages
+ default: 127.0.0.1
+ services:
+ - mon
+ see_also:
+ - mon_cluster_log_to_graylog
+ flags:
+ - runtime
+ with_legacy: true
+- name: mon_cluster_log_to_graylog_port
+ type: str
+ level: advanced
+ desc: Graylog port for cluster log messages
+ default: '12201'
+ services:
+ - mon
+ see_also:
+ - mon_cluster_log_to_graylog
+ flags:
+ - runtime
+ with_legacy: true
+- name: mon_cluster_log_to_journald
+ type: str
+ level: advanced
+ desc: Make monitor send cluster log to journald
+ default: 'false'
+ services:
+ - mon
+ flags:
+ - runtime
+- name: mon_log_max
+ type: uint
+ level: advanced
+ desc: number of recent cluster log messages to retain
+ default: 10000
+ services:
+ - mon
+ with_legacy: true
+- name: mon_log_max_summary
+ type: uint
+ level: advanced
+ desc: number of recent cluster log messages to dedup against
+ default: 50
+ services:
+ - mon
+ with_legacy: true
+- name: mon_log_full_interval
+ type: uint
+ level: advanced
+ desc: how many epochs before we encode a full copy of recent log keys
+ default: 50
+ services: [mon]
+ with_legacy: true
+- name: mon_max_log_entries_per_event
+ type: int
+ level: advanced
+ desc: max cluster log entries per paxos event
+ fmt_desc: The maximum number of log entries per event.
+ default: 4096
+ services:
+ - mon
+ with_legacy: true
+- name: mon_health_to_clog
+ type: bool
+ level: advanced
+ desc: log monitor health to cluster log
+ fmt_desc: Enable sending a health summary to the cluster log periodically.
+ default: true
+ services:
+ - mon
+ with_legacy: true
+- name: mon_health_to_clog_interval
+ type: int
+ level: advanced
+ desc: frequency to log monitor health to cluster log
+ fmt_desc: How often (in seconds) the monitor sends a health summary to the cluster
+ log (a non-positive number disables). Monitors will always
+ send a summary to the cluster log whether or not it differs from
+ the previous summary.
+ default: 10_min
+ services:
+ - mon
+ see_also:
+ - mon_health_to_clog
+ with_legacy: true
+- name: mon_health_to_clog_tick_interval
+ type: float
+ level: dev
+ fmt_desc: How often (in seconds) the monitor sends a health summary to the cluster
+ log (a non-positive number disables). If current health summary
+ is empty or identical to the last time, monitor will not send it
+ to cluster log.
+ default: 1_min
+ services:
+ - mon
+ with_legacy: true
+- name: mon_health_detail_to_clog
+ type: bool
+ level: dev
+ desc: log health detail to cluster log
+ default: true
+ with_legacy: true
+- name: mon_warn_on_filestore_osds
+ type: bool
+ level: dev
+ desc: log health warn for filestore OSDs
+ default: true
+ with_legacy: true
+- name: mon_health_max_detail
+ type: uint
+ level: advanced
+ desc: max detailed pgs to report in health detail
+ default: 50
+ services:
+ - mon
+- name: mon_health_log_update_period
+ type: int
+ level: dev
+ desc: minimum time in seconds between log messages about each health check
+ default: 5
+ services:
+ - mon
+ min: 0
+- name: mon_data_avail_crit
+ type: int
+ level: advanced
+ desc: issue MON_DISK_CRIT health error when mon available space below this percentage
+ fmt_desc: Raise ``HEALTH_ERR`` status when the filesystem that houses a
+ monitor's data store reports that its available capacity is
+ less than or equal to this percentage.
+ default: 5
+ services:
+ - mon
+ with_legacy: true
+- name: mon_data_avail_warn
+ type: int
+ level: advanced
+ desc: issue MON_DISK_LOW health warning when mon available space below this percentage
+ fmt_desc: Raise ``HEALTH_WARN`` status when the filesystem that houses a
+ monitor's data store reports that its available capacity is
+ less than or equal to this percentage .
+ default: 30
+ services:
+ - mon
+ with_legacy: true
+- name: mon_data_size_warn
+ type: size
+ level: advanced
+ desc: issue MON_DISK_BIG health warning when mon database is above this size
+ fmt_desc: Raise ``HEALTH_WARN`` status when a monitor's data
+ store grows to be larger than this size, 15GB by default.
+ default: 15_G
+ services:
+ - mon
+ with_legacy: true
+- name: mon_daemon_bytes
+ type: size
+ level: advanced
+ desc: max bytes of outstanding mon messages mon will read off the network
+ fmt_desc: The message memory cap for metadata server and OSD messages (in bytes).
+ default: 400_M
+ services:
+ - mon
+ with_legacy: true
+- name: mon_election_timeout
+ type: float
+ level: advanced
+ desc: maximum time for a mon election (seconds)
+ fmt_desc: On election proposer, maximum waiting time for all ACKs in seconds.
+ default: 5
+ services:
+ - mon
+ with_legacy: true
+- name: mon_election_default_strategy
+ type: uint
+ level: advanced
+ desc: The election strategy to set when constructing the first monmap.
+ default: 1
+ min: 1
+ max: 3
+- name: mon_lease
+ type: float
+ level: advanced
+ desc: lease interval between quorum monitors (seconds)
+ long_desc: This setting controls how sensitive your mon quorum is to intermittent
+ network issues or other failures.
+ fmt_desc: The length (in seconds) of the lease on the monitor's versions.
+ default: 5
+ services:
+ - mon
+ with_legacy: true
+- name: mon_lease_renew_interval_factor
+ type: float
+ level: advanced
+ desc: multiple of mon_lease for the lease renewal interval
+ long_desc: Leases must be renewed before they time out. A smaller value means frequent
+ renewals, while a value close to 1 makes a lease expiration more likely.
+ fmt_desc: |
+ ``mon_lease`` \* ``mon_lease_renew_interval_factor`` will be the
+ interval for the Leader to renew the other monitor's leases. The
+ factor should be less than ``1.0``.
+ default: 0.6
+ services:
+ - mon
+ see_also:
+ - mon_lease
+ min: 0
+ max: 0.9999999
+ with_legacy: true
+- name: mon_lease_ack_timeout_factor
+ type: float
+ level: advanced
+ desc: multiple of mon_lease for the lease ack interval before calling new election
+ fmt_desc: The Leader will wait ``mon_lease`` \* ``mon_lease_ack_timeout_factor``
+ for the Providers to acknowledge the lease extension.
+ default: 2
+ services:
+ - mon
+ see_also:
+ - mon_lease
+ min: 1.0001
+ max: 100
+ with_legacy: true
+- name: mon_accept_timeout_factor
+ type: float
+ level: advanced
+ desc: multiple of mon_lease for follower mons to accept proposed state changes before
+ calling a new election
+ fmt_desc: The Leader will wait ``mon_lease`` \* ``mon_accept_timeout_factor``
+ for the Requester(s) to accept a Paxos update. It is also used
+ during the Paxos recovery phase for similar purposes.
+ default: 2
+ services:
+ - mon
+ see_also:
+ - mon_lease
+ with_legacy: true
+- name: mon_elector_ping_timeout
+ type: float
+ level: advanced
+ desc: The time after which a ping 'times out' and a connection is considered down
+ default: 2
+ services:
+ - mon
+ see_also:
+ - mon_elector_ping_divisor
+- name: mon_elector_ping_divisor
+ type: uint
+ level: advanced
+ desc: We will send a ping up to this many times per timeout per
+ default: 2
+ services:
+ - mon
+ see_also:
+ - mon_elector_ping_timeout
+- name: mon_con_tracker_persist_interval
+ type: uint
+ level: advanced
+ desc: how many updates the ConnectionTracker takes before it persists to disk
+ default: 10
+ services:
+ - mon
+ min: 1
+ max: 100000
+- name: mon_con_tracker_score_halflife
+ type: uint
+ level: advanced
+ desc: The 'halflife' used when updating/calculating peer connection scores
+ default: 43200
+ services:
+ - mon
+ min: 60
+- name: mon_elector_ignore_propose_margin
+ type: float
+ level: advanced
+ desc: The difference in connection score allowed before a peon stops ignoring out-of-quorum
+ PROPOSEs
+ default: 0.0005
+ services:
+ - mon
+- name: mon_warn_on_cache_pools_without_hit_sets
+ type: bool
+ level: advanced
+ desc: issue CACHE_POOL_NO_HIT_SET health warning for cache pools that do not have
+ hit sets configured
+ fmt_desc: Raise ``HEALTH_WARN`` when a cache pool does not have the ``hit_set_type``
+ value configured. See :ref:`hit_set_type <hit_set_type>` for more details.
+ default: true
+ services:
+ - mon
+ with_legacy: true
+- name: mon_warn_on_pool_pg_num_not_power_of_two
+ type: bool
+ level: dev
+ desc: issue POOL_PG_NUM_NOT_POWER_OF_TWO warning if pool has a non-power-of-two
+ pg_num value
+ default: true
+ services:
+ - mon
+- name: mon_allow_pool_size_one
+ type: bool
+ level: advanced
+ desc: allow configuring pool with no replicas
+ default: false
+ services:
+ - mon
+- name: mon_warn_on_crush_straw_calc_version_zero
+ type: bool
+ level: advanced
+ desc: issue OLD_CRUSH_STRAW_CALC_VERSION health warning if the CRUSH map's straw_calc_version
+ is zero
+ fmt_desc: Raise ``HEALTH_WARN`` when the CRUSH ``straw_calc_version`` is zero. See
+ :ref:`CRUSH map tunables <crush-map-tunables>` for details.
+ default: true
+ services:
+ - mon
+ with_legacy: true
+- name: mon_warn_on_pool_no_redundancy
+ type: bool
+ level: advanced
+ desc: Issue a health warning if any pool is configured with no replicas
+ fmt_desc: Raise ``HEALTH_WARN`` if any pool is configured with no replicas.
+ default: true
+ services:
+ - mon
+ see_also:
+ - osd_pool_default_size
+ - osd_pool_default_min_size
+- name: mon_warn_on_osd_down_out_interval_zero
+ type: bool
+ level: advanced
+ desc: issue OSD_NO_DOWN_OUT_INTERVAL health warning if mon_osd_down_out_interval
+ is zero
+ long_desc: Having mon_osd_down_out_interval set to 0 means that down OSDs are not
+ marked out automatically and the cluster does not heal itself without administrator
+ intervention.
+ fmt_desc: Raise ``HEALTH_WARN`` when ``mon_osd_down_out_interval`` is zero. Having this
+ option set to zero on the leader acts much like the ``noout`` flag. It's hard to figure
+ out what's going wrong with clusters without the ``noout`` flag set but acting like that
+ just the same, so we report a warning in this case.
+ default: true
+ services:
+ - mon
+ see_also:
+ - mon_osd_down_out_interval
+ with_legacy: true
+- name: mon_warn_on_legacy_crush_tunables
+ type: bool
+ level: advanced
+ desc: issue OLD_CRUSH_TUNABLES health warning if CRUSH tunables are older than mon_crush_min_required_version
+ fmt_desc: Raise ``HEALTH_WARN`` when CRUSH tunables are too old (older than ``mon_min_crush_required_version``)
+ default: true
+ services:
+ - mon
+ see_also:
+ - mon_crush_min_required_version
+ with_legacy: true
+- name: mon_crush_min_required_version
+ type: str
+ level: advanced
+ desc: minimum ceph release to use for mon_warn_on_legacy_crush_tunables
+ fmt_desc: The minimum tunable profile required by the cluster. See
+ :ref:`CRUSH map tunables <crush-map-tunables>` for details.
+ default: hammer
+ services:
+ - mon
+ see_also:
+ - mon_warn_on_legacy_crush_tunables
+ with_legacy: true
+- name: mon_warn_on_degraded_stretch_mode
+ type: bool
+ level: advanced
+ desc: Issue a health warning if we are in degraded stretch mode
+ default: true
+ services:
+ - mon
+- name: mon_stretch_cluster_recovery_ratio
+ type: float
+ level: advanced
+ desc: the ratio of up OSDs at which a degraded stretch cluster enters recovery
+ default: 0.6
+ services:
+ - mon
+ min: 0.51
+ max: 1
+- name: mon_stretch_recovery_min_wait
+ type: float
+ level: advanced
+ desc: how long the monitors wait before considering fully-healthy PGs as evidence
+ the stretch mode is repaired
+ default: 15
+ services:
+ - mon
+ min: 1
+- name: mon_stretch_pool_size
+ type: uint
+ level: dev
+ default: 4
+ services:
+ - mon
+ min: 3
+ max: 6
+- name: mon_stretch_pool_min_size
+ type: uint
+ level: dev
+ default: 2
+ services:
+ - mon
+ min: 2
+ max: 4
+- name: mon_clock_drift_allowed
+ type: float
+ level: advanced
+ desc: allowed clock drift (in seconds) between mons before issuing a health warning
+ default: 0.05
+ services:
+ - mon
+ with_legacy: true
+# exponential backoff for clock drift warnings
+- name: mon_clock_drift_warn_backoff
+ type: float
+ level: advanced
+ desc: exponential backoff factor for logging clock drift warnings in the cluster
+ log
+ default: 5
+ services:
+ - mon
+ with_legacy: true
+# on leader, timecheck (clock drift check) interval (seconds)
+- name: mon_timecheck_interval
+ type: float
+ level: advanced
+ desc: frequency of clock synchronization checks between monitors (seconds)
+ fmt_desc: The time check interval (clock drift check) in seconds
+ for the Leader.
+ default: 5_min
+ services:
+ - mon
+ with_legacy: true
+# on leader, timecheck (clock drift check) interval when in presence of a skew (seconds)
+- name: mon_timecheck_skew_interval
+ type: float
+ level: advanced
+ desc: frequency of clock synchronization (re)checks between monitors while clocks
+ are believed to be skewed (seconds)
+ fmt_desc: The time check interval (clock drift check) in seconds when in
+ presence of a skew in seconds for the Leader.
+ default: 30
+ services:
+ - mon
+ see_also:
+ - mon_timecheck_interval
+ with_legacy: true
+# how often (in commits) to stash a full copy of the PaxosService state
+- name: paxos_stash_full_interval
+ type: int
+ level: advanced
+ default: 25
+ services:
+ - mon
+ fmt_desc: How often (in commits) to stash a full copy of the PaxosService state.
+ Current this setting only affects ``mds``, ``mon``, ``auth`` and ``mgr``
+ PaxosServices.
+ with_legacy: true
+# max paxos iterations before we must first sync the monitor stores
+- name: paxos_max_join_drift
+ type: int
+ level: advanced
+ default: 10
+ services:
+ - mon
+ fmt_desc: The maximum Paxos iterations before we must first sync the
+ monitor data stores. When a monitor finds that its peer is too
+ far ahead of it, it will first sync with data stores before moving
+ on.
+ with_legacy: true
+# gather updates for this long before proposing a map update
+- name: paxos_propose_interval
+ type: float
+ level: advanced
+ default: 1
+ services:
+ - mon
+ fmt_desc: Gather updates for this time interval before proposing
+ a map update.
+ with_legacy: true
+# min time to gather updates for after period of inactivity
+- name: paxos_min_wait
+ type: float
+ level: advanced
+ default: 0.05
+ services:
+ - mon
+ fmt_desc: The minimum amount of time to gather updates after a period of
+ inactivity.
+ with_legacy: true
+# minimum number of paxos states to keep around
+- name: paxos_min
+ type: int
+ level: advanced
+ default: 500
+ services:
+ - mon
+ fmt_desc: The minimum number of Paxos states to keep around
+ with_legacy: true
+# number of extra proposals tolerated before trimming
+- name: paxos_trim_min
+ type: int
+ level: advanced
+ default: 250
+ services:
+ - mon
+ fmt_desc: Number of extra proposals tolerated before trimming
+ with_legacy: true
+# maximum amount of versions to trim during a single proposal (0 disables it)
+- name: paxos_trim_max
+ type: int
+ level: advanced
+ default: 500
+ services:
+ - mon
+ fmt_desc: The maximum number of extra proposals to trim at a time
+ with_legacy: true
+# minimum amount of versions to trigger a trim (0 disables it)
+- name: paxos_service_trim_min
+ type: uint
+ level: advanced
+ default: 250
+ services:
+ - mon
+ fmt_desc: The minimum amount of versions to trigger a trim (0 disables it)
+ with_legacy: true
+# maximum amount of versions to trim during a single proposal (0 disables it)
+- name: paxos_service_trim_max
+ type: uint
+ level: advanced
+ default: 500
+ services:
+ - mon
+ fmt_desc: The maximum amount of versions to trim during a single proposal (0 disables it)
+ with_legacy: true
+- name: paxos_service_trim_max_multiplier
+ type: uint
+ level: advanced
+ desc: factor by which paxos_service_trim_max will be multiplied to get a new upper
+ bound when trim sizes are high (0 disables it)
+ default: 20
+ services:
+ - mon
+ min: 0
+ flags:
+ - runtime
+- name: paxos_kill_at
+ type: int
+ level: dev
+ default: 0
+ services:
+ - mon
+ with_legacy: true
+- name: mon_auth_validate_all_caps
+ type: bool
+ level: advanced
+ desc: Whether to parse non-monitor capabilities set by the 'ceph auth ...' commands.
+ Disabling this saves CPU on the monitor, but allows invalid capabilities to be
+ set, and only be rejected later, when they are used.
+ default: true
+ services:
+ - mon
+ flags:
+ - runtime
+# force mon to trim mdsmaps to this point (dangerous)
+- name: mon_mds_force_trim_to
+ type: int
+ level: dev
+ desc: force mons to trim mdsmaps/fsmaps up to this epoch
+ fmt_desc: Force monitor to trim mdsmaps up to but not including this FSMap
+ epoch. A value of 0 disables (the default) this config. This command is
+ potentially dangerous, use with care.
+ default: 0
+ services:
+ - mon
+ with_legacy: true
+- name: mds_beacon_mon_down_grace
+ type: secs
+ level: advanced
+ desc: tolerance in seconds for missed MDS beacons to monitors
+ fmt_desc: The interval without beacons before Ceph declares an MDS laggy
+ when a monitor is down.
+ default: 1_min
+# skip safety assertions on FSMap (in case of bugs where we want to continue anyway)
+- name: mon_mds_skip_sanity
+ type: bool
+ level: advanced
+ desc: skip sanity checks on fsmap/mdsmap
+ fmt_desc: Skip safety assertions on FSMap (in case of bugs where we want to
+ continue anyway). Monitor terminates if the FSMap sanity check
+ fails, but we can disable it by enabling this option.
+ default: false
+ services:
+ - mon
+ with_legacy: true
+- name: mon_mds_blocklist_interval
+ type: float
+ level: dev
+ desc: Duration in seconds that blocklist entries for MDS daemons remain in the OSD
+ map
+ fmt_desc: The blocklist duration for failed MDSs in the OSD map. Note,
+ this controls how long failed MDS daemons will stay in the
+ OSDMap blocklist. It has no effect on how long something is
+ blocklisted when the administrator blocklists it manually. For
+ example, ``ceph osd blocklist add`` will still use the default
+ blocklist time.
+ default: 1_day
+ services:
+ - mon
+ min: 1_hr
+ flags:
+ - runtime
+- name: mon_mgr_blocklist_interval
+ type: float
+ level: dev
+ desc: Duration in seconds that blocklist entries for mgr daemons remain in the OSD
+ map
+ default: 1_day
+ services:
+ - mon
+ min: 1_hr
+ flags:
+ - runtime
+- name: mon_osd_laggy_halflife
+ type: int
+ level: advanced
+ desc: halflife of OSD 'lagginess' factor
+ fmt_desc: The number of seconds laggy estimates will decay.
+ default: 1_hr
+ services:
+ - mon
+ with_legacy: true
+- name: mon_osd_laggy_weight
+ type: float
+ level: advanced
+ desc: how heavily to weight OSD marking itself back up in overall laggy_probability
+ long_desc: 1.0 means that an OSD marking itself back up (because it was marked down
+ but not actually dead) means a 100% laggy_probability; 0.0 effectively disables
+ tracking of laggy_probability.
+ fmt_desc: The weight for new samples in laggy estimation decay.
+ default: 0.3
+ services:
+ - mon
+ min: 0
+ max: 1
+ with_legacy: true
+- name: mon_osd_laggy_max_interval
+ type: int
+ level: advanced
+ desc: cap value for period for OSD to be marked for laggy_interval calculation
+ fmt_desc: Maximum value of ``laggy_interval`` in laggy estimations (in seconds).
+ Monitor uses an adaptive approach to evaluate the ``laggy_interval`` of
+ a certain OSD. This value will be used to calculate the grace time for
+ that OSD.
+ default: 5_min
+ services:
+ - mon
+ with_legacy: true
+- name: mon_osd_adjust_heartbeat_grace
+ type: bool
+ level: advanced
+ desc: increase OSD heartbeat grace if peers appear to be laggy
+ long_desc: If an OSD is marked down but then marks itself back up, it implies it
+ wasn't actually down but was unable to respond to heartbeats. If this option
+ is true, we can use the laggy_probability and laggy_interval values calculated
+ to model this situation to increase the heartbeat grace period for this OSD so
+ that it isn't marked down again. laggy_probability is an estimated probability
+ that the given OSD is down because it is laggy (not actually down), and laggy_interval
+ is an estiate on how long it stays down when it is laggy.
+ fmt_desc: If set to ``true``, Ceph will scale based on laggy estimations.
+ default: true
+ services:
+ - mon
+ see_also:
+ - mon_osd_laggy_halflife
+ - mon_osd_laggy_weight
+ - mon_osd_laggy_max_interval
+ with_legacy: true
+- name: mon_osd_adjust_down_out_interval
+ type: bool
+ level: advanced
+ desc: increase the mon_osd_down_out_interval if an OSD appears to be laggy
+ fmt_desc: If set to ``true``, Ceph will scaled based on laggy estimations.
+ default: true
+ services:
+ - mon
+ see_also:
+ - mon_osd_adjust_heartbeat_grace
+ with_legacy: true
+- name: mon_osd_auto_mark_in
+ type: bool
+ level: advanced
+ desc: mark any OSD that comes up 'in'
+ fmt_desc: Ceph will mark any booting Ceph OSD Daemons as ``in``
+ the Ceph Storage Cluster.
+ default: false
+ services:
+ - mon
+ with_legacy: true
+- name: mon_osd_auto_mark_auto_out_in
+ type: bool
+ level: advanced
+ desc: mark any OSD that comes up that was automatically marked 'out' back 'in'
+ fmt_desc: Ceph will mark booting Ceph OSD Daemons auto marked ``out``
+ of the Ceph Storage Cluster as ``in`` the cluster.
+ default: true
+ services:
+ - mon
+ see_also:
+ - mon_osd_down_out_interval
+ with_legacy: true
+- name: mon_osd_auto_mark_new_in
+ type: bool
+ level: advanced
+ desc: mark any new OSD that comes up 'in'
+ fmt_desc: Ceph will mark booting new Ceph OSD Daemons as ``in`` the
+ Ceph Storage Cluster.
+ default: true
+ services:
+ - mon
+ with_legacy: true
+- name: mon_osd_destroyed_out_interval
+ type: int
+ level: advanced
+ desc: mark any OSD 'out' that has been 'destroy'ed for this long (seconds)
+ default: 10_min
+ services:
+ - mon
+ with_legacy: true
+- name: mon_osd_down_out_interval
+ type: int
+ level: advanced
+ desc: mark any OSD 'out' that has been 'down' for this long (seconds)
+ fmt_desc: The number of seconds Ceph waits before marking a Ceph OSD Daemon
+ ``down`` and ``out`` if it doesn't respond.
+ default: 10_min
+ services:
+ - mon
+ with_legacy: true
+- name: mon_osd_down_out_subtree_limit
+ type: str
+ level: advanced
+ desc: do not automatically mark OSDs 'out' if an entire subtree of this size is
+ down
+ fmt_desc: The smallest :term:`CRUSH` unit type that Ceph will **not**
+ automatically mark out. For instance, if set to ``host`` and if
+ all OSDs of a host are down, Ceph will not automatically mark out
+ these OSDs.
+ default: rack
+ services:
+ - mon
+ see_also:
+ - mon_osd_down_out_interval
+ flags:
+ - runtime
+- name: mon_osd_min_up_ratio
+ type: float
+ level: advanced
+ desc: do not automatically mark OSDs 'out' if fewer than this many OSDs are 'up'
+ fmt_desc: The minimum ratio of ``up`` Ceph OSD Daemons before Ceph will
+ mark Ceph OSD Daemons ``down``.
+ default: 0.3
+ services:
+ - mon
+ see_also:
+ - mon_osd_down_out_interval
+ with_legacy: true
+- name: mon_osd_min_in_ratio
+ type: float
+ level: advanced
+ desc: do not automatically mark OSDs 'out' if fewer than this many OSDs are 'in'
+ fmt_desc: The minimum ratio of ``in`` Ceph OSD Daemons before Ceph will
+ mark Ceph OSD Daemons ``out``.
+ default: 0.75
+ services:
+ - mon
+ see_also:
+ - mon_osd_down_out_interval
+ with_legacy: true
+- name: mon_osd_warn_op_age
+ type: float
+ level: advanced
+ desc: issue REQUEST_SLOW health warning if OSD ops are slower than this age (seconds)
+ default: 32
+ services:
+ - mgr
+ with_legacy: true
+- name: mon_osd_warn_num_repaired
+ type: uint
+ level: advanced
+ desc: issue OSD_TOO_MANY_REPAIRS health warning if an OSD has more than this many
+ read repairs
+ default: 10
+ services:
+ - mon
+- name: mon_osd_prime_pg_temp
+ type: bool
+ level: dev
+ desc: minimize peering work by priming pg_temp values after a map change
+ fmt_desc: Enables or disables priming the PGMap with the previous OSDs when an ``out``
+ OSD comes back into the cluster. With the ``true`` setting, clients
+ will continue to use the previous OSDs until the newly ``in`` OSDs for
+ a PG have peered.
+ default: true
+ services:
+ - mon
+ with_legacy: true
+- name: mon_osd_prime_pg_temp_max_time
+ type: float
+ level: dev
+ desc: maximum time to spend precalculating PG mappings on map change (seconds)
+ fmt_desc: How much time in seconds the monitor should spend trying to prime the
+ PGMap when an out OSD comes back into the cluster.
+ default: 0.5
+ services:
+ - mon
+ with_legacy: true
+- name: mon_osd_prime_pg_temp_max_estimate
+ type: float
+ level: advanced
+ desc: calculate all PG mappings if estimated fraction of PGs that change is above
+ this amount
+ fmt_desc: Maximum estimate of time spent on each PG before we prime all PGs
+ in parallel.
+ default: 0.25
+ services:
+ - mon
+ with_legacy: true
+- name: mon_osd_blocklist_default_expire
+ type: float
+ level: advanced
+ desc: Duration in seconds that blocklist entries for clients remain in the OSD map
+ default: 1_hr
+ services:
+ - mon
+ with_legacy: true
+- name: mon_osd_crush_smoke_test
+ type: bool
+ level: advanced
+ desc: perform a smoke test on any new CRUSH map before accepting changes
+ default: true
+ services:
+ - mon
+ with_legacy: true
+- name: mon_smart_report_timeout
+ type: uint
+ level: advanced
+ desc: Timeout (in seconds) for smartctl to run, default is set to 5
+ default: 5
+ services:
+ - mon
+- name: mon_warn_on_older_version
+ type: bool
+ level: advanced
+ desc: issue DAEMON_OLD_VERSION health warning if daemons are not all running the
+ same version
+ default: true
+ services:
+ - mon
+- name: mon_warn_older_version_delay
+ type: secs
+ level: advanced
+ desc: issue DAEMON_OLD_VERSION health warning after this amount of time has elapsed
+ default: 7_day
+ services:
+ - mon
+- name: mon_data
+ type: str
+ level: advanced
+ desc: path to mon database
+ fmt_desc: The monitor's data location.
+ default: /var/lib/ceph/mon/$cluster-$id
+ services:
+ - mon
+ flags:
+ - no_mon_update
+ with_legacy: true
+- name: mon_rocksdb_options
+ type: str
+ level: advanced
+ default: write_buffer_size=33554432,compression=kNoCompression,level_compaction_dynamic_level_bytes=true
+ with_legacy: true
+- name: mon_enable_op_tracker
+ type: bool
+ level: advanced
+ desc: enable/disable MON op tracking
+ default: true
+ services:
+ - mon
+# compact leveldb on ceph-mon start
+- name: mon_compact_on_start
+ type: bool
+ level: advanced
+ default: false
+ services:
+ - mon
+ fmt_desc: Compact the database used as Ceph Monitor store on
+ ``ceph-mon`` start. A manual compaction helps to shrink the
+ monitor database and improve the performance of it if the regular
+ compaction fails to work.
+ with_legacy: true
+# trigger leveldb compaction on bootstrap
+- name: mon_compact_on_bootstrap
+ type: bool
+ level: advanced
+ default: false
+ services:
+ - mon
+ fmt_desc: Compact the database used as Ceph Monitor store
+ on bootstrap. Monitors probe each other to establish
+ a quorum after bootstrap. If a monitor times out before joining the
+ quorum, it will start over and bootstrap again.
+ with_legacy: true
+# compact (a prefix) when we trim old states
+- name: mon_compact_on_trim
+ type: bool
+ level: advanced
+ default: true
+ services:
+ - mon
+ fmt_desc: Compact a certain prefix (including paxos) when we trim its old states.
+ with_legacy: true
+- name: mon_op_complaint_time
+ type: secs
+ level: advanced
+ desc: time after which to consider a monitor operation blocked after no updates
+ default: 30
+ services:
+ - mon
+- name: mon_op_log_threshold
+ type: int
+ level: advanced
+ desc: max number of slow ops to display
+ default: 5
+ services:
+ - mon
+- name: mon_op_history_size
+ type: uint
+ level: advanced
+ desc: max number of completed ops to track
+ default: 20
+ services:
+ - mon
+- name: mon_op_history_duration
+ type: secs
+ level: advanced
+ desc: expiration time in seconds of historical MON OPS
+ default: 10_min
+ services:
+ - mon
+- name: mon_op_history_slow_op_size
+ type: uint
+ level: advanced
+ desc: max number of slow historical MON OPS to keep
+ default: 20
+ services:
+ - mon
+- name: mon_op_history_slow_op_threshold
+ type: secs
+ level: advanced
+ desc: duration of an op to be considered as a historical slow op
+ default: 10
+ services:
+ - mon
+- name: mon_osdmap_full_prune_enabled
+ type: bool
+ level: advanced
+ desc: enables pruning full osdmap versions when we go over a given number of maps
+ default: true
+ services:
+ - mon
+ see_also:
+ - mon_osdmap_full_prune_min
+ - mon_osdmap_full_prune_interval
+ - mon_osdmap_full_prune_txsize
+- name: mon_osdmap_full_prune_min
+ type: uint
+ level: advanced
+ desc: minimum number of versions in the store to trigger full map pruning
+ default: 10000
+ services:
+ - mon
+ see_also:
+ - mon_osdmap_full_prune_enabled
+ - mon_osdmap_full_prune_interval
+ - mon_osdmap_full_prune_txsize
+- name: mon_osdmap_full_prune_interval
+ type: uint
+ level: advanced
+ desc: interval between maps that will not be pruned; maps in the middle will be
+ pruned.
+ default: 10
+ services:
+ - mon
+ see_also:
+ - mon_osdmap_full_prune_enabled
+ - mon_osdmap_full_prune_interval
+ - mon_osdmap_full_prune_txsize
+- name: mon_osdmap_full_prune_txsize
+ type: uint
+ level: advanced
+ desc: number of maps we will prune per iteration
+ default: 100
+ services:
+ - mon
+ see_also:
+ - mon_osdmap_full_prune_enabled
+ - mon_osdmap_full_prune_interval
+ - mon_osdmap_full_prune_txsize
+- name: mon_osd_cache_size
+ type: int
+ level: advanced
+ desc: maximum number of OSDMaps to cache in memory
+ fmt_desc: The size of osdmaps cache, not to rely on underlying store's cache
+ default: 500
+ services:
+ - mon
+ with_legacy: true
+- name: mon_osd_cache_size_min
+ type: size
+ level: advanced
+ desc: The minimum amount of bytes to be kept mapped in memory for osd monitor caches.
+ fmt_desc: The minimum amount of bytes to be kept mapped in memory for osd
+ monitor caches.
+ default: 128_M
+ services:
+ - mon
+ with_legacy: true
+- name: mon_osd_mapping_pgs_per_chunk
+ type: int
+ level: dev
+ desc: granularity of PG placement calculation background work
+ fmt_desc: We calculate the mapping from placement group to OSDs in chunks.
+ This option specifies the number of placement groups per chunk.
+ default: 4096
+ services:
+ - mon
+ with_legacy: true
+- name: mon_clean_pg_upmaps_per_chunk
+ type: uint
+ level: dev
+ desc: granularity of PG upmap validation background work
+ default: 256
+ services:
+ - mon
+ with_legacy: true
+- name: mon_osd_max_creating_pgs
+ type: int
+ level: advanced
+ desc: maximum number of PGs the mon will create at once
+ default: 1024
+ services:
+ - mon
+ with_legacy: true
+- name: mon_osd_max_initial_pgs
+ type: int
+ level: advanced
+ desc: maximum number of PGs a pool will created with
+ long_desc: If the user specifies more PGs than this, the cluster will subsequently
+ split PGs after the pool is created in order to reach the target.
+ default: 1024
+ services:
+ - mon
+- name: mon_memory_target
+ type: size
+ level: basic
+ desc: The amount of bytes pertaining to osd monitor caches and kv cache to be kept
+ mapped in memory with cache auto-tuning enabled
+ fmt_desc: The amount of bytes pertaining to OSD monitor caches and KV cache
+ to be kept mapped in memory with cache auto-tuning enabled.
+ default: 2_G
+ services:
+ - mon
+ flags:
+ - runtime
+ with_legacy: true
+- name: mon_memory_autotune
+ type: bool
+ level: basic
+ desc: Autotune the cache memory being used for osd monitors and kv database
+ fmt_desc: Autotune the cache memory used for OSD monitors and KV
+ database.
+ default: true
+ services:
+ - mon
+ flags:
+ - runtime
+ with_legacy: true
+- name: mon_cpu_threads
+ type: int
+ level: advanced
+ desc: worker threads for CPU intensive background work
+ fmt_desc: Number of threads for performing CPU intensive work on monitor.
+ default: 4
+ services:
+ - mon
+ with_legacy: true
+- name: mon_tick_interval
+ type: int
+ level: advanced
+ desc: interval for internal mon background checks
+ fmt_desc: A monitor's tick interval in seconds.
+ default: 5
+ services:
+ - mon
+ with_legacy: true
+- name: mon_session_timeout
+ type: int
+ level: advanced
+ desc: close inactive mon client connections after this many seconds
+ fmt_desc: Monitor will terminate inactive sessions stay idle over this
+ time limit.
+ default: 5_min
+ services:
+ - mon
+ with_legacy: true
+- name: mon_subscribe_interval
+ type: float
+ level: dev
+ desc: subscribe interval for pre-jewel clients
+ fmt_desc: The refresh interval (in seconds) for subscriptions. The
+ subscription mechanism enables obtaining cluster maps
+ and log information.
+ default: 1_day
+ services:
+ - mon
+ with_legacy: true
+- name: mon_use_min_delay_socket
+ type: bool
+ level: advanced
+ default: false
+ desc: priority packets between mons
+ with_legacy: true
+ see_also:
+ - osd_heartbeat_use_min_delay_socket