author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-21 11:54:28 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-21 11:54:28 +0000
commit     e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree       64f88b554b444a49f656b6c656111a145cbbaa28 /src/common/options/osd.yaml.in
parent     Initial commit. (diff)
Adding upstream version 18.2.2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/common/options/osd.yaml.in')
-rw-r--r--  src/common/options/osd.yaml.in   1415
1 file changed, 1415 insertions(+), 0 deletions(-)
diff --git a/src/common/options/osd.yaml.in b/src/common/options/osd.yaml.in
new file mode 100644
index 000000000..7291ce11d
--- /dev/null
+++ b/src/common/options/osd.yaml.in
@@ -0,0 +1,1415 @@
+# -*- mode: YAML -*-
+---
+
+options:
+- name: osd_numa_prefer_iface
+ type: bool
+ level: advanced
+ desc: prefer IP on network interface on same numa node as storage
+ default: true
+ see_also:
+ - osd_numa_auto_affinity
+ flags:
+ - startup
+- name: osd_numa_auto_affinity
+ type: bool
+ level: advanced
+ desc: automatically set affinity to numa node when storage and network match
+ default: true
+ flags:
+ - startup
+- name: osd_numa_node
+ type: int
+ level: advanced
+ desc: set affinity to a numa node (-1 for none)
+ default: -1
+ see_also:
+ - osd_numa_auto_affinity
+ flags:
+ - startup
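+# Example (illustrative): pin a specific OSD (here osd.0, chosen only for the
+# example) to NUMA node 0; both options carry the "startup" flag, so the OSD
+# must be restarted afterwards:
+#   ceph config set osd.0 osd_numa_auto_affinity false
+#   ceph config set osd.0 osd_numa_node 0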
+- name: set_keepcaps
+ type: bool
+ level: advanced
+ desc: set the keepcaps flag before changing UID, preserving the permitted capability set
+ long_desc: When Ceph switches from root to the ceph uid, all capabilities in all sets are erased. If
+ a component that is capability aware needs a specific capability, the keepcaps flag maintains
+ the permitted capability set, allowing the capabilities in the effective set to be activated as needed.
+ default: false
+ flags:
+ - startup
+- name: osd_smart_report_timeout
+ type: uint
+ level: advanced
+ desc: Timeout (in seconds) for smartctl to run, default is set to 5
+ default: 5
+# verify backend can support configured max object name length
+- name: osd_check_max_object_name_len_on_startup
+ type: bool
+ level: dev
+ default: true
+ with_legacy: true
+- name: osd_max_backfills
+ type: uint
+ level: advanced
+ desc: Maximum number of concurrent local and remote backfills or recoveries per
+ OSD
+ long_desc: There can be osd_max_backfills local reservations AND the same remote
+ reservations per OSD. So a value of 1 lets this OSD participate as 1 PG primary
+ in recovery and 1 shard of another recovering PG.
+ fmt_desc: The maximum number of backfills allowed to or from a single OSD.
+ Note that this is applied separately for read and write operations.
+ default: 1
+ flags:
+ - runtime
+ with_legacy: true
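+# Example (illustrative): with the default osd_max_backfills = 1, an OSD can
+# hold one local reservation (as primary of a recovering/backfilling PG) and,
+# at the same time, one remote reservation (as a shard of another recovering
+# PG).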
+# Minimum recovery priority (255 = max, smaller = lower)
+- name: osd_min_recovery_priority
+ type: int
+ level: advanced
+ desc: Minimum priority below which recovery is not performed
+ long_desc: The purpose here is to prevent the cluster from doing *any* lower priority
+ work (e.g., rebalancing) below this threshold and focus solely on higher priority
+ work (e.g., replicating degraded objects).
+ default: 0
+ with_legacy: true
+- name: osd_backfill_retry_interval
+ type: float
+ level: advanced
+ desc: how frequently to retry backfill reservations after being denied (e.g., due
+ to a full OSD)
+ fmt_desc: The number of seconds to wait before retrying backfill requests.
+ default: 30
+ with_legacy: true
+- name: osd_recovery_retry_interval
+ type: float
+ level: advanced
+ desc: how frequently to retry recovery reservations after being denied (e.g., due
+ to a full OSD)
+ default: 30
+ with_legacy: true
+- name: osd_recovery_sleep
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next recovery or backfill op. This setting
+ overrides _ssd, _hdd, and _hybrid if non-zero.
+ fmt_desc: Time in seconds to sleep before the next recovery or backfill op.
+ Increasing this value will slow down recovery operations while
+ client operations will be less impacted.
+ default: 0
+ flags:
+ - runtime
+ with_legacy: true
+- name: osd_recovery_sleep_hdd
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next recovery or backfill op for HDDs
+ fmt_desc: Time in seconds to sleep before next recovery or backfill op
+ for HDDs.
+ default: 0.1
+ flags:
+ - runtime
+ with_legacy: true
+- name: osd_recovery_sleep_ssd
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next recovery or backfill op for SSDs
+ fmt_desc: Time in seconds to sleep before the next recovery or backfill op
+ for SSDs.
+ default: 0
+ see_also:
+ - osd_recovery_sleep
+ flags:
+ - runtime
+ with_legacy: true
+- name: osd_recovery_sleep_hybrid
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next recovery or backfill op when data is
+ on HDD and journal is on SSD
+ fmt_desc: Time in seconds to sleep before the next recovery or backfill op
+ when OSD data is on HDD and OSD journal / WAL+DB is on SSD.
+ default: 0.025
+ see_also:
+ - osd_recovery_sleep
+ flags:
+ - runtime
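+# Example (illustrative): throttle recovery on rotational OSDs at runtime by
+# raising the per-op sleep (these options carry the "runtime" flag):
+#   ceph config set osd osd_recovery_sleep_hdd 0.5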
+- name: osd_snap_trim_sleep
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next snap trim. This setting overrides _ssd,
+ _hdd, and _hybrid if non-zero.
+ fmt_desc: Time in seconds to sleep before next snap trim op.
+ Increasing this value will slow down snap trimming.
+ This option overrides backend specific variants.
+ default: 0
+ flags:
+ - runtime
+ with_legacy: true
+- name: osd_snap_trim_sleep_hdd
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next snap trim for HDDs
+ default: 5
+ flags:
+ - runtime
+- name: osd_snap_trim_sleep_ssd
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next snap trim for SSDs
+ fmt_desc: Time in seconds to sleep before next snap trim op
+ for SSD OSDs (including NVMe).
+ default: 0
+ flags:
+ - runtime
+- name: osd_snap_trim_sleep_hybrid
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next snap trim when data is on HDD and journal
+ is on SSD
+ fmt_desc: Time in seconds to sleep before next snap trim op
+ when OSD data is on an HDD and the OSD journal or WAL+DB is on an SSD.
+ default: 2
+ flags:
+ - runtime
+- name: osd_scrub_invalid_stats
+ type: bool
+ level: advanced
+ default: true
+ with_legacy: true
+- name: osd_max_scrubs
+ type: int
+ level: advanced
+ desc: Maximum concurrent scrubs on a single OSD
+ fmt_desc: The maximum number of simultaneous scrub operations for
+ a Ceph OSD Daemon.
+ default: 1
+ with_legacy: true
+- name: osd_scrub_during_recovery
+ type: bool
+ level: advanced
+ desc: Allow scrubbing when PGs on the OSD are undergoing recovery
+ fmt_desc: Allow scrub during recovery. Setting this to ``false`` will disable
+ scheduling a new scrub (and deep-scrub) while there is active recovery.
+ Already running scrubs will be continued. This might be useful to reduce
+ load on busy clusters.
+ default: false
+ with_legacy: true
+- name: osd_repair_during_recovery
+ type: bool
+ level: advanced
+ desc: Allow requested repairing when PGs on the OSD are undergoing recovery
+ default: false
+ with_legacy: true
+- name: osd_scrub_begin_hour
+ type: int
+ level: advanced
+ desc: Restrict scrubbing to this hour of the day or later
+ long_desc: Use osd_scrub_begin_hour=0 and osd_scrub_end_hour=0 for the entire day.
+ fmt_desc: This restricts scrubbing to this hour of the day or later.
+ Use ``osd_scrub_begin_hour = 0`` and ``osd_scrub_end_hour = 0``
+ to allow scrubbing for the entire day. Together with ``osd_scrub_end_hour``, these
+ options define a time window in which scrubs can happen. However, a scrub
+ is performed regardless of the time window whenever the placement
+ group's scrub interval exceeds ``osd_scrub_max_interval``.
+ default: 0
+ see_also:
+ - osd_scrub_end_hour
+ min: 0
+ max: 23
+ with_legacy: true
+- name: osd_scrub_end_hour
+ type: int
+ level: advanced
+ desc: Restrict scrubbing to hours of the day earlier than this
+ long_desc: Use osd_scrub_begin_hour=0 and osd_scrub_end_hour=0 for the entire day.
+ fmt_desc: This restricts scrubbing to hours of the day earlier than this.
+ Use ``osd_scrub_begin_hour = 0`` and ``osd_scrub_end_hour = 0`` to allow scrubbing
+ for the entire day. Together with ``osd_scrub_begin_hour``, these options define a time
+ window in which scrubs can happen. However, a scrub is performed
+ regardless of the time window whenever the placement
+ group's scrub interval exceeds ``osd_scrub_max_interval``.
+ default: 0
+ see_also:
+ - osd_scrub_begin_hour
+ min: 0
+ max: 23
+ with_legacy: true
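+# Example (illustrative): restrict scrubbing to a nightly window that wraps
+# past midnight, e.g. 22:00-06:00:
+#   osd_scrub_begin_hour = 22
+#   osd_scrub_end_hour = 6
+# A PG whose scrub interval already exceeds osd_scrub_max_interval is still
+# scrubbed outside this window.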
+- name: osd_scrub_begin_week_day
+ type: int
+ level: advanced
+ desc: Restrict scrubbing to this day of the week or later
+ long_desc: 0 = Sunday, 1 = Monday, etc. Use osd_scrub_begin_week_day=0 osd_scrub_end_week_day=0
+ for the entire week.
+ fmt_desc: This restricts scrubbing to this day of the week or later.
+ 0 = Sunday, 1 = Monday, etc. Use ``osd_scrub_begin_week_day = 0``
+ and ``osd_scrub_end_week_day = 0`` to allow scrubbing for the entire week.
+ Along with ``osd_scrub_end_week_day``, they define a time window in which
+ scrubs can happen. However, a scrub is performed regardless of the time
+ window whenever the PG's
+ scrub interval exceeds ``osd_scrub_max_interval``.
+ default: 0
+ see_also:
+ - osd_scrub_end_week_day
+ min: 0
+ max: 6
+ with_legacy: true
+- name: osd_scrub_end_week_day
+ type: int
+ level: advanced
+ desc: Restrict scrubbing to days of the week earlier than this
+ long_desc: 0 = Sunday, 1 = Monday, etc. Use osd_scrub_begin_week_day=0 osd_scrub_end_week_day=0
+ for the entire week.
+ fmt_desc: This restricts scrubbing to days of the week earlier than this.
+ 0 = Sunday, 1 = Monday, etc. Use ``osd_scrub_begin_week_day = 0``
+ and ``osd_scrub_end_week_day = 0`` to allow scrubbing for the entire week.
+ Along with ``osd_scrub_begin_week_day``, these options define a time
+ window in which scrubs can happen. However, a scrub is performed
+ regardless of the time window whenever the placement
+ group's scrub interval exceeds ``osd_scrub_max_interval``.
+ default: 0
+ see_also:
+ - osd_scrub_begin_week_day
+ min: 0
+ max: 6
+ with_legacy: true
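+# Example (illustrative): combined with the hour settings above, restrict
+# scrubbing to weekdays only (Monday through Friday):
+#   osd_scrub_begin_week_day = 1
+#   osd_scrub_end_week_day = 6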
+- name: osd_scrub_load_threshold
+ type: float
+ level: advanced
+ desc: Allow scrubbing when system load divided by number of CPUs is below this value
+ fmt_desc: The normalized maximum load. Ceph will not scrub when the system load
+ (as defined by ``getloadavg() / number of online CPUs``) is higher than this number.
+ Default is ``0.5``.
+ default: 0.5
+ with_legacy: true
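+# Example (illustrative): on a host with 8 online CPUs and the default
+# threshold of 0.5, new scrubs start only while loadavg / 8 < 0.5, i.e. while
+# the load average is below 4.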
+# if load is low
+- name: osd_scrub_min_interval
+ type: float
+ level: advanced
+ desc: Scrub each PG no more often than this interval
+ fmt_desc: The minimal interval in seconds for scrubbing the Ceph OSD Daemon
+ when the Ceph Storage Cluster load is low.
+ default: 1_day
+ see_also:
+ - osd_scrub_max_interval
+ with_legacy: true
+# regardless of load
+- name: osd_scrub_max_interval
+ type: float
+ level: advanced
+ desc: Scrub each PG no less often than this interval
+ fmt_desc: The maximum interval in seconds for scrubbing the Ceph OSD Daemon
+ irrespective of cluster load.
+ default: 7_day
+ see_also:
+ - osd_scrub_min_interval
+ with_legacy: true
+# randomize the scheduled scrub in the span of [min,min*(1+randomize_ratio))
+- name: osd_scrub_interval_randomize_ratio
+ type: float
+ level: advanced
+ desc: Ratio of scrub interval to randomly vary
+ long_desc: This prevents a scrub 'stampede' by randomly varying the scrub intervals
+ so that they are soon uniformly distributed over the week
+ fmt_desc: Add a random delay to ``osd_scrub_min_interval`` when scheduling
+ the next scrub job for a PG. The delay is a random
+ value less than ``osd_scrub_min_interval`` \*
+ ``osd_scrub_interval_randomized_ratio``. The default setting
+ spreads scrubs throughout the allowed time
+ window of ``[1, 1.5]`` \* ``osd_scrub_min_interval``.
+ default: 0.5
+ see_also:
+ - osd_scrub_min_interval
+ with_legacy: true
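+# Example (illustrative): with osd_scrub_min_interval = 1 day and the default
+# ratio of 0.5, the next scrub of a PG is scheduled at a random point in
+# [1 day, 1 day * (1 + 0.5)), i.e. between 24 and 36 hours after the previous
+# one.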
+# the probability to back off the scheduled scrub
+- name: osd_scrub_backoff_ratio
+ type: float
+ level: dev
+ desc: Backoff ratio for scheduling scrubs
+ long_desc: This is the percentage of ticks that do NOT schedule scrubs; 66% means
+ that only 1 out of 3 ticks will schedule scrubs
+ default: 0.66
+ with_legacy: true
+- name: osd_scrub_chunk_min
+ type: int
+ level: advanced
+ desc: Minimum number of objects to deep-scrub in a single chunk
+ fmt_desc: The minimal number of object store chunks to scrub during a single operation.
+ Ceph blocks writes to a single chunk during scrub.
+ default: 5
+ see_also:
+ - osd_scrub_chunk_max
+ with_legacy: true
+- name: osd_scrub_chunk_max
+ type: int
+ level: advanced
+ desc: Maximum number of objects to deep-scrub in a single chunk
+ fmt_desc: The maximum number of object store chunks to scrub during a single operation.
+ default: 25
+ see_also:
+ - osd_scrub_chunk_min
+ with_legacy: true
+- name: osd_shallow_scrub_chunk_min
+ type: int
+ level: advanced
+ desc: Minimum number of objects to scrub in a single chunk
+ fmt_desc: The minimum number of object store chunks to scrub during a single operation.
+ Not applicable to deep scrubs.
+ Ceph blocks writes to a single chunk during scrub.
+ default: 50
+ see_also:
+ - osd_shallow_scrub_chunk_max
+ - osd_scrub_chunk_min
+ with_legacy: true
+- name: osd_shallow_scrub_chunk_max
+ type: int
+ level: advanced
+ desc: Maximum number of objects to scrub in a single chunk
+ fmt_desc: The maximum number of object store chunks to scrub during a single operation.
+ Not applicable to deep scrubs.
+ default: 100
+ see_also:
+ - osd_shallow_scrub_chunk_min
+ - osd_scrub_chunk_max
+ with_legacy: true
+# sleep between [deep]scrub ops
+- name: osd_scrub_sleep
+ type: float
+ level: advanced
+ desc: Duration to inject a delay during scrubbing
+ fmt_desc: Time to sleep before scrubbing the next group of chunks. Increasing this value will slow
+ down the overall rate of scrubbing so that client operations will be less impacted.
+ default: 0
+ flags:
+ - runtime
+ with_legacy: true
+# more sleep between [deep]scrub ops
+- name: osd_scrub_extended_sleep
+ type: float
+ level: advanced
+ desc: Duration to inject a delay during scrubbing out of scrubbing hours
+ default: 0
+ see_also:
+ - osd_scrub_begin_hour
+ - osd_scrub_end_hour
+ - osd_scrub_begin_week_day
+ - osd_scrub_end_week_day
+ with_legacy: true
+# whether auto-repair inconsistencies upon deep-scrubbing
+- name: osd_scrub_auto_repair
+ type: bool
+ level: advanced
+ desc: Automatically repair damaged objects detected during scrub
+ fmt_desc: Setting this to ``true`` will enable automatic PG repair when errors
+ are found by scrubs or deep-scrubs. However, if more than
+ ``osd_scrub_auto_repair_num_errors`` errors are found, a repair is NOT performed.
+ default: false
+ with_legacy: true
+# only auto-repair when number of errors is below this threshold
+- name: osd_scrub_auto_repair_num_errors
+ type: uint
+ level: advanced
+ desc: Maximum number of detected errors to automatically repair
+ fmt_desc: Auto repair will not occur if more than this many errors are found.
+ default: 5
+ see_also:
+ - osd_scrub_auto_repair
+ with_legacy: true
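+# Example (illustrative): with osd_scrub_auto_repair = true and the default
+# threshold of 5, a scrub that finds 3 errors triggers an automatic repair,
+# while one that finds 6 errors does not and must be repaired manually.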
+- name: osd_scrub_max_preemptions
+ type: uint
+ level: advanced
+ desc: Set the maximum number of times we will preempt a deep scrub due to a client
+ operation before blocking client IO to complete the scrub
+ default: 5
+ min: 0
+ max: 30
+- name: osd_deep_scrub_interval
+ type: float
+ level: advanced
+ desc: Deep scrub each PG (i.e., verify data checksums) at least this often
+ fmt_desc: The interval for "deep" scrubbing (fully reading all data). The
+ ``osd_scrub_load_threshold`` does not affect this setting.
+ default: 7_day
+ with_legacy: true
+- name: osd_deep_scrub_randomize_ratio
+ type: float
+ level: advanced
+ desc: Scrubs will randomly become deep scrubs at this rate (0.15 -> 15% of scrubs
+ are deep)
+ long_desc: This prevents a deep scrub 'stampede' by spreading deep scrubs so they
+ are uniformly distributed over the week
+ default: 0.15
+ with_legacy: true
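+# Example (illustrative): with daily scheduled scrubs and the default ratio of
+# 0.15, roughly 0.15 * 7 = ~1 scrub per PG per week is promoted to a deep
+# scrub, spreading deep scrubs evenly across the week.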
+- name: osd_deep_scrub_stride
+ type: size
+ level: advanced
+ desc: Number of bytes to read from an object at a time during deep scrub
+ fmt_desc: Read size when doing a deep scrub.
+ default: 512_K
+ with_legacy: true
+- name: osd_deep_scrub_keys
+ type: int
+ level: advanced
+ desc: Number of keys to read from an object at a time during deep scrub
+ default: 1024
+ with_legacy: true
+# objects must be this old (seconds) before we update the whole-object digest on scrub
+- name: osd_deep_scrub_update_digest_min_age
+ type: int
+ level: advanced
+ desc: Update overall object digest only if object was last modified longer ago than
+ this
+ default: 2_hr
+ with_legacy: true
+- name: osd_deep_scrub_large_omap_object_key_threshold
+ type: uint
+ level: advanced
+ desc: Warn when we encounter an object with more omap keys than this
+ default: 200000
+ services:
+ - osd
+ - mds
+ see_also:
+ - osd_deep_scrub_large_omap_object_value_sum_threshold
+ with_legacy: true
+- name: osd_deep_scrub_large_omap_object_value_sum_threshold
+ type: size
+ level: advanced
+ desc: Warn when we encounter an object with more omap key bytes than this
+ default: 1_G
+ services:
+ - osd
+ see_also:
+ - osd_deep_scrub_large_omap_object_key_threshold
+ with_legacy: true
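+# Example (illustrative): an object holding 250000 omap keys (above the
+# 200000-key threshold), or whose omap values sum to more than 1 GiB, is
+# flagged during deep scrub and raises a "large omap objects" cluster warning.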
+# when scrubbing blocks on a locked object
+- name: osd_blocked_scrub_grace_period
+ type: int
+ level: advanced
+ desc: Time (seconds) before issuing a cluster-log warning
+ long_desc: Waiting too long for an object in the scrubbed chunk to be unlocked.
+ default: 120
+ with_legacy: true
+# timely updates to the 'pg dump' output, esp. re scrub scheduling
+- name: osd_stats_update_period_scrubbing
+ type: int
+ level: advanced
+ desc: Stats update period (seconds) when scrubbing
+ long_desc: A PG actively scrubbing (or blocked while scrubbing) publishes its
+ stats (inc. scrub/block duration) every this many seconds.
+ default: 15
+ with_legacy: false
+- name: osd_stats_update_period_not_scrubbing
+ type: int
+ level: advanced
+ desc: Stats update period (seconds) when not scrubbing
+ long_desc: A PG for which this OSD is the primary publishes its
+ stats (inc. scrub/block duration) every this many seconds.
+ default: 120
+ with_legacy: false
+# when replicas are slow to respond to scrub resource reservations
+# Note: disable by using a very large value
+- name: osd_scrub_slow_reservation_response
+ type: millisecs
+ level: advanced
+ desc: Duration before issuing a cluster-log warning
+ long_desc: Waiting too long for a replica to respond (after at least half of the
+ replicas have responded).
+ default: 2200
+ min: 500
+ see_also:
+ - osd_scrub_reservation_timeout
+ with_legacy: false
+# when a replica does not respond to scrub resource request
+# Note: disable by using a very large value
+- name: osd_scrub_reservation_timeout
+ type: millisecs
+ level: advanced
+ desc: Duration before aborting the scrub session
+ long_desc: Waiting too long for some replicas to respond to
+ scrub reservation requests.
+ default: 5000
+ min: 2000
+ see_also:
+ - osd_scrub_slow_reservation_response
+ with_legacy: false
+# where rados plugins are stored
+- name: osd_class_dir
+ type: str
+ level: advanced
+ default: @CMAKE_INSTALL_LIBDIR@/rados-classes
+ fmt_desc: The class path for RADOS class plug-ins.
+ with_legacy: true
+- name: osd_open_classes_on_start
+ type: bool
+ level: advanced
+ default: true
+ with_legacy: true
+# list of object classes allowed to be loaded (allow all: *)
+- name: osd_class_load_list
+ type: str
+ level: advanced
+ default: cephfs hello journal lock log numops otp rbd refcount rgw rgw_gc timeindex
+ user version cas cmpomap queue 2pc_queue fifo
+ with_legacy: true
+# list of object classes with default execute perm (allow all: *)
+- name: osd_class_default_list
+ type: str
+ level: advanced
+ default: cephfs hello journal lock log numops otp rbd refcount rgw rgw_gc timeindex
+ user version cas cmpomap queue 2pc_queue fifo
+ with_legacy: true
+- name: osd_agent_max_ops
+ type: int
+ level: advanced
+ desc: maximum concurrent tiering operations for tiering agent
+ fmt_desc: The maximum number of simultaneous flushing ops per tiering agent
+ in the high speed mode.
+ default: 4
+ with_legacy: true
+- name: osd_agent_max_low_ops
+ type: int
+ level: advanced
+ desc: maximum concurrent low-priority tiering operations for tiering agent
+ fmt_desc: The maximum number of simultaneous flushing ops per tiering agent
+ in the low speed mode.
+ default: 2
+ with_legacy: true
+- name: osd_agent_min_evict_effort
+ type: float
+ level: advanced
+ desc: minimum effort to expend evicting clean objects
+ default: 0.1
+ min: 0
+ max: 0.99
+ with_legacy: true
+- name: osd_agent_quantize_effort
+ type: float
+ level: advanced
+ desc: size of quantize unit for eviction effort
+ default: 0.1
+ with_legacy: true
+- name: osd_agent_delay_time
+ type: float
+ level: advanced
+ desc: how long agent should sleep if it has no work to do
+ default: 5
+ with_legacy: true
+# decay atime and temp histograms after how many objects go by
+- name: osd_agent_hist_halflife
+ type: int
+ level: advanced
+ desc: halflife of agent atime and temp histograms
+ default: 1000
+ with_legacy: true
+- name: osd_agent_slop
+ type: float
+ level: advanced
+ desc: slop factor to avoid switching tiering flush and eviction mode
+ default: 0.02
+ with_legacy: true
+- name: osd_find_best_info_ignore_history_les
+ type: bool
+ level: dev
+ desc: ignore last_epoch_started value when peering AND PROBABLY LOSE DATA
+ long_desc: THIS IS AN EXTREMELY DANGEROUS OPTION THAT SHOULD ONLY BE USED AT THE
+ DIRECTION OF A DEVELOPER. It makes peering ignore the last_epoch_started value,
+ which can allow peering to accept an OSD as having an authoritative view
+ of a PG's contents even when that view is in fact old and stale, typically leading to
+ data loss (by believing a stale PG is up to date).
+ default: false
+ with_legacy: true
+- name: osd_uuid
+ type: uuid
+ level: advanced
+ desc: uuid label for a new OSD
+ fmt_desc: The universally unique identifier (UUID) for the Ceph OSD Daemon.
+ note: The ``osd_uuid`` applies to a single Ceph OSD Daemon. The ``fsid``
+ applies to the entire cluster.
+ flags:
+ - create
+ with_legacy: true
+- name: osd_data
+ type: str
+ level: advanced
+ desc: path to OSD data
+ fmt_desc: The path to the OSD's data. You must create the directory when
+ deploying Ceph. You should mount a drive for OSD data at this
+ mount point. We do not recommend changing the default.
+ default: /var/lib/ceph/osd/$cluster-$id
+ flags:
+ - no_mon_update
+ with_legacy: true
+- name: osd_journal
+ type: str
+ level: advanced
+ desc: path to OSD journal (when FileStore backend is in use)
+ fmt_desc: The path to the OSD's journal. This may be a path to a file or a
+ block device (such as a partition of an SSD). If it is a file,
+ you must create the directory to contain it. We recommend using a
+ separate fast device when the ``osd_data`` drive is an HDD.
+ default: /var/lib/ceph/osd/$cluster-$id/journal
+ flags:
+ - no_mon_update
+ with_legacy: true
+- name: osd_journal_size
+ type: size
+ level: advanced
+ desc: size of FileStore journal (in MiB)
+ fmt_desc: The size of the journal in megabytes.
+ default: 5_K
+ flags:
+ - create
+ with_legacy: true
+- name: osd_journal_flush_on_shutdown
+ type: bool
+ level: advanced
+ desc: flush FileStore journal contents during clean OSD shutdown
+ default: true
+ with_legacy: true
+- name: osd_compact_on_start
+ type: bool
+ level: advanced
+ desc: compact OSD's object store's OMAP on start
+ default: false
+# flags for specific control purpose during osd mount() process.
+# e.g., can be 1 to skip over replaying journal
+# or 2 to skip over mounting omap or 3 to skip over both.
+# This might be helpful in case the journal is totally corrupted
+# and we still want to bring the osd daemon back normally, etc.
+- name: osd_os_flags
+ type: uint
+ level: dev
+ desc: flags to skip filestore omap or journal initialization
+ default: 0
+- name: osd_max_write_size
+ type: size
+ level: advanced
+ desc: Maximum size of a RADOS write operation in megabytes
+ long_desc: This setting prevents clients from doing very large writes to RADOS. If
+ you set this to a value below what clients expect, they will receive an error
+ when attempting to write to the cluster.
+ fmt_desc: The maximum size of a write in megabytes.
+ default: 90
+ min: 4
+ with_legacy: true
+- name: osd_max_pgls
+ type: uint
+ level: advanced
+ desc: maximum number of results when listing objects in a pool
+ fmt_desc: The maximum number of objects to return when listing objects in a pool. A client
+ requesting a large number can tie up the Ceph OSD Daemon.
+ default: 1_K
+ with_legacy: true
+- name: osd_client_message_size_cap
+ type: size
+ level: advanced
+ desc: maximum memory to devote to in-flight client requests
+ long_desc: If this value is exceeded, the OSD will not read any new client data
+ off of the network until memory is freed.
+ fmt_desc: The largest client data message allowed in memory.
+ default: 500_M
+ with_legacy: true
+- name: osd_client_message_cap
+ type: uint
+ level: advanced
+ desc: maximum number of in-flight client requests
+ default: 256
+ with_legacy: true
+- name: osd_crush_update_on_start
+ type: bool
+ level: advanced
+ desc: update OSD CRUSH location on startup
+ default: true
+ with_legacy: true
+- name: osd_class_update_on_start
+ type: bool
+ level: advanced
+ desc: set OSD device class on startup
+ default: true
+ with_legacy: true
+- name: osd_crush_initial_weight
+ type: float
+ level: advanced
+ desc: if >= 0, initial CRUSH weight for newly created OSDs
+ long_desc: If this value is negative, the size of the OSD in TiB is used.
+ fmt_desc: The initial CRUSH weight for newly added OSDs. By default,
+ the initial CRUSH weight for a newly added OSD is set to its device size in
+ TiB. See `Weighting Bucket Items`_ for details.
+ default: -1
+ with_legacy: true
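+# Example (illustrative): set osd_crush_initial_weight = 0 so that newly
+# created OSDs receive no data until their weight is raised explicitly, e.g.
+# (osd.12 and the weight are placeholders):
+#   ceph osd crush reweight osd.12 3.64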
+# Allows the "peered" state for recovery and backfill below min_size
+- name: osd_allow_recovery_below_min_size
+ type: bool
+ level: dev
+ desc: allow replicated pools to recover with < min_size active members
+ default: true
+ services:
+ - osd
+ with_legacy: true
+# cap on # of inc maps we send to peers, clients
+- name: osd_map_share_max_epochs
+ type: int
+ level: advanced
+ default: 40
+ with_legacy: true
+- name: osd_map_cache_size
+ type: int
+ level: advanced
+ default: 50
+ fmt_desc: The number of OSD maps to keep cached.
+ with_legacy: true
+- name: osd_pg_epoch_max_lag_factor
+ type: float
+ level: advanced
+ desc: Max multiple of the map cache that PGs can lag before we throttle map ingest
+ default: 2
+ see_also:
+ - osd_map_cache_size
+- name: osd_inject_bad_map_crc_probability
+ type: float
+ level: dev
+ default: 0
+ with_legacy: true
+- name: osd_inject_failure_on_pg_removal
+ type: bool
+ level: dev
+ default: false
+ with_legacy: true
+# shut down the OSD if its status flips more than max_markdown_count times within the most recent max_markdown_period seconds
+- name: osd_max_markdown_period
+ type: int
+ level: advanced
+ default: 10_min
+ with_legacy: true
+- name: osd_max_markdown_count
+ type: int
+ level: advanced
+ default: 5
+ with_legacy: true
+- name: osd_op_thread_timeout
+ type: int
+ level: advanced
+ default: 15
+ fmt_desc: The Ceph OSD Daemon operation thread timeout in seconds.
+ with_legacy: true
+- name: osd_op_thread_suicide_timeout
+ type: int
+ level: advanced
+ default: 150
+ with_legacy: true
+- name: osd_op_pq_max_tokens_per_priority
+ type: uint
+ level: advanced
+ default: 4_M
+ with_legacy: true
+- name: osd_op_pq_min_cost
+ type: size
+ level: advanced
+ default: 64_K
+ with_legacy: true
+# preserve clone_overlap during recovery/migration
+- name: osd_recover_clone_overlap
+ type: bool
+ level: advanced
+ default: true
+ fmt_desc: Preserves clone overlap during recovery. Should always be set
+ to ``true``.
+ with_legacy: true
+- name: osd_num_cache_shards
+ type: size
+ level: advanced
+ desc: The number of cache shards to use in the object store.
+ default: 32
+ flags:
+ - startup
+- name: osd_aggregated_slow_ops_logging
+ type: bool
+ level: advanced
+ desc: Allow OSD daemon to send an aggregated slow ops to the cluster log
+ fmt_desc: If set to ``true``, the OSD daemon will send slow ops information in
+ an aggregated format to the cluster log; otherwise, it sends every slow op to the
+ cluster log.
+ default: true
+ with_legacy: true
+- name: osd_op_num_threads_per_shard
+ type: int
+ level: advanced
+ default: 0
+ flags:
+ - startup
+ with_legacy: true
+- name: osd_op_num_threads_per_shard_hdd
+ type: int
+ level: advanced
+ default: 1
+ see_also:
+ - osd_op_num_threads_per_shard
+ flags:
+ - startup
+ with_legacy: true
+- name: osd_op_num_threads_per_shard_ssd
+ type: int
+ level: advanced
+ default: 2
+ see_also:
+ - osd_op_num_threads_per_shard
+ flags:
+ - startup
+ with_legacy: true
+- name: osd_op_num_shards
+ type: int
+ level: advanced
+ fmt_desc: The number of shards allocated for a given OSD. Each shard has its own processing queue.
+ PGs on the OSD are distributed evenly across the shards. This setting overrides _ssd and _hdd if
+ non-zero.
+ default: 0
+ flags:
+ - startup
+ with_legacy: true
+- name: osd_op_num_shards_hdd
+ type: int
+ level: advanced
+ fmt_desc: the number of shards allocated for a given OSD (for rotational media).
+ default: 5
+ see_also:
+ - osd_op_num_shards
+ flags:
+ - startup
+ with_legacy: true
+- name: osd_op_num_shards_ssd
+ type: int
+ level: advanced
+ fmt_desc: the number of shards allocated for a given OSD (for solid state media).
+ default: 8
+ see_also:
+ - osd_op_num_shards
+ flags:
+ - startup
+ with_legacy: true
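+# Example (illustrative): with the defaults above, an HDD OSD runs
+# osd_op_num_shards_hdd * osd_op_num_threads_per_shard_hdd = 5 * 1 = 5 op
+# worker threads, while an SSD OSD runs 8 * 2 = 16.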
+- name: osd_skip_data_digest
+ type: bool
+ level: dev
+ desc: Do not store full-object checksums if the backend (bluestore) does its own
+ checksums. Only usable with all BlueStore OSDs.
+ default: false
+# PrioritizedQueue (prio), Weighted Priority Queue (wpq),
+# mclock_opclass, mclock_client, or debug_random. "mclock_opclass"
+# and "mclock_client" are based on the mClock/dmClock algorithm
+# (Gulati, et al. 2010). "mclock_opclass" prioritizes based on the
+# class the operation belongs to. "mclock_client" does the same but
+# also works to enforce fairness between clients. "debug_random"
+# chooses among all four with equal probability.
+- name: osd_op_queue
+ type: str
+ level: advanced
+ desc: which operation priority queue algorithm to use
+ long_desc: which operation priority queue algorithm to use
+ fmt_desc: This sets the type of queue to be used for prioritizing ops
+ within each OSD. Both queues feature a strict sub-queue which is
+ dequeued before the normal queue. The normal queue is different
+ between implementations. The WeightedPriorityQueue (``wpq``)
+ dequeues operations in relation to their priorities to prevent
+ starvation of any queue. WPQ should help in cases where a few OSDs
+ are more overloaded than others. The mClockQueue
+ (``mclock_scheduler``) prioritizes operations based on which class
+ they belong to (recovery, scrub, snaptrim, client op, osd subop).
+ See `QoS Based on mClock`_. Requires a restart.
+ default: mclock_scheduler
+ see_also:
+ - osd_op_queue_cut_off
+ enum_values:
+ - wpq
+ - mclock_scheduler
+ - debug_random
+ with_legacy: true
+# Min priority to go to strict queue. (low, high)
+- name: osd_op_queue_cut_off
+ type: str
+ level: advanced
+ desc: the threshold between high priority ops and low priority ops
+ long_desc: the threshold between high priority ops that use strict priority ordering
+ and low priority ops that use a fairness algorithm that may or may not incorporate
+ priority
+ fmt_desc: This selects which priority ops will be sent to the strict
+ queue versus the normal queue. The ``low`` setting sends all
+ replication ops and higher to the strict queue, while the ``high``
+ option sends only replication acknowledgment ops and higher to
+ the strict queue. Setting this to ``high`` should help when a few
+ OSDs in the cluster are very busy especially when combined with
+ ``wpq`` in the ``osd_op_queue`` setting. OSDs that are very busy
+ handling replication traffic could starve primary client traffic
+ on these OSDs without these settings. Requires a restart.
+ default: high
+ see_also:
+ - osd_op_queue
+ enum_values:
+ - low
+ - high
+ - debug_random
+ with_legacy: true
+- name: osd_mclock_scheduler_client_res
+ type: float
+ level: advanced
+ desc: IO proportion reserved for each client (default). The default value
+ of 0 specifies the lowest possible reservation. Any value greater than
+ 0 and up to 1.0 specifies the minimum IO proportion to reserve for each
+ client in terms of a fraction of the OSD's maximum IOPS capacity.
+ long_desc: Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: IO proportion reserved for each client (default).
+ default: 0
+ min: 0
+ max: 1.0
+ see_also:
+ - osd_op_queue
+- name: osd_mclock_scheduler_client_wgt
+ type: uint
+ level: advanced
+ desc: IO share for each client (default) over reservation
+ long_desc: Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: IO share for each client (default) over reservation.
+ default: 1
+ see_also:
+ - osd_op_queue
+- name: osd_mclock_scheduler_client_lim
+ type: float
+ level: advanced
+ desc: IO limit for each client (default) over reservation. The default
+ value of 0 specifies no limit enforcement, which means each client can
+ use the maximum possible IOPS capacity of the OSD. Any value greater
+ than 0 and up to 1.0 specifies the upper IO limit over reservation
+ that each client receives in terms of a fraction of the OSD's
+ maximum IOPS capacity.
+ long_desc: Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: IO limit for each client (default) over reservation.
+ default: 0
+ min: 0
+ max: 1.0
+ see_also:
+ - osd_op_queue
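+# Example (illustrative, custom mclock profile assumed): on an SSD OSD with
+# osd_mclock_max_capacity_iops_ssd = 21500, setting
+# osd_mclock_scheduler_client_res = 0.2 and osd_mclock_scheduler_client_lim =
+# 0.8 reserves about 0.2 * 21500 = 4300 IOPS for client ops and caps them at
+# about 0.8 * 21500 = 17200 IOPS.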
+- name: osd_mclock_scheduler_background_recovery_res
+ type: float
+ level: advanced
+ desc: IO proportion reserved for background recovery (default). The
+ default value of 0 specifies the lowest possible reservation. Any value
+ greater than 0 and up to 1.0 specifies the minimum IO proportion to
+ reserve for background recovery operations in terms of a fraction of
+ the OSD's maximum IOPS capacity.
+ long_desc: Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: IO proportion reserved for background recovery (default).
+ default: 0
+ min: 0
+ max: 1.0
+ see_also:
+ - osd_op_queue
+- name: osd_mclock_scheduler_background_recovery_wgt
+ type: uint
+ level: advanced
+ desc: IO share for each background recovery over reservation
+ long_desc: Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: IO share for each background recovery over reservation.
+ default: 1
+ see_also:
+ - osd_op_queue
+- name: osd_mclock_scheduler_background_recovery_lim
+ type: float
+ level: advanced
+ desc: IO limit for background recovery over reservation. The default
+ value of 0 specifies no limit enforcement, which means background
+ recovery operation can use the maximum possible IOPS capacity of the
+ OSD. Any value greater than 0 and up to 1.0 specifies the upper IO
+ limit over reservation that background recovery operation receives in
+ terms of a fraction of the OSD's maximum IOPS capacity.
+ long_desc: Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: IO limit for background recovery over reservation.
+ default: 0
+ min: 0
+ max: 1.0
+ see_also:
+ - osd_op_queue
+- name: osd_mclock_scheduler_background_best_effort_res
+ type: float
+ level: advanced
+ desc: IO proportion reserved for background best_effort (default). The
+ default value of 0 specifies the lowest possible reservation. Any value
+ greater than 0 and up to 1.0 specifies the minimum IO proportion to
+ reserve for background best_effort operations in terms of a fraction
+ of the OSD's maximum IOPS capacity.
+ long_desc: Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: IO proportion reserved for background best_effort (default).
+ default: 0
+ min: 0
+ max: 1.0
+ see_also:
+ - osd_op_queue
+- name: osd_mclock_scheduler_background_best_effort_wgt
+ type: uint
+ level: advanced
+ desc: IO share for each background best_effort over reservation
+ long_desc: Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: IO share for each background best_effort over reservation.
+ default: 1
+ see_also:
+ - osd_op_queue
+- name: osd_mclock_scheduler_background_best_effort_lim
+ type: float
+ level: advanced
+ desc: IO limit for background best_effort over reservation. The default
+ value of 0 specifies no limit enforcement, which means background
+ best_effort operation can use the maximum possible IOPS capacity of the
+ OSD. Any value greater than 0 and up to 1.0 specifies the upper IO
+ limit over reservation that background best_effort operation receives
+ in terms of a fraction of the OSD's maximum IOPS capacity.
+ long_desc: Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: IO limit for background best_effort over reservation.
+ default: 0
+ min: 0
+ max: 1.0
+ see_also:
+ - osd_op_queue
+- name: osd_mclock_scheduler_anticipation_timeout
+ type: float
+ level: advanced
+ desc: mclock anticipation timeout in seconds
+ long_desc: the amount of time that mclock waits until the unused resource is forfeited
+ default: 0
+- name: osd_mclock_max_sequential_bandwidth_hdd
+ type: size
+ level: basic
+ desc: The maximum sequential bandwidth in bytes/second of the OSD (for
+ rotational media)
+ long_desc: This option specifies the maximum sequential bandwidth to consider
+ for an OSD whose underlying device type is rotational media. This is
+ considered by the mclock scheduler to derive the cost factor to be used in
+ QoS calculations. Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: The maximum sequential bandwidth in bytes/second to consider for the
+ OSD (for rotational media)
+ default: 150_M
+ flags:
+ - runtime
+- name: osd_mclock_max_sequential_bandwidth_ssd
+ type: size
+ level: basic
+ desc: The maximum sequential bandwidth in bytes/second of the OSD (for
+ solid state media)
+ long_desc: This option specifies the maximum sequential bandwidth to consider
+ for an OSD whose underlying device type is solid state media. This is
+ considered by the mclock scheduler to derive the cost factor to be used in
+ QoS calculations. Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: The maximum sequential bandwidth in bytes/second to consider for the
+ OSD (for solid state media)
+ default: 1200_M
+ flags:
+ - runtime
+- name: osd_mclock_max_capacity_iops_hdd
+ type: float
+ level: basic
+ desc: Max random write IOPS capacity (at 4KiB block size) to consider per OSD
+ (for rotational media)
+ long_desc: This option specifies the max OSD random write IOPS capacity per
+ OSD. Contributes in QoS calculations when enabling a dmclock profile. Only
+ considered for osd_op_queue = mclock_scheduler
+ fmt_desc: Max random write IOPS capacity (at 4 KiB block size) to consider per
+ OSD (for rotational media)
+ default: 315
+ flags:
+ - runtime
+- name: osd_mclock_max_capacity_iops_ssd
+ type: float
+ level: basic
+ desc: Max random write IOPS capacity (at 4 KiB block size) to consider per OSD
+ (for solid state media)
+ long_desc: This option specifies the max OSD random write IOPS capacity per
+ OSD. Contributes in QoS calculations when enabling a dmclock profile. Only
+ considered for osd_op_queue = mclock_scheduler
+ fmt_desc: Max random write IOPS capacity (at 4 KiB block size) to consider per
+ OSD (for solid state media)
+ default: 21500
+ flags:
+ - runtime
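+# Example (illustrative): override the measured IOPS capacity for one OSD
+# (osd.3 is a placeholder) if the automatic benchmark result looks unrealistic:
+#   ceph config set osd.3 osd_mclock_max_capacity_iops_hdd 450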
+- name: osd_mclock_force_run_benchmark_on_init
+ type: bool
+ level: advanced
+ desc: Force run the OSD benchmark on OSD initialization/boot-up
+ long_desc: This option specifies whether the OSD benchmark must be run during
+ the OSD boot-up sequence even if historical data about the OSD iops capacity
+ is available in the MON config store. Enable this to refresh the OSD iops
+ capacity if the underlying device's performance characteristics have changed
+ significantly. Only considered for osd_op_queue = mclock_scheduler.
+ fmt_desc: Force run the OSD benchmark on OSD initialization/boot-up
+ default: false
+ see_also:
+ - osd_mclock_max_capacity_iops_hdd
+ - osd_mclock_max_capacity_iops_ssd
+ flags:
+ - startup
+- name: osd_mclock_skip_benchmark
+ type: bool
+ level: dev
+ desc: Skip the OSD benchmark on OSD initialization/boot-up
+ long_desc: This option specifies whether the OSD benchmark must be skipped during
+ the OSD boot-up sequence. Only considered for osd_op_queue = mclock_scheduler.
+ fmt_desc: Skip the OSD benchmark on OSD initialization/boot-up
+ default: false
+ see_also:
+ - osd_mclock_max_capacity_iops_hdd
+ - osd_mclock_max_capacity_iops_ssd
+ flags:
+ - runtime
+- name: osd_mclock_profile
+ type: str
+ level: advanced
+ desc: Which mclock profile to use
+ long_desc: This option specifies the mclock profile to enable - one among the set
+ of built-in profiles or a custom profile. Only considered for osd_op_queue = mclock_scheduler
+ fmt_desc: |
+ This sets the type of mclock profile to use for providing QoS
+ based on operations belonging to different classes (background
+ recovery, scrub, snaptrim, client op, osd subop). Once a built-in
+ profile is enabled, the lower level mclock resource control
+ parameters [*reservation, weight, limit*] and some Ceph
+ configuration parameters are set transparently. Note that the
+ above does not apply for the *custom* profile.
+ default: balanced
+ see_also:
+ - osd_op_queue
+ enum_values:
+ - balanced
+ - high_recovery_ops
+ - high_client_ops
+ - custom
+ flags:
+ - runtime
+- name: osd_mclock_override_recovery_settings
+ type: bool
+ level: advanced
+ desc: Setting this option enables the override of recovery/backfill limits
+ for the mClock scheduler.
+ long_desc: When set, this option enables overriding the max recovery
+ active and the max backfills limits while the mClock scheduler is active. These
+ options are not modifiable when mClock scheduler is active. Any attempt
+ to modify these values without setting this option will reset the
+ recovery or backfill option back to its default value.
+ fmt_desc: Setting this option will enable the override of the
+ recovery/backfill limits for the mClock scheduler as defined by the
+ ``osd_recovery_max_active_hdd``, ``osd_recovery_max_active_ssd`` and
+ ``osd_max_backfills`` options.
+ default: false
+ see_also:
+ - osd_recovery_max_active_hdd
+ - osd_recovery_max_active_ssd
+ - osd_max_backfills
+ flags:
+ - runtime
+- name: osd_mclock_iops_capacity_threshold_hdd
+ type: float
+ level: basic
+ desc: The threshold IOPS capacity (at 4KiB block size) beyond which to ignore
+ the OSD bench results for an OSD (for rotational media)
+ long_desc: This option specifies the threshold IOPS capacity for an OSD under
+ which the OSD bench results can be considered for QoS calculations. Only
+ considered for osd_op_queue = mclock_scheduler
+ fmt_desc: The threshold IOPS capacity (at 4KiB block size) beyond which to
+ ignore OSD bench results for an OSD (for rotational media)
+ default: 500
+ flags:
+ - runtime
+- name: osd_mclock_iops_capacity_threshold_ssd
+ type: float
+ level: basic
+ desc: The threshold IOPS capacity (at 4KiB block size) beyond which to ignore
+ the OSD bench results for an OSD (for solid state media)
+ long_desc: This option specifies the threshold IOPS capacity for an OSD under
+ which the OSD bench results can be considered for QoS calculations. Only
+ considered for osd_op_queue = mclock_scheduler
+ fmt_desc: The threshold IOPS capacity (at 4KiB block size) beyond which to
+ ignore OSD bench results for an OSD (for solid state media)
+ default: 80000
+ flags:
+ - runtime
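+# Example (illustrative): if the on-boot OSD bench reports 1200 IOPS for an
+# HDD OSD, that exceeds the 500 IOPS threshold above, so the measured value is
+# ignored and the default osd_mclock_max_capacity_iops_hdd is used instead.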
+# Set to true for testing. Users should NOT set this.
+# If set to true, even after reading enough shards to
+# decode the object, any error will be reported.
+- name: osd_read_ec_check_for_errors
+ type: bool
+ level: advanced
+ default: false
+ with_legacy: true
+- name: osd_recovery_delay_start
+ type: float
+ level: advanced
+ default: 0
+ fmt_desc: After peering completes, Ceph will delay for the specified number
+ of seconds before starting to recover RADOS objects.
+ with_legacy: true
+- name: osd_recovery_max_active
+ type: uint
+ level: advanced
+ desc: Number of simultaneous active recovery operations per OSD (overrides _ssd
+ and _hdd if non-zero)
+ fmt_desc: The number of active recovery requests per OSD at one time. More
+ requests will accelerate recovery, but the requests places an
+ increased load on the cluster.
+ note: This value is only used if it is non-zero. Normally it
+ is ``0``, which means that the ``hdd`` or ``ssd`` values
+ (below) are used, depending on the type of the primary
+ device backing the OSD.
+ default: 0
+ see_also:
+ - osd_recovery_max_active_hdd
+ - osd_recovery_max_active_ssd
+ flags:
+ - runtime
+ with_legacy: true
+- name: osd_recovery_max_active_hdd
+ type: uint
+ level: advanced
+ desc: Number of simultaneous active recovery operations per OSD (for rotational
+ devices)
+ fmt_desc: The number of active recovery requests per OSD at one time, if the
+ primary device is rotational.
+ default: 3
+ see_also:
+ - osd_recovery_max_active
+ - osd_recovery_max_active_ssd
+ flags:
+ - runtime
+ with_legacy: true
+- name: osd_recovery_max_active_ssd
+ type: uint
+ level: advanced
+ desc: Number of simultaneous active recovery operations per OSD (for non-rotational
+ solid state devices)
+ fmt_desc: The number of active recovery requests per OSD at one time, if the
+ primary device is non-rotational (i.e., an SSD).
+ default: 10
+ see_also:
+ - osd_recovery_max_active
+ - osd_recovery_max_active_hdd
+ flags:
+ - runtime
+ with_legacy: true
+- name: osd_recovery_max_single_start
+ type: uint
+ level: advanced
+ default: 1
+ fmt_desc: The maximum number of recovery operations per OSD that will be
+ newly started when an OSD is recovering.
+ with_legacy: true
+# max size of push chunk
+- name: osd_recovery_max_chunk
+ type: size
+ level: advanced
+ default: 8_M
+ fmt_desc: the maximum total size of data chunks a recovery op can carry.
+ with_legacy: true
+# max number of omap entries per chunk; 0 to disable limit
+- name: osd_recovery_max_omap_entries_per_chunk
+ type: uint
+ level: advanced
+ default: 8096
+ with_legacy: true
+# max size of a COPYFROM chunk
+- name: osd_copyfrom_max_chunk
+ type: size
+ level: advanced
+ default: 8_M
+ with_legacy: true
+# push cost per object
+- name: osd_push_per_object_cost
+ type: size
+ level: advanced
+ default: 1000
+ fmt_desc: the overhead for serving a push op
+ with_legacy: true
+# max size of push message
+- name: osd_max_push_cost
+ type: size
+ level: advanced
+ default: 8_M
+ with_legacy: true
+# max objects in single push op
+- name: osd_max_push_objects
+ type: uint
+ level: advanced
+ default: 10
+ with_legacy: true
+# Only use clone_overlap for recovery if there are fewer than
+# osd_recover_clone_overlap_limit entries in the overlap set
+- name: osd_recover_clone_overlap_limit
+ type: uint
+ level: advanced
+ default: 10
+ flags:
+ - runtime
+- name: osd_debug_feed_pullee
+ type: int
+ level: dev
+ desc: Feed a pullee, and force primary to pull a currently missing object from it
+ default: -1
+ with_legacy: true
+- name: osd_backfill_scan_min
+ type: int
+ level: advanced
+ default: 64
+ fmt_desc: The minimum number of objects per backfill scan.
+ with_legacy: true
+- name: osd_backfill_scan_max
+ type: int
+ level: advanced
+ default: 512
+ fmt_desc: The maximum number of objects per backfill scan.
+ with_legacy: true
+- name: osd_extblkdev_plugins
+ type: str
+ level: advanced
+ desc: extended block device plugins to load; these provide compression feedback at runtime
+ default: vdo
+ flags:
+ - startup
+# minimum number of peers
+- name: osd_heartbeat_min_peers
+ type: int
+ level: advanced
+ default: 10
+ with_legacy: true
+- name: osd_delete_sleep
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next removal transaction. This setting
+ overrides _ssd, _hdd, and _hybrid if non-zero.
+ fmt_desc: Time in seconds to sleep before the next removal transaction. This
+ throttles the PG deletion process.
+ default: 0
+ flags:
+ - runtime
+- name: osd_delete_sleep_hdd
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next removal transaction for HDDs
+ default: 5
+ flags:
+ - runtime
+- name: osd_delete_sleep_ssd
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next removal transaction for SSDs
+ default: 1
+ flags:
+ - runtime
+- name: osd_delete_sleep_hybrid
+ type: float
+ level: advanced
+ desc: Time in seconds to sleep before next removal transaction when OSD data is on HDD
+ and OSD journal or WAL+DB is on SSD
+ default: 1
+ flags:
+ - runtime
+- name: osd_rocksdb_iterator_bounds_enabled
+ desc: Whether omap iterator bounds are applied to rocksdb iterator ReadOptions
+ type: bool
+ level: dev
+ default: true
+ with_legacy: true