# -*- mode: YAML -*- --- headers: | #include <bit> #include <regex> // rbd feature and io operation validation #include "include/stringify.h" #include "common/strtol.h" #include "librbd/Features.h" #include "librbd/io/IoOperations.h" options: - name: rbd_default_pool type: str level: advanced desc: default pool for storing new images default: rbd services: - rbd validator: | [](std::string *value, std::string *error_message) { std::regex pattern("^[^@/]+$"); if (!std::regex_match (*value, pattern)) { *value = "rbd"; *error_message = "invalid RBD default pool, resetting to 'rbd'"; } return 0; } - name: rbd_default_data_pool type: str level: advanced desc: default pool for storing data blocks for new images services: - rbd validator: | [](std::string *value, std::string *error_message) { std::regex pattern("^[^@/]*$"); if (!std::regex_match (*value, pattern)) { *value = ""; *error_message = "ignoring invalid RBD data pool"; } return 0; } - name: rbd_default_features type: str level: advanced desc: default v2 image features for new images long_desc: 'RBD features are only applicable for v2 images. This setting accepts either an integer bitmask value or comma-delimited string of RBD feature names. This setting is always internally stored as an integer bitmask value. The mapping between feature bitmask value and feature name is as follows: +1 -> layering, +2 -> striping, +4 -> exclusive-lock, +8 -> object-map, +16 -> fast-diff, +32 -> deep-flatten, +64 -> journaling, +128 -> data-pool' default: layering,exclusive-lock,object-map,fast-diff,deep-flatten services: - rbd flags: - runtime validator: | [](std::string *value, std::string *error_message) { std::stringstream ss; uint64_t features = librbd::rbd_features_from_string(*value, &ss); // Leave this in integer form to avoid breaking Cinder. Someday // we would like to present this in string form instead... 
*value = stringify(features); if (ss.str().size()) { return -EINVAL; } return 0; } - name: rbd_op_threads type: uint level: advanced desc: number of threads to utilize for internal processing default: 1 services: - rbd - name: rbd_op_thread_timeout type: uint level: advanced desc: time in seconds for detecting a hung thread default: 60 services: - rbd - name: rbd_disable_zero_copy_writes type: bool level: advanced desc: Disable the use of zero-copy writes to ensure unstable writes from clients cannot cause a CRC mismatch default: true services: - rbd - name: rbd_non_blocking_aio type: bool level: advanced desc: process AIO ops from a dispatch thread to prevent blocking default: true services: - rbd - name: rbd_cache type: bool level: advanced desc: whether to enable caching (writeback unless rbd_cache_max_dirty is 0) fmt_desc: Enable caching for RADOS Block Device (RBD). default: true services: - rbd - name: rbd_cache_policy type: str level: advanced desc: cache policy for handling writes. fmt_desc: Select the caching policy for librbd. default: writearound services: - rbd enum_values: - writethrough - writeback - writearound - name: rbd_cache_writethrough_until_flush type: bool level: advanced desc: whether to make writeback caching writethrough until flush is called, to be sure the user of librbd will send flushes so that writeback is safe fmt_desc: Start out in ``writethrough`` mode, and switch to ``writeback`` after the first flush request is received. Enabling is a conservative but safe strategy in case VMs running on RBD volumes are too old to send flushes, like the ``virtio`` driver in Linux kernels older than 2.6.32. default: true services: - rbd - name: rbd_cache_size type: size level: advanced desc: cache size in bytes fmt_desc: The per-volume RBD client cache size in bytes. 
default: 32_M policies: write-back and write-through services: - rbd - name: rbd_cache_max_dirty type: size level: advanced desc: dirty limit in bytes - set to 0 for write-through caching fmt_desc: The ``dirty`` limit in bytes at which the cache triggers write-back. If ``0``, uses write-through caching. default: 24_M constraint: Must be less than ``rbd_cache_size``. policies: write-around and write-back services: - rbd - name: rbd_cache_target_dirty type: size level: advanced desc: target dirty limit in bytes fmt_desc: The ``dirty target`` before the cache begins writing data to the data storage. Does not block writes to the cache. default: 16_M constraint: Must be less than ``rbd_cache_max_dirty``. policies: write-back services: - rbd - name: rbd_cache_max_dirty_age type: float level: advanced desc: seconds in cache before writeback starts fmt_desc: The number of seconds dirty data is in the cache before writeback starts. default: 1 policies: write-back services: - rbd - name: rbd_cache_max_dirty_object type: uint level: advanced desc: dirty limit for objects - set to 0 for auto calculate from rbd_cache_size default: 0 services: - rbd - name: rbd_cache_block_writes_upfront type: bool level: advanced desc: whether to block writes to the cache before the aio_write call completes default: false services: - rbd - name: rbd_parent_cache_enabled type: bool level: advanced desc: whether to enable rbd shared ro cache default: false services: - rbd - name: rbd_concurrent_management_ops type: uint level: advanced desc: how many operations can be in flight for a management operation like deleting or resizing an image default: 10 services: - rbd min: 1 - name: rbd_balance_snap_reads type: bool level: advanced desc: distribute snap read requests to random OSD default: false services: - rbd see_also: - rbd_read_from_replica_policy - name: rbd_localize_snap_reads type: bool level: advanced desc: localize snap read requests to closest OSD default: false services: - rbd see_also: 
- rbd_read_from_replica_policy - name: rbd_balance_parent_reads type: bool level: advanced desc: distribute parent read requests to random OSD default: false services: - rbd see_also: - rbd_read_from_replica_policy - name: rbd_localize_parent_reads type: bool level: advanced desc: localize parent requests to closest OSD default: false services: - rbd see_also: - rbd_read_from_replica_policy - name: rbd_sparse_read_threshold_bytes type: size level: advanced desc: threshold for issuing a sparse-read long_desc: minimum number of sequential bytes to read against an object before issuing a sparse-read request to the cluster. 0 implies it must be a full object read to issue a sparse-read, 1 implies always use sparse-read, and any value larger than the maximum object size will disable sparse-read for all requests default: 64_K services: - rbd - name: rbd_readahead_trigger_requests type: uint level: advanced desc: number of sequential requests necessary to trigger readahead default: 10 services: - rbd - name: rbd_readahead_max_bytes type: size level: advanced desc: set to 0 to disable readahead fmt_desc: Maximum size of a read-ahead request. If zero, read-ahead is disabled. default: 512_K services: - rbd - name: rbd_readahead_disable_after_bytes type: size level: advanced desc: how many bytes are read in total before readahead is disabled fmt_desc: After this many bytes have been read from an RBD image, read-ahead is disabled for that image until it is closed. This allows the guest OS to take over read-ahead once it is booted. If zero, read-ahead stays enabled. 
default: 50_M services: - rbd - name: rbd_clone_copy_on_read type: bool level: advanced desc: copy-up parent image blocks to clone upon read request default: false services: - rbd - name: rbd_blocklist_on_break_lock type: bool level: advanced desc: whether to blocklist clients whose lock was broken default: true services: - rbd - name: rbd_blocklist_expire_seconds type: uint level: advanced desc: number of seconds to blocklist - set to 0 for OSD default default: 0 services: - rbd - name: rbd_request_timed_out_seconds type: uint level: advanced desc: number of seconds before maintenance request times out default: 30 services: - rbd - name: rbd_skip_partial_discard type: bool level: advanced desc: skip discard (zero) of unaligned extents within an object default: true services: - rbd - name: rbd_discard_granularity_bytes type: uint level: advanced desc: minimum aligned size of discard operations default: 64_K services: - rbd min: 4_K max: 32_M validator: | [](std::string *value, std::string *error_message) { uint64_t f = strict_si_cast<uint64_t>(*value, error_message); if (!error_message->empty()) { return -EINVAL; } else if (!std::has_single_bit(f)) { *error_message = "value must be a power of two"; return -EINVAL; } return 0; } - name: rbd_enable_alloc_hint type: bool level: advanced desc: when writing an object, issue a hint to the OSD backend to indicate the expected object size default: true services: - rbd - name: rbd_compression_hint type: str level: basic desc: Compression hint to send to the OSDs during writes fmt_desc: Hint to send to the OSDs on write operations. If set to ``compressible`` and the OSD ``bluestore_compression_mode`` setting is ``passive``, the OSD will attempt to compress data. If set to ``incompressible`` and the OSD compression setting is ``aggressive``, the OSD will not attempt to compress data. 
default: none services: - rbd enum_values: - none - compressible - incompressible flags: - runtime - name: rbd_read_from_replica_policy type: str level: basic desc: Read replica policy sent to the OSDs during reads fmt_desc: | Policy for determining which OSD will receive read operations. If set to ``default``, each PG's primary OSD will always be used for read operations. If set to ``balance``, read operations will be sent to a randomly selected OSD within the replica set. If set to ``localize``, read operations will be sent to the closest OSD as determined by the CRUSH map. Unlike ``rbd_balance_snap_reads`` and ``rbd_localize_snap_reads`` or ``rbd_balance_parent_reads`` and ``rbd_localize_parent_reads``, it affects all read operations, not just snap or parent. Note: this feature requires the cluster to be configured with a minimum compatible OSD release of Octopus. default: default services: - rbd enum_values: - default - balance - localize flags: - runtime - name: rbd_tracing type: bool level: advanced desc: true if LTTng-UST tracepoints should be enabled default: false services: - rbd - name: rbd_blkin_trace_all type: bool level: advanced desc: create a blkin trace for all RBD requests default: false services: - rbd - name: rbd_validate_pool type: bool level: advanced desc: validate empty pools for RBD compatibility default: true services: - rbd - name: rbd_validate_names type: bool level: advanced desc: validate new image names for RBD compatibility default: true services: - rbd - name: rbd_invalidate_object_map_on_timeout type: bool level: dev desc: true if object map should be invalidated when a load or update times out default: true services: - rbd - name: rbd_auto_exclusive_lock_until_manual_request type: bool level: advanced desc: automatically acquire/release exclusive lock until it is explicitly requested default: true services: - rbd - name: rbd_move_to_trash_on_remove type: bool level: basic desc: automatically move images to the trash when deleted 
default: false services: - rbd - name: rbd_move_to_trash_on_remove_expire_seconds type: uint level: basic desc: default number of seconds to protect deleted images in the trash default: 0 services: - rbd - name: rbd_move_parent_to_trash_on_remove type: bool level: basic desc: move parent with clone format v2 children to the trash when deleted default: false services: - rbd - name: rbd_mirroring_resync_after_disconnect type: bool level: advanced desc: automatically start image resync after mirroring is disconnected due to being laggy default: false services: - rbd - name: rbd_mirroring_delete_delay type: uint level: advanced desc: time-delay in seconds for rbd-mirror delete propagation default: 0 services: - rbd - name: rbd_mirroring_replay_delay type: uint level: advanced desc: time-delay in seconds for rbd-mirror asynchronous replication default: 0 services: - rbd - name: rbd_mirroring_max_mirroring_snapshots type: uint level: advanced desc: mirroring snapshots limit default: 5 services: - rbd min: 3 - name: rbd_default_format type: uint level: advanced desc: default image format for new images default: 2 services: - rbd - name: rbd_default_order type: uint level: advanced desc: default order (data block object size) for new images long_desc: This configures the default object size for new images. The value is used as a power of two, meaning ``default_object_size = 2 ^ rbd_default_order``. Configure a value between 12 and 25 (inclusive), translating to 4KiB lower and 32MiB upper limit. 
default: 22 services: - rbd - name: rbd_default_stripe_count type: uint level: advanced desc: default stripe count for new images default: 0 services: - rbd - name: rbd_default_stripe_unit type: size level: advanced desc: default stripe width for new images default: 0 services: - rbd - name: rbd_default_map_options type: str level: advanced desc: default krbd map options services: - rbd - name: rbd_default_clone_format type: str level: advanced desc: default internal format for handling clones long_desc: This sets the internal format for tracking cloned images. The setting of '1' requires attaching to protected snapshots that cannot be removed until the clone is removed/flattened. The setting of '2' will allow clones to be attached to any snapshot and permits removing in-use parent snapshots but requires Mimic or later clients. The default setting of 'auto' will use the v2 format if the cluster is configured to require mimic or later clients. default: auto services: - rbd enum_values: - '1' - '2' - auto flags: - runtime - name: rbd_journal_order type: uint level: advanced desc: default order (object size) for journal data objects default: 24 services: - rbd min: 12 max: 26 - name: rbd_journal_splay_width type: uint level: advanced desc: number of active journal objects default: 4 services: - rbd - name: rbd_journal_commit_age type: float level: advanced desc: commit time interval, seconds default: 5 services: - rbd - name: rbd_journal_object_writethrough_until_flush type: bool level: advanced desc: when enabled, the rbd_journal_object_flush* configuration options are ignored until the first flush so that batched journal IO is known to be safe for consistency default: true services: - rbd - name: rbd_journal_object_flush_interval type: uint level: advanced desc: maximum number of pending commits per journal object default: 0 services: - rbd - name: rbd_journal_object_flush_bytes type: size level: advanced desc: maximum number of pending bytes per journal object 
default: 1_M services: - rbd - name: rbd_journal_object_flush_age type: float level: advanced desc: maximum age (in seconds) for pending commits default: 0 services: - rbd - name: rbd_journal_object_max_in_flight_appends type: uint level: advanced desc: maximum number of in-flight appends per journal object default: 0 services: - rbd - name: rbd_journal_pool type: str level: advanced desc: pool for journal objects services: - rbd - name: rbd_journal_max_payload_bytes type: size level: advanced desc: maximum journal payload size before splitting default: 16_K services: - rbd - name: rbd_journal_max_concurrent_object_sets type: uint level: advanced desc: maximum number of object sets a journal client can be behind before it is automatically unregistered default: 0 services: - rbd - name: rbd_qos_iops_limit type: uint level: advanced desc: the desired limit of IO operations per second default: 0 services: - rbd - name: rbd_qos_bps_limit type: uint level: advanced desc: the desired limit of IO bytes per second default: 0 services: - rbd - name: rbd_qos_read_iops_limit type: uint level: advanced desc: the desired limit of read operations per second default: 0 services: - rbd - name: rbd_qos_write_iops_limit type: uint level: advanced desc: the desired limit of write operations per second default: 0 services: - rbd - name: rbd_qos_read_bps_limit type: uint level: advanced desc: the desired limit of read bytes per second default: 0 services: - rbd - name: rbd_qos_write_bps_limit type: uint level: advanced desc: the desired limit of write bytes per second default: 0 services: - rbd - name: rbd_qos_iops_burst type: uint level: advanced desc: the desired burst limit of IO operations default: 0 services: - rbd - name: rbd_qos_bps_burst type: uint level: advanced desc: the desired burst limit of IO bytes default: 0 services: - rbd - name: rbd_qos_read_iops_burst type: uint level: advanced desc: the desired burst limit of read operations default: 0 services: - rbd - name: 
rbd_qos_write_iops_burst type: uint level: advanced desc: the desired burst limit of write operations default: 0 services: - rbd - name: rbd_qos_read_bps_burst type: uint level: advanced desc: the desired burst limit of read bytes default: 0 services: - rbd - name: rbd_qos_write_bps_burst type: uint level: advanced desc: the desired burst limit of write bytes default: 0 services: - rbd - name: rbd_qos_iops_burst_seconds type: uint level: advanced desc: the desired burst duration in seconds of IO operations default: 1 services: - rbd min: 1 - name: rbd_qos_bps_burst_seconds type: uint level: advanced desc: the desired burst duration in seconds of IO bytes default: 1 services: - rbd min: 1 - name: rbd_qos_read_iops_burst_seconds type: uint level: advanced desc: the desired burst duration in seconds of read operations default: 1 services: - rbd min: 1 - name: rbd_qos_write_iops_burst_seconds type: uint level: advanced desc: the desired burst duration in seconds of write operations default: 1 services: - rbd min: 1 - name: rbd_qos_read_bps_burst_seconds type: uint level: advanced desc: the desired burst duration in seconds of read bytes default: 1 services: - rbd min: 1 - name: rbd_qos_write_bps_burst_seconds type: uint level: advanced desc: the desired burst duration in seconds of write bytes default: 1 services: - rbd min: 1 - name: rbd_qos_schedule_tick_min type: uint level: advanced desc: minimum schedule tick (in milliseconds) for QoS long_desc: This determines the minimum time (in milliseconds) at which I/Os can become unblocked if the limit of a throttle is hit. In terms of the token bucket algorithm, this is the minimum interval at which tokens are added to the bucket. default: 50 services: - rbd min: 1 - name: rbd_qos_exclude_ops type: str level: advanced desc: optionally exclude ops from QoS long_desc: 'Optionally exclude ops from QoS. This setting accepts either an integer bitmask value or comma-delimited string of op names. 
This setting is always internally stored as an integer bitmask value. The mapping between op bitmask value and op name is as follows: +1 -> read, +2 -> write, +4 -> discard, +8 -> write_same, +16 -> compare_and_write' services: - rbd flags: - runtime validator: | [](std::string *value, std::string *error_message) { std::ostringstream ss; uint64_t exclude_ops = librbd::io::rbd_io_operations_from_string(*value, &ss); // Leave this in integer form to avoid breaking Cinder. Someday // we would like to present this in string form instead... *value = stringify(exclude_ops); if (ss.str().size()) { return -EINVAL; } return 0; } - name: rbd_discard_on_zeroed_write_same type: bool level: advanced desc: discard data on zeroed write same instead of writing zero default: true services: - rbd - name: rbd_mtime_update_interval type: uint level: advanced desc: RBD Image modify timestamp refresh interval. Set to 0 to disable modify timestamp update. default: 60 services: - rbd min: 0 - name: rbd_atime_update_interval type: uint level: advanced desc: RBD Image access timestamp refresh interval. Set to 0 to disable access timestamp update. 
default: 60 services: - rbd min: 0 - name: rbd_io_scheduler type: str level: advanced desc: RBD IO scheduler default: simple services: - rbd enum_values: - none - simple - name: rbd_io_scheduler_simple_max_delay type: uint level: advanced desc: maximum io delay (in milliseconds) for simple io scheduler (if set to 0, delay is calculated based on latency stats) default: 0 services: - rbd min: 0 - name: rbd_persistent_cache_mode type: str level: advanced desc: enable persistent write back cache for this volume default: disabled services: - rbd enum_values: - disabled - rwl - ssd - name: rbd_persistent_cache_size type: uint level: advanced desc: size of the persistent write back cache for this volume default: 1_G services: - rbd min: 1_G - name: rbd_persistent_cache_path type: str level: advanced desc: location of the persistent write back cache in a DAX-enabled filesystem on persistent memory default: /tmp services: - rbd - name: rbd_quiesce_notification_attempts type: uint level: dev desc: the number of quiesce notification attempts default: 10 services: - rbd min: 1 - name: rbd_default_snapshot_quiesce_mode type: str level: advanced desc: default snapshot quiesce mode default: required services: - rbd enum_values: - required - ignore-error - skip - name: rbd_plugins type: str level: advanced desc: comma-delimited list of librbd plugins to enable services: - rbd - name: rbd_config_pool_override_update_timestamp type: uint level: dev desc: timestamp of last update to pool-level config overrides default: 0 services: - rbd