diff options
Diffstat (limited to '')
-rw-r--r-- | src/common/options/rbd.yaml.in | 881 |
1 files changed, 881 insertions, 0 deletions
diff --git a/src/common/options/rbd.yaml.in b/src/common/options/rbd.yaml.in new file mode 100644 index 000000000..c2da27aaa --- /dev/null +++ b/src/common/options/rbd.yaml.in @@ -0,0 +1,881 @@ +# -*- mode: YAML -*- +--- + +headers: | + #include <bit> + #include <regex> + // rbd feature and io operation validation + #include "include/stringify.h" + #include "common/strtol.h" + #include "librbd/Features.h" + #include "librbd/io/IoOperations.h" +options: +- name: rbd_default_pool + type: str + level: advanced + desc: default pool for storing new images + default: rbd + services: + - rbd + validator: | + [](std::string *value, std::string *error_message) { + std::regex pattern("^[^@/]+$"); + if (!std::regex_match (*value, pattern)) { + *value = "rbd"; + *error_message = "invalid RBD default pool, resetting to 'rbd'"; + } + return 0; + } +- name: rbd_default_data_pool + type: str + level: advanced + desc: default pool for storing data blocks for new images + services: + - rbd + validator: | + [](std::string *value, std::string *error_message) { + std::regex pattern("^[^@/]*$"); + if (!std::regex_match (*value, pattern)) { + *value = ""; + *error_message = "ignoring invalid RBD data pool"; + } + return 0; + } +- name: rbd_default_features + type: str + level: advanced + desc: default v2 image features for new images + long_desc: 'RBD features are only applicable for v2 images. This setting accepts + either an integer bitmask value or comma-delimited string of RBD feature names. + This setting is always internally stored as an integer bitmask value. The mapping + between feature bitmask value and feature name is as follows: +1 -> layering, + +2 -> striping, +4 -> exclusive-lock, +8 -> object-map, +16 -> fast-diff, +32 + -> deep-flatten, +64 -> journaling, +128 -> data-pool' + default: layering,exclusive-lock,object-map,fast-diff,deep-flatten + services: + - rbd + flags: + - runtime + validator: | + [](std::string *value, std::string *error_message) { + std::stringstream ss; + uint64_t features = librbd::rbd_features_from_string(*value, &ss); + // Leave this in integer form to avoid breaking Cinder. Someday + // we would like to present this in string form instead... + *value = stringify(features); + if (ss.str().size()) { + return -EINVAL; + } + return 0; + } +- name: rbd_op_threads + type: uint + level: advanced + desc: number of threads to utilize for internal processing + default: 1 + services: + - rbd +- name: rbd_op_thread_timeout + type: uint + level: advanced + desc: time in seconds for detecting a hung thread + default: 60 + services: + - rbd +- name: rbd_disable_zero_copy_writes + type: bool + level: advanced + desc: Disable the use of zero-copy writes to ensure unstable writes from clients + cannot cause a CRC mismatch + default: true + services: + - rbd +- name: rbd_non_blocking_aio + type: bool + level: advanced + desc: process AIO ops from a dispatch thread to prevent blocking + default: true + services: + - rbd +- name: rbd_cache + type: bool + level: advanced + desc: whether to enable caching (writeback unless rbd_cache_max_dirty is 0) + fmt_desc: Enable caching for RADOS Block Device (RBD). + default: true + services: + - rbd +- name: rbd_cache_policy + type: str + level: advanced + desc: cache policy for handling writes. + fmt_desc: Select the caching policy for librbd. + default: writearound + services: + - rbd + enum_values: + - writethrough + - writeback + - writearound +- name: rbd_cache_writethrough_until_flush + type: bool + level: advanced + desc: whether to make writeback caching writethrough until flush is called, to be + sure the user of librbd will send flushes so that writeback is safe + fmt_desc: Start out in ``writethrough`` mode, and switch to ``writeback`` + after the first flush request is received. Enabling is a + conservative but safe strategy in case VMs running on RBD volumes + are too old to send flushes, like the ``virtio`` driver in Linux + kernels older than 2.6.32. + default: true + services: + - rbd +- name: rbd_cache_size + type: size + level: advanced + desc: cache size in bytes + fmt_desc: The per-volume RBD client cache size in bytes. + default: 32_M + policies: write-back and write-through + services: + - rbd +- name: rbd_cache_max_dirty + type: size + level: advanced + desc: dirty limit in bytes - set to 0 for write-through caching + fmt_desc: The ``dirty`` limit in bytes at which the cache triggers write-back. + If ``0``, uses write-through caching. + default: 24_M + constraint: Must be less than ``rbd_cache_size``. + policies: write-around and write-back + services: + - rbd +- name: rbd_cache_target_dirty + type: size + level: advanced + desc: target dirty limit in bytes + fmt_desc: The ``dirty target`` before the cache begins writing data to the data + storage. Does not block writes to the cache. + default: 16_M + constraint: Must be less than ``rbd_cache_max_dirty``. + policies: write-back + services: + - rbd +- name: rbd_cache_max_dirty_age + type: float + level: advanced + desc: seconds in cache before writeback starts + fmt_desc: The number of seconds dirty data is in the cache before writeback starts. + default: 1 + policies: write-back + services: + - rbd +- name: rbd_cache_max_dirty_object + type: uint + level: advanced + desc: dirty limit for objects - set to 0 for auto calculate from rbd_cache_size + default: 0 + services: + - rbd +- name: rbd_cache_block_writes_upfront + type: bool + level: advanced + desc: whether to block writes to the cache before the aio_write call completes + default: false + services: + - rbd +- name: rbd_parent_cache_enabled + type: bool + level: advanced + desc: whether to enable rbd shared ro cache + default: false + services: + - rbd +- name: rbd_concurrent_management_ops + type: uint + level: advanced + desc: how many operations can be in flight for a management operation like deleting + or resizing an image + default: 10 + services: + - rbd + min: 1 +- name: rbd_balance_snap_reads + type: bool + level: advanced + desc: distribute snap read requests to random OSD + default: false + services: + - rbd + see_also: + - rbd_read_from_replica_policy +- name: rbd_localize_snap_reads + type: bool + level: advanced + desc: localize snap read requests to closest OSD + default: false + services: + - rbd + see_also: + - rbd_read_from_replica_policy +- name: rbd_balance_parent_reads + type: bool + level: advanced + desc: distribute parent read requests to random OSD + default: false + services: + - rbd + see_also: + - rbd_read_from_replica_policy +- name: rbd_localize_parent_reads + type: bool + level: advanced + desc: localize parent requests to closest OSD + default: false + services: + - rbd + see_also: + - rbd_read_from_replica_policy +- name: rbd_sparse_read_threshold_bytes + type: size + level: advanced + desc: threshold for issuing a sparse-read + long_desc: minimum number of sequential bytes to read against an object before issuing + a sparse-read request to the cluster. 0 implies it must be a full object read + to issue a sparse-read, 1 implies always use sparse-read, and any value larger + than the maximum object size will disable sparse-read for all requests + default: 64_K + services: + - rbd +- name: rbd_readahead_trigger_requests + type: uint + level: advanced + desc: number of sequential requests necessary to trigger readahead + default: 10 + services: + - rbd +- name: rbd_readahead_max_bytes + type: size + level: advanced + desc: set to 0 to disable readahead + fmt_desc: Maximum size of a read-ahead request. If zero, read-ahead is disabled. + default: 512_K + services: + - rbd +- name: rbd_readahead_disable_after_bytes + type: size + level: advanced + desc: how many bytes are read in total before readahead is disabled + fmt_desc: After this many bytes have been read from an RBD image, read-ahead + is disabled for that image until it is closed. This allows the + guest OS to take over read-ahead once it is booted. If zero, + read-ahead stays enabled. + default: 50_M + services: + - rbd +- name: rbd_clone_copy_on_read + type: bool + level: advanced + desc: copy-up parent image blocks to clone upon read request + default: false + services: + - rbd +- name: rbd_blocklist_on_break_lock + type: bool + level: advanced + desc: whether to blocklist clients whose lock was broken + default: true + services: + - rbd +- name: rbd_blocklist_expire_seconds + type: uint + level: advanced + desc: number of seconds to blocklist - set to 0 for OSD default + default: 0 + services: + - rbd +- name: rbd_request_timed_out_seconds + type: uint + level: advanced + desc: number of seconds before maintenance request times out + default: 30 + services: + - rbd +- name: rbd_skip_partial_discard + type: bool + level: advanced + desc: skip discard (zero) of unaligned extents within an object + default: true + services: + - rbd +- name: rbd_discard_granularity_bytes + type: uint + level: advanced + desc: minimum aligned size of discard operations + default: 64_K + services: + - rbd + min: 4_K + max: 32_M + validator: | + [](std::string *value, std::string *error_message) { + uint64_t f = strict_si_cast<uint64_t>(*value, error_message); + if (!error_message->empty()) { + return -EINVAL; + } else if (!std::has_single_bit(f)) { + *error_message = "value must be a power of two"; + return -EINVAL; + } + return 0; + } +- name: rbd_enable_alloc_hint + type: bool + level: advanced + desc: when writing a object, it will issue a hint to osd backend to indicate the + expected size object need + default: true + services: + - rbd +- name: rbd_compression_hint + type: str + level: basic + desc: Compression hint to send to the OSDs during writes + fmt_desc: Hint to send to the OSDs on write operations. If set to + ``compressible`` and the OSD ``bluestore_compression_mode`` + setting is ``passive``, the OSD will attempt to compress data. + If set to ``incompressible`` and the OSD compression setting + is ``aggressive``, the OSD will not attempt to compress data. + default: none + services: + - rbd + enum_values: + - none + - compressible + - incompressible + flags: + - runtime +- name: rbd_read_from_replica_policy + type: str + level: basic + desc: Read replica policy send to the OSDS during reads + fmt_desc: | + Policy for determining which OSD will receive read operations. + If set to ``default``, each PG's primary OSD will always be used + for read operations. If set to ``balance``, read operations will + be sent to a randomly selected OSD within the replica set. If set + to ``localize``, read operations will be sent to the closest OSD + as determined by the CRUSH map. Unlike ``rbd_balance_snap_reads`` + and ``rbd_localize_snap_reads`` or ``rbd_balance_parent_reads`` and + ``rbd_localize_parent_reads``, it affects all read operations, not + just snap or parent. Note: this feature requires the cluster to + be configured with a minimum compatible OSD release of Octopus. + default: default + services: + - rbd + enum_values: + - default + - balance + - localize + flags: + - runtime +- name: rbd_tracing + type: bool + level: advanced + desc: true if LTTng-UST tracepoints should be enabled + default: false + services: + - rbd +- name: rbd_blkin_trace_all + type: bool + level: advanced + desc: create a blkin trace for all RBD requests + default: false + services: + - rbd +- name: rbd_validate_pool + type: bool + level: advanced + desc: validate empty pools for RBD compatibility + default: true + services: + - rbd +- name: rbd_validate_names + type: bool + level: advanced + desc: validate new image names for RBD compatibility + default: true + services: + - rbd +- name: rbd_invalidate_object_map_on_timeout + type: bool + level: dev + desc: true if object map should be invalidated when load or update timeout + default: true + services: + - rbd +- name: rbd_auto_exclusive_lock_until_manual_request + type: bool + level: advanced + desc: automatically acquire/release exclusive lock until it is explicitly requested + default: true + services: + - rbd +- name: rbd_move_to_trash_on_remove + type: bool + level: basic + desc: automatically move images to the trash when deleted + default: false + services: + - rbd +- name: rbd_move_to_trash_on_remove_expire_seconds + type: uint + level: basic + desc: default number of seconds to protect deleted images in the trash + default: 0 + services: + - rbd +- name: rbd_move_parent_to_trash_on_remove + type: bool + level: basic + desc: move parent with clone format v2 children to the trash when deleted + default: false + services: + - rbd +- name: rbd_mirroring_resync_after_disconnect + type: bool + level: advanced + desc: automatically start image resync after mirroring is disconnected due to being + laggy + default: false + services: + - rbd +- name: rbd_mirroring_delete_delay + type: uint + level: advanced + desc: time-delay in seconds for rbd-mirror delete propagation + default: 0 + services: + - rbd +- name: rbd_mirroring_replay_delay + type: uint + level: advanced + desc: time-delay in seconds for rbd-mirror asynchronous replication + default: 0 + services: + - rbd +- name: rbd_mirroring_max_mirroring_snapshots + type: uint + level: advanced + desc: mirroring snapshots limit + default: 5 + services: + - rbd + min: 3 +- name: rbd_default_format + type: uint + level: advanced + desc: default image format for new images + default: 2 + services: + - rbd +- name: rbd_default_order + type: uint + level: advanced + desc: default order (data block object size) for new images + long_desc: This configures the default object size for new images. The value is used as a + power of two, meaning ``default_object_size = 2 ^ rbd_default_order``. Configure a value + between 12 and 25 (inclusive), translating to 4KiB lower and 32MiB upper limit. + default: 22 + services: + - rbd +- name: rbd_default_stripe_count + type: uint + level: advanced + desc: default stripe count for new images + default: 0 + services: + - rbd +- name: rbd_default_stripe_unit + type: size + level: advanced + desc: default stripe width for new images + default: 0 + services: + - rbd +- name: rbd_default_map_options + type: str + level: advanced + desc: default krbd map options + services: + - rbd +- name: rbd_default_clone_format + type: str + level: advanced + desc: default internal format for handling clones + long_desc: This sets the internal format for tracking cloned images. The setting + of '1' requires attaching to protected snapshots that cannot be removed until + the clone is removed/flattened. The setting of '2' will allow clones to be attached + to any snapshot and permits removing in-use parent snapshots but requires Mimic + or later clients. The default setting of 'auto' will use the v2 format if the + cluster is configured to require mimic or later clients. + default: auto + services: + - rbd + enum_values: + - '1' + - '2' + - auto + flags: + - runtime +- name: rbd_journal_order + type: uint + level: advanced + desc: default order (object size) for journal data objects + default: 24 + services: + - rbd + min: 12 + max: 26 +- name: rbd_journal_splay_width + type: uint + level: advanced + desc: number of active journal objects + default: 4 + services: + - rbd +- name: rbd_journal_commit_age + type: float + level: advanced + desc: commit time interval, seconds + default: 5 + services: + - rbd +- name: rbd_journal_object_writethrough_until_flush + type: bool + level: advanced + desc: when enabled, the rbd_journal_object_flush* configuration options are ignored + until the first flush so that batched journal IO is known to be safe for consistency + default: true + services: + - rbd +- name: rbd_journal_object_flush_interval + type: uint + level: advanced + desc: maximum number of pending commits per journal object + default: 0 + services: + - rbd +- name: rbd_journal_object_flush_bytes + type: size + level: advanced + desc: maximum number of pending bytes per journal object + default: 1_M + services: + - rbd +- name: rbd_journal_object_flush_age + type: float + level: advanced + desc: maximum age (in seconds) for pending commits + default: 0 + services: + - rbd +- name: rbd_journal_object_max_in_flight_appends + type: uint + level: advanced + desc: maximum number of in-flight appends per journal object + default: 0 + services: + - rbd +- name: rbd_journal_pool + type: str + level: advanced + desc: pool for journal objects + services: + - rbd +- name: rbd_journal_max_payload_bytes + type: size + level: advanced + desc: maximum journal payload size before splitting + default: 16_K + services: + - rbd +- name: rbd_journal_max_concurrent_object_sets + type: uint + level: advanced + desc: maximum number of object sets a journal client can be behind before it is + automatically unregistered + default: 0 + services: + - rbd +- name: rbd_qos_iops_limit + type: uint + level: advanced + desc: the desired limit of IO operations per second + default: 0 + services: + - rbd +- name: rbd_qos_bps_limit + type: uint + level: advanced + desc: the desired limit of IO bytes per second + default: 0 + services: + - rbd +- name: rbd_qos_read_iops_limit + type: uint + level: advanced + desc: the desired limit of read operations per second + default: 0 + services: + - rbd +- name: rbd_qos_write_iops_limit + type: uint + level: advanced + desc: the desired limit of write operations per second + default: 0 + services: + - rbd +- name: rbd_qos_read_bps_limit + type: uint + level: advanced + desc: the desired limit of read bytes per second + default: 0 + services: + - rbd +- name: rbd_qos_write_bps_limit + type: uint + level: advanced + desc: the desired limit of write bytes per second + default: 0 + services: + - rbd +- name: rbd_qos_iops_burst + type: uint + level: advanced + desc: the desired burst limit of IO operations + default: 0 + services: + - rbd +- name: rbd_qos_bps_burst + type: uint + level: advanced + desc: the desired burst limit of IO bytes + default: 0 + services: + - rbd +- name: rbd_qos_read_iops_burst + type: uint + level: advanced + desc: the desired burst limit of read operations + default: 0 + services: + - rbd +- name: rbd_qos_write_iops_burst + type: uint + level: advanced + desc: the desired burst limit of write operations + default: 0 + services: + - rbd +- name: rbd_qos_read_bps_burst + type: uint + level: advanced + desc: the desired burst limit of read bytes + default: 0 + services: + - rbd +- name: rbd_qos_write_bps_burst + type: uint + level: advanced + desc: the desired burst limit of write bytes + default: 0 + services: + - rbd +- name: rbd_qos_iops_burst_seconds + type: uint + level: advanced + desc: the desired burst duration in seconds of IO operations + default: 1 + services: + - rbd + min: 1 +- name: rbd_qos_bps_burst_seconds + type: uint + level: advanced + desc: the desired burst duration in seconds of IO bytes + default: 1 + services: + - rbd + min: 1 +- name: rbd_qos_read_iops_burst_seconds + type: uint + level: advanced + desc: the desired burst duration in seconds of read operations + default: 1 + services: + - rbd + min: 1 +- name: rbd_qos_write_iops_burst_seconds + type: uint + level: advanced + desc: the desired burst duration in seconds of write operations + default: 1 + services: + - rbd + min: 1 +- name: rbd_qos_read_bps_burst_seconds + type: uint + level: advanced + desc: the desired burst duration in seconds of read bytes + default: 1 + services: + - rbd + min: 1 +- name: rbd_qos_write_bps_burst_seconds + type: uint + level: advanced + desc: the desired burst duration in seconds of write bytes + default: 1 + services: + - rbd + min: 1 +- name: rbd_qos_schedule_tick_min + type: uint + level: advanced + desc: minimum schedule tick (in milliseconds) for QoS + long_desc: This determines the minimum time (in milliseconds) at which I/Os + can become unblocked if the limit of a throttle is hit. In terms of the + token bucket algorithm, this is the minimum interval at which tokens are + added to the bucket. + default: 50 + services: + - rbd + min: 1 +- name: rbd_qos_exclude_ops + type: str + level: advanced + desc: optionally exclude ops from QoS + long_desc: 'Optionally exclude ops from QoS. This setting accepts either an integer + bitmask value or comma-delimited string of op names. This setting is always internally + stored as an integer bitmask value. The mapping between op bitmask value and op + name is as follows: +1 -> read, +2 -> write, +4 -> discard, +8 -> write_same, + +16 -> compare_and_write' + services: + - rbd + flags: + - runtime + validator: | + [](std::string *value, std::string *error_message) { + std::ostringstream ss; + uint64_t exclude_ops = librbd::io::rbd_io_operations_from_string(*value, &ss); + // Leave this in integer form to avoid breaking Cinder. Someday + // we would like to present this in string form instead... + *value = stringify(exclude_ops); + if (ss.str().size()) { + return -EINVAL; + } + return 0; + } +- name: rbd_discard_on_zeroed_write_same + type: bool + level: advanced + desc: discard data on zeroed write same instead of writing zero + default: true + services: + - rbd +- name: rbd_mtime_update_interval + type: uint + level: advanced + desc: RBD Image modify timestamp refresh interval. Set to 0 to disable modify timestamp + update. + default: 60 + services: + - rbd + min: 0 +- name: rbd_atime_update_interval + type: uint + level: advanced + desc: RBD Image access timestamp refresh interval. Set to 0 to disable access timestamp + update. + default: 60 + services: + - rbd + min: 0 +- name: rbd_io_scheduler + type: str + level: advanced + desc: RBD IO scheduler + default: simple + services: + - rbd + enum_values: + - none + - simple +- name: rbd_io_scheduler_simple_max_delay + type: uint + level: advanced + desc: maximum io delay (in milliseconds) for simple io scheduler (if set to 0 dalay + is calculated based on latency stats) + default: 0 + services: + - rbd + min: 0 +- name: rbd_persistent_cache_mode + type: str + level: advanced + desc: enable persistent write back cache for this volume + default: disabled + services: + - rbd + enum_values: + - disabled + - rwl + - ssd +- name: rbd_persistent_cache_size + type: uint + level: advanced + desc: size of the persistent write back cache for this volume + default: 1_G + services: + - rbd + min: 1_G +- name: rbd_persistent_cache_path + type: str + level: advanced + desc: location of the persistent write back cache in a DAX-enabled filesystem on + persistent memory + default: /tmp + services: + - rbd +- name: rbd_quiesce_notification_attempts + type: uint + level: dev + desc: the number of quiesce notification attempts + default: 10 + services: + - rbd + min: 1 +- name: rbd_default_snapshot_quiesce_mode + type: str + level: advanced + desc: default snapshot quiesce mode + default: required + services: + - rbd + enum_values: + - required + - ignore-error + - skip +- name: rbd_plugins + type: str + level: advanced + desc: comma-delimited list of librbd plugins to enable + services: + - rbd +- name: rbd_config_pool_override_update_timestamp + type: uint + level: dev + desc: timestamp of last update to pool-level config overrides + default: 0 + services: + - rbd |