summaryrefslogtreecommitdiffstats
path: root/doc/rbd
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--doc/rbd/api/index.rst8
-rw-r--r--doc/rbd/api/librbdpy.rst83
-rw-r--r--doc/rbd/disk.conf8
-rw-r--r--doc/rbd/index.rst72
-rw-r--r--doc/rbd/iscsi-initiator-esx.rst105
-rw-r--r--doc/rbd/iscsi-initiator-linux.rst91
-rw-r--r--doc/rbd/iscsi-initiator-win.rst102
-rw-r--r--doc/rbd/iscsi-initiators.rst23
-rw-r--r--doc/rbd/iscsi-monitoring.rst85
-rw-r--r--doc/rbd/iscsi-overview.rst57
-rw-r--r--doc/rbd/iscsi-requirements.rst51
-rw-r--r--doc/rbd/iscsi-target-ansible.rst234
-rw-r--r--doc/rbd/iscsi-target-cli-manual-install.rst190
-rw-r--r--doc/rbd/iscsi-target-cli.rst244
-rw-r--r--doc/rbd/iscsi-targets.rst27
-rw-r--r--doc/rbd/libvirt.rst323
-rw-r--r--doc/rbd/man/index.rst16
-rw-r--r--doc/rbd/qemu-rbd.rst220
-rw-r--r--doc/rbd/rados-rbd-cmds.rst326
-rw-r--r--doc/rbd/rbd-cloudstack.rst157
-rw-r--r--doc/rbd/rbd-config-ref.rst476
-rw-r--r--doc/rbd/rbd-encryption.rst148
-rw-r--r--doc/rbd/rbd-exclusive-locks.rst86
-rw-r--r--doc/rbd/rbd-integrations.rst15
-rw-r--r--doc/rbd/rbd-ko.rst59
-rw-r--r--doc/rbd/rbd-kubernetes.rst314
-rw-r--r--doc/rbd/rbd-live-migration.rst367
-rw-r--r--doc/rbd/rbd-mirroring.rst529
-rw-r--r--doc/rbd/rbd-openstack.rst395
-rw-r--r--doc/rbd/rbd-operations.rst16
-rw-r--r--doc/rbd/rbd-persistent-read-only-cache.rst201
-rw-r--r--doc/rbd/rbd-persistent-write-back-cache.rst133
-rw-r--r--doc/rbd/rbd-replay.rst42
-rw-r--r--doc/rbd/rbd-snapshot.rst367
-rw-r--r--doc/rbd/rbd-windows.rst158
35 files changed, 5728 insertions, 0 deletions
diff --git a/doc/rbd/api/index.rst b/doc/rbd/api/index.rst
new file mode 100644
index 000000000..27bb4485d
--- /dev/null
+++ b/doc/rbd/api/index.rst
@@ -0,0 +1,8 @@
+========================
+ Ceph Block Device APIs
+========================
+
+.. toctree::
+ :maxdepth: 2
+
+ librbd (Python) <librbdpy>
diff --git a/doc/rbd/api/librbdpy.rst b/doc/rbd/api/librbdpy.rst
new file mode 100644
index 000000000..981235f87
--- /dev/null
+++ b/doc/rbd/api/librbdpy.rst
@@ -0,0 +1,83 @@
+================
+ Librbd (Python)
+================
+
+.. highlight:: python
+
+The `rbd` python module provides file-like access to RBD images.
+
+
+Example: Creating and writing to an image
+=========================================
+
+To use `rbd`, you must first connect to RADOS and open an IO
+context::
+
+ cluster = rados.Rados(conffile='my_ceph.conf')
+ cluster.connect()
+ ioctx = cluster.open_ioctx('mypool')
+
+Then you instantiate an :class:rbd.RBD object, which you use to create the
+image::
+
+ rbd_inst = rbd.RBD()
+ size = 4 * 1024**3 # 4 GiB
+ rbd_inst.create(ioctx, 'myimage', size)
+
+To perform I/O on the image, you instantiate an :class:rbd.Image object::
+
+ image = rbd.Image(ioctx, 'myimage')
+ data = 'foo' * 200
+ image.write(data, 0)
+
+This writes 'foo' to the first 600 bytes of the image. Note that data
+cannot be :type:unicode - `Librbd` does not know how to deal with
+characters wider than a :c:type:char.
+
+In the end, you will want to close the image, the IO context and the connection to RADOS::
+
+ image.close()
+ ioctx.close()
+ cluster.shutdown()
+
+To be safe, each of these calls would need to be in a separate :finally
+block::
+
+ cluster = rados.Rados(conffile='my_ceph_conf')
+ try:
+ cluster.connect()
+ ioctx = cluster.open_ioctx('my_pool')
+ try:
+ rbd_inst = rbd.RBD()
+ size = 4 * 1024**3 # 4 GiB
+ rbd_inst.create(ioctx, 'myimage', size)
+ image = rbd.Image(ioctx, 'myimage')
+ try:
+ data = 'foo' * 200
+ image.write(data, 0)
+ finally:
+ image.close()
+ finally:
+ ioctx.close()
+ finally:
+ cluster.shutdown()
+
+This can be cumbersome, so the :class:`Rados`, :class:`Ioctx`, and
+:class:`Image` classes can be used as context managers that close/shutdown
+automatically (see :pep:`343`). Using them as context managers, the
+above example becomes::
+
+ with rados.Rados(conffile='my_ceph.conf') as cluster:
+ with cluster.open_ioctx('mypool') as ioctx:
+ rbd_inst = rbd.RBD()
+ size = 4 * 1024**3 # 4 GiB
+ rbd_inst.create(ioctx, 'myimage', size)
+ with rbd.Image(ioctx, 'myimage') as image:
+ data = 'foo' * 200
+ image.write(data, 0)
+
+API Reference
+=============
+
+.. automodule:: rbd
+ :members: RBD, Image, SnapIterator
diff --git a/doc/rbd/disk.conf b/doc/rbd/disk.conf
new file mode 100644
index 000000000..3db9b8a11
--- /dev/null
+++ b/doc/rbd/disk.conf
@@ -0,0 +1,8 @@
+<disk type='network' device='disk'>
+ <source protocol='rbd' name='poolname/imagename'>
+ <host name='{fqdn}' port='6789'/>
+ <host name='{fqdn}' port='6790'/>
+ <host name='{fqdn}' port='6791'/>
+ </source>
+ <target dev='vda' bus='virtio'/>
+</disk>
diff --git a/doc/rbd/index.rst b/doc/rbd/index.rst
new file mode 100644
index 000000000..4a8029bba
--- /dev/null
+++ b/doc/rbd/index.rst
@@ -0,0 +1,72 @@
+===================
+ Ceph Block Device
+===================
+
+.. index:: Ceph Block Device; introduction
+
+A block is a sequence of bytes (often 512).
+Block-based storage interfaces are a mature and common way to store data on
+media including HDDs, SSDs, CDs, floppy disks, and even tape.
+The ubiquity of block device interfaces is a perfect fit for interacting
+with mass data storage including Ceph.
+
+Ceph block devices are thin-provisioned, resizable, and store data striped over
+multiple OSDs. Ceph block devices leverage
+:abbr:`RADOS (Reliable Autonomic Distributed Object Store)` capabilities
+including snapshotting, replication and strong consistency. Ceph block
+storage clients communicate with Ceph clusters through kernel modules or
+the ``librbd`` library.
+
+.. ditaa::
+
+ +------------------------+ +------------------------+
+ | Kernel Module | | librbd |
+ +------------------------+-+------------------------+
+ | RADOS Protocol |
+ +------------------------+-+------------------------+
+ | OSDs | | Monitors |
+ +------------------------+ +------------------------+
+
+.. note:: Kernel modules can use Linux page caching. For ``librbd``-based
+ applications, Ceph supports `RBD Caching`_.
+
+Ceph's block devices deliver high performance with vast scalability to
+`kernel modules`_, or to :abbr:`KVMs (kernel virtual machines)` such as `QEMU`_, and
+cloud-based computing systems like `OpenStack`_ and `CloudStack`_ that rely on
+libvirt and QEMU to integrate with Ceph block devices. You can use the same cluster
+to operate the :ref:`Ceph RADOS Gateway <object-gateway>`, the
+:ref:`Ceph File System <ceph-file-system>`, and Ceph block devices simultaneously.
+
+.. important:: To use Ceph Block Devices, you must have access to a running
+ Ceph cluster.
+
+.. toctree::
+ :maxdepth: 1
+
+ Basic Commands <rados-rbd-cmds>
+
+.. toctree::
+ :maxdepth: 2
+
+ Operations <rbd-operations>
+
+.. toctree::
+ :maxdepth: 2
+
+ Integrations <rbd-integrations>
+
+.. toctree::
+ :maxdepth: 2
+
+ Manpages <man/index>
+
+.. toctree::
+ :maxdepth: 2
+
+ APIs <api/index>
+
+.. _RBD Caching: ./rbd-config-ref/
+.. _kernel modules: ./rbd-ko/
+.. _QEMU: ./qemu-rbd/
+.. _OpenStack: ./rbd-openstack
+.. _CloudStack: ./rbd-cloudstack
diff --git a/doc/rbd/iscsi-initiator-esx.rst b/doc/rbd/iscsi-initiator-esx.rst
new file mode 100644
index 000000000..8bed6f2a2
--- /dev/null
+++ b/doc/rbd/iscsi-initiator-esx.rst
@@ -0,0 +1,105 @@
+------------------------------
+iSCSI Initiator for VMware ESX
+------------------------------
+
+**Prerequisite:**
+
+- VMware ESX 6.5 or later using Virtual Machine compatibility 6.5 with VMFS 6.
+
+**iSCSI Discovery and Multipath Device Setup:**
+
+The following instructions will use the default vSphere web client and esxcli.
+
+#. Enable Software iSCSI
+
+ .. image:: ../images/esx_web_client_storage_main.png
+ :align: center
+
+ Click on "Storage" from "Navigator", and select the "Adapters" tab.
+ From there right click "Configure iSCSI".
+
+#. Set Initiator Name
+
+ .. image:: ../images/esx_config_iscsi_main.png
+ :align: center
+
+ If the initiator name in the "Name & alias" section is not the same name
+ used when creating the client during gwcli setup or the initiator name used
+ in the ansible client_connections client variable, then ssh to the ESX
+ host and run the following esxcli commands to change the name.
+
+ Get the adapter name for Software iSCSI:
+
+ ::
+
+ > esxcli iscsi adapter list
+ > Adapter Driver State UID Description
+ > ------- --------- ------ ------------- ----------------------
+ > vmhba64 iscsi_vmk online iscsi.vmhba64 iSCSI Software Adapter
+
+ In this example the software iSCSI adapter is vmhba64 and the initiator
+ name is iqn.1994-05.com.redhat:rh7-client:
+
+ ::
+
+ > esxcli iscsi adapter set -A vmhba64 -n iqn.1994-05.com.redhat:rh7-client
+
+#. Setup CHAP
+
+ .. image:: ../images/esx_chap.png
+ :align: center
+
+ Expand the CHAP authentication section, select "Do not use CHAP unless
+ required by target" and enter the CHAP credentials used in the gwcli
+ auth command or ansible client_connections credentials variable.
+
+ The Mutual CHAP authentication section should have "Do not use CHAP"
+ selected.
+
+ Warning: There is a bug in the web client where the requested CHAP
+ settings are not always used initially. On the iSCSI gateway kernel
+ logs you will see the error:
+
+ ::
+
+ > kernel: CHAP user or password not set for Initiator ACL
+ > kernel: Security negotiation failed.
+ > kernel: iSCSI Login negotiation failed.
+
+ To workaround this set the CHAP settings with the esxcli command. Here
+ authname is the username and secret is the password used in previous
+ examples:
+
+ ::
+
+ > esxcli iscsi adapter auth chap set --direction=uni --authname=myiscsiusername --secret=myiscsipassword --level=discouraged -A vmhba64
+
+#. Configure iSCSI Settings
+
+ .. image:: ../images/esx_iscsi_recov_timeout.png
+ :align: center
+
+ Expand Advanced settings and set the "RecoveryTimeout" to 25.
+
+#. Set the discovery address
+
+ .. image:: ../images/esx_config_iscsi_main.png
+ :align: center
+
+ In the Dynamic targets section, click "Add dynamic target" and under
+ Addresses add one of the gateway IP addresses added during the iSCSI
+ gateway setup stage in the gwcli section or an IP set in the ansible
+ gateway_ip_list variable. Only one address needs to be added as the gateways
+ have been setup so all the iSCSI portals are returned during discovery.
+
+ Finally, click the "Save configuration" button. In the Devices tab, you
+ should see the RBD image.
+
+ The LUN should be automatically configured and using the ALUA SATP and
+ MRU PSP. Other SATPs and PSPs must not be used. This can be verified with
+ the esxcli command:
+
+ ::
+
+ > esxcli storage nmp path list -d eui.your_devices_id
+
diff --git a/doc/rbd/iscsi-initiator-linux.rst b/doc/rbd/iscsi-initiator-linux.rst
new file mode 100644
index 000000000..ba374c40c
--- /dev/null
+++ b/doc/rbd/iscsi-initiator-linux.rst
@@ -0,0 +1,91 @@
+-------------------------
+iSCSI Initiator for Linux
+-------------------------
+
+**Prerequisite:**
+
+- Package ``iscsi-initiator-utils``
+
+- Package ``device-mapper-multipath``
+
+**Installing:**
+
+Install the iSCSI initiator and multipath tools:
+
+ ::
+
+ # yum install iscsi-initiator-utils
+ # yum install device-mapper-multipath
+
+**Configuring:**
+
+#. Create the default ``/etc/multipath.conf`` file and enable the
+ ``multipathd`` service:
+
+ ::
+
+ # mpathconf --enable --with_multipathd y
+
+#. Add the following to ``/etc/multipath.conf`` file:
+
+ ::
+
+ devices {
+ device {
+ vendor "LIO-ORG"
+ hardware_handler "1 alua"
+ path_grouping_policy "failover"
+ path_selector "queue-length 0"
+ failback 60
+ path_checker tur
+ prio alua
+ prio_args exclusive_pref_bit
+ fast_io_fail_tmo 25
+ no_path_retry queue
+ }
+ }
+
+#. Restart the ``multipathd`` service:
+
+ ::
+
+ # systemctl reload multipathd
+
+**iSCSI Discovery and Setup:**
+
+#. If CHAP was setup on the iSCSI gateway, provide a CHAP username and
+ password by updating the ``/etc/iscsi/iscsid.conf`` file accordingly.
+
+#. Discover the target portals:
+
+ ::
+
+ # iscsiadm -m discovery -t st -p 192.168.56.101
+ 192.168.56.101:3260,1 iqn.2003-01.org.linux-iscsi.rheln1
+ 192.168.56.102:3260,2 iqn.2003-01.org.linux-iscsi.rheln1
+
+#. Login to target:
+
+ ::
+
+ # iscsiadm -m node -T iqn.2003-01.org.linux-iscsi.rheln1 -l
+
+**Multipath IO Setup:**
+
+The multipath daemon (``multipathd``), will set up devices automatically
+based on the ``multipath.conf`` settings. Running the ``multipath``
+command show devices setup in a failover configuration with a priority
+group for each path.
+
+::
+
+ # multipath -ll
+ mpathbt (360014059ca317516a69465c883a29603) dm-1 LIO-ORG ,IBLOCK
+ size=1.0G features='0' hwhandler='1 alua' wp=rw
+ |-+- policy='queue-length 0' prio=50 status=active
+ | `- 28:0:0:1 sde 8:64 active ready running
+ `-+- policy='queue-length 0' prio=10 status=enabled
+ `- 29:0:0:1 sdc 8:32 active ready running
+
+You should now be able to use the RBD image like you would a normal
+multipath’d iSCSI disk.
diff --git a/doc/rbd/iscsi-initiator-win.rst b/doc/rbd/iscsi-initiator-win.rst
new file mode 100644
index 000000000..e76b03d18
--- /dev/null
+++ b/doc/rbd/iscsi-initiator-win.rst
@@ -0,0 +1,102 @@
+-------------------------------------
+iSCSI Initiator for Microsoft Windows
+-------------------------------------
+
+**Prerequisite:**
+
+- Microsoft Windows Server 2016 or later
+
+**iSCSI Initiator, Discovery and Setup:**
+
+#. Install the iSCSI initiator driver and MPIO tools.
+
+#. Launch the MPIO program, click on the "Discover Multi-Paths" tab, check the
+ "Add support for iSCSI devices” box, and click "Add". This will require a
+ reboot.
+
+#. On the iSCSI Initiator Properties window, on the "Discovery" tab, add a target
+ portal. Enter the IP address or DNS name and Port of the Ceph iSCSI gateway.
+
+#. On the “Targets” tab, select the target and click on “Connect”.
+
+#. On the “Connect To Target” window, select the “Enable multi-path” option, and
+ click the “Advanced” button.
+
+#. Under the "Connet using" section, select a “Target portal IP” . Select the
+ “Enable CHAP login on” and enter the "Name" and "Target secret" values from the
+ Ceph iSCSI Ansible client credentials section, and click OK.
+
+#. Repeat steps 5 and 6 for each target portal defined when setting up
+ the iSCSI gateway.
+
+**Multipath IO Setup:**
+
+Configuring the MPIO load balancing policy, setting the timeout and
+retry options are using PowerShell with the ``mpclaim`` command. The
+rest is done in the iSCSI Initiator tool.
+
+.. note::
+ It is recommended to increase the ``PDORemovePeriod`` option to 120
+ seconds from PowerShell. This value might need to be adjusted based
+ on the application. When all paths are down, and 120 seconds
+ expires, the operating system will start failing IO requests.
+
+::
+
+ Set-MPIOSetting -NewPDORemovePeriod 120
+
+::
+
+ mpclaim.exe -l -m 1
+
+::
+
+ mpclaim -s -m
+ MSDSM-wide Load Balance Policy: Fail Over Only
+
+#. Using the iSCSI Initiator tool, from the “Targets” tab, click on
+ the “Devices...” button.
+
+#. From the Devices window, select a disk and click the
+ “MPIO...” button.
+
+#. On the "Device Details" window the paths to each target portal is
+ displayed. If using the ``ceph-ansible`` setup method, the
+ iSCSI gateway will use ALUA to tell the iSCSI initiator which path
+ and iSCSI gateway should be used as the primary path. The Load
+ Balancing Policy “Fail Over Only” must be selected
+
+::
+
+ mpclaim -s -d $MPIO_DISK_ID
+
+.. note::
+ For the ``ceph-ansible`` setup method, there will be one
+ Active/Optimized path which is the path to the iSCSI gateway node
+ that owns the LUN, and there will be an Active/Unoptimized path for
+ each other iSCSI gateway node.
+
+**Tuning:**
+
+Consider using the following registry settings:
+
+- Windows Disk Timeout
+
+ ::
+
+ HKEY_LOCAL_MACHINE\System\CurrentControlSet\Services\Disk
+
+ ::
+
+ TimeOutValue = 65
+
+- Microsoft iSCSI Initiator Driver
+
+ ::
+
+ HKEY_LOCAL_MACHINE\\SYSTEM\CurrentControlSet\Control\Class\{4D36E97B-E325-11CE-BFC1-08002BE10318}\<Instance_Number>\Parameters
+
+ ::
+
+ LinkDownTime = 25
+ SRBTimeoutDelta = 15
diff --git a/doc/rbd/iscsi-initiators.rst b/doc/rbd/iscsi-initiators.rst
new file mode 100644
index 000000000..67447789d
--- /dev/null
+++ b/doc/rbd/iscsi-initiators.rst
@@ -0,0 +1,23 @@
+--------------------------------
+Configuring the iSCSI Initiators
+--------------------------------
+
+- `iSCSI Initiator for Linux <../iscsi-initiator-linux>`_
+
+- `iSCSI Initiator for Microsoft Windows <../iscsi-initiator-win>`_
+
+- `iSCSI Initiator for VMware ESX <../iscsi-initiator-esx>`_
+
+ .. warning::
+
+ Applications that use SCSI persistent group reservations (PGR) and
+ SCSI 2 based reservations are not supported when exporting a RBD image
+ through more than one iSCSI gateway.
+
+.. toctree::
+ :maxdepth: 1
+ :hidden:
+
+ Linux <iscsi-initiator-linux>
+ Microsoft Windows <iscsi-initiator-win>
+ VMware ESX <iscsi-initiator-esx>
diff --git a/doc/rbd/iscsi-monitoring.rst b/doc/rbd/iscsi-monitoring.rst
new file mode 100644
index 000000000..da94bad87
--- /dev/null
+++ b/doc/rbd/iscsi-monitoring.rst
@@ -0,0 +1,85 @@
+------------------------------
+Monitoring Ceph iSCSI gateways
+------------------------------
+
+Ceph provides a tool for iSCSI gateway environments
+to monitor performance of exported RADOS Block Device (RBD) images.
+
+The ``gwtop`` tool is a ``top``-like tool that displays aggregated
+performance metrics of RBD images that are exported to clients over
+iSCSI. The metrics are sourced from a Performance Metrics Domain Agent
+(PMDA). Information from the Linux-IO target (LIO) PMDA is used to list
+each exported RBD image, the connected client, and its associated I/O
+metrics.
+
+**Requirements:**
+
+- A running Ceph iSCSI gateway
+
+**Installing:**
+
+#. As ``root``, install the ``ceph-iscsi-tools`` package on each iSCSI
+ gateway node:
+
+ ::
+
+ # yum install ceph-iscsi-tools
+
+#. As ``root``, install the performance co-pilot package on each iSCSI
+ gateway node:
+
+ ::
+
+ # yum install pcp
+
+#. As ``root``, install the LIO PMDA package on each iSCSI gateway node:
+
+ ::
+
+ # yum install pcp-pmda-lio
+
+#. As ``root``, enable and start the performance co-pilot service on
+ each iSCSI gateway node:
+
+ ::
+
+ # systemctl enable pmcd
+ # systemctl start pmcd
+
+#. As ``root``, register the ``pcp-pmda-lio`` agent:
+
+ ::
+
+ cd /var/lib/pcp/pmdas/lio
+ ./Install
+
+By default, ``gwtop`` assumes the iSCSI gateway configuration object is
+stored in a RADOS object called ``gateway.conf`` in the ``rbd`` pool.
+This configuration defines the iSCSI gateways to contact for gathering
+the performance statistics. This can be overridden by using either the
+``-g`` or ``-c`` flags. See ``gwtop --help`` for more details.
+
+The LIO configuration determines which type of performance statistics to
+extract from performance co-pilot. When ``gwtop`` starts it looks at the
+LIO configuration, and if it find user-space disks, then ``gwtop``
+selects the LIO collector automatically.
+
+**Example ``gwtop`` Outputs**
+
+::
+
+ gwtop 2/2 Gateways CPU% MIN: 4 MAX: 5 Network Total In: 2M Out: 3M 10:20:00
+ Capacity: 8G Disks: 8 IOPS: 503 Clients: 1 Ceph: HEALTH_OK OSDs: 3
+ Pool.Image Src Size iops rMB/s wMB/s Client
+ iscsi.t1703 500M 0 0.00 0.00
+ iscsi.testme1 500M 0 0.00 0.00
+ iscsi.testme2 500M 0 0.00 0.00
+ iscsi.testme3 500M 0 0.00 0.00
+ iscsi.testme5 500M 0 0.00 0.00
+ rbd.myhost_1 T 4G 504 1.95 0.00 rh460p(CON)
+ rbd.test_2 1G 0 0.00 0.00
+ rbd.testme 500M 0 0.00 0.00
+
+In the *Client* column, ``(CON)`` means the iSCSI initiator (client) is
+currently logged into the iSCSI gateway. If ``-multi-`` is displayed,
+then multiple clients are mapped to the single RBD image.
diff --git a/doc/rbd/iscsi-overview.rst b/doc/rbd/iscsi-overview.rst
new file mode 100644
index 000000000..879083c3f
--- /dev/null
+++ b/doc/rbd/iscsi-overview.rst
@@ -0,0 +1,57 @@
+.. _ceph-iscsi:
+
+==================
+Ceph iSCSI Gateway
+==================
+
+The iSCSI Gateway presents a Highly Available (HA) iSCSI target that exports
+RADOS Block Device (RBD) images as SCSI disks. The iSCSI protocol allows
+clients (initiators) to send SCSI commands to storage devices (targets) over a
+TCP/IP network, enabling clients without native Ceph client support to access
+Ceph block storage.
+
+Each iSCSI gateway exploits the Linux IO target kernel subsystem (LIO) to
+provide iSCSI protocol support. LIO utilizes userspace passthrough (TCMU) to
+interact with Ceph's librbd library and expose RBD images to iSCSI clients.
+With Ceph’s iSCSI gateway you can provision a fully integrated block-storage
+infrastructure with all the features and benefits of a conventional Storage
+Area Network (SAN).
+
+.. ditaa::
+ Cluster Network (optional)
+ +-------------------------------------------+
+ | | | |
+ +-------+ +-------+ +-------+ +-------+
+ | | | | | | | |
+ | OSD 1 | | OSD 2 | | OSD 3 | | OSD N |
+ | {s}| | {s}| | {s}| | {s}|
+ +-------+ +-------+ +-------+ +-------+
+ | | | |
+ +--------->| | +---------+ | |<---------+
+ : | | | RBD | | | :
+ | +----------------| Image |----------------+ |
+ | Public Network | {d} | |
+ | +---------+ |
+ | |
+ | +-------------------+ |
+ | +--------------+ | iSCSI Initiators | +--------------+ |
+ | | iSCSI GW | | +-----------+ | | iSCSI GW | |
+ +-->| RBD Module |<--+ | Various | +-->| RBD Module |<--+
+ | | | | Operating | | | |
+ +--------------+ | | Systems | | +--------------+
+ | +-----------+ |
+ +-------------------+
+
+.. warning::
+
+ The iSCSI gateway is in maintenance as of November 2022. This means that
+ it is no longer in active development and will not be updated to add
+ new features.
+
+.. toctree::
+ :maxdepth: 1
+
+ Requirements <iscsi-requirements>
+ Configuring the iSCSI Target <iscsi-targets>
+ Configuring the iSCSI Initiators <iscsi-initiators>
+ Monitoring the iSCSI Gateways <iscsi-monitoring>
diff --git a/doc/rbd/iscsi-requirements.rst b/doc/rbd/iscsi-requirements.rst
new file mode 100644
index 000000000..50dfc2a27
--- /dev/null
+++ b/doc/rbd/iscsi-requirements.rst
@@ -0,0 +1,51 @@
+==========================
+iSCSI Gateway Requirements
+==========================
+
+It is recommended to provision two to four iSCSI gateway nodes to
+realize a highly available Ceph iSCSI gateway solution.
+
+For hardware recommendations, see :ref:`hardware-recommendations` .
+
+.. note::
+ On iSCSI gateway nodes the memory footprint is a function of
+ of the RBD images mapped and can grow to be large. Plan memory
+ requirements accordingly based on the number RBD images to be mapped.
+
+There are no specific iSCSI gateway options for the Ceph Monitors or
+OSDs, but it is important to lower the default heartbeat interval for
+detecting down OSDs to reduce the possibility of initiator timeouts.
+The following configuration options are suggested::
+
+ [osd]
+ osd heartbeat grace = 20
+ osd heartbeat interval = 5
+
+- Updating Running State From a Ceph Monitor Node
+
+ ::
+
+ ceph tell <daemon_type>.<id> config set <parameter_name> <new_value>
+
+ ::
+
+ ceph tell osd.* config set osd_heartbeat_grace 20
+ ceph tell osd.* config set osd_heartbeat_interval 5
+
+- Updating Running State On Each OSD Node
+
+ ::
+
+ ceph daemon <daemon_type>.<id> config set osd_client_watch_timeout 15
+
+ ::
+
+ ceph daemon osd.0 config set osd_heartbeat_grace 20
+ ceph daemon osd.0 config set osd_heartbeat_interval 5
+
+For more details on setting Ceph's configuration options, see
+:ref:`configuring-ceph`. Be sure to persist these settings in
+``/etc/ceph.conf`` or, on Mimic and later releases, in the
+centralized config store.
+
+
diff --git a/doc/rbd/iscsi-target-ansible.rst b/doc/rbd/iscsi-target-ansible.rst
new file mode 100644
index 000000000..d38e3bd01
--- /dev/null
+++ b/doc/rbd/iscsi-target-ansible.rst
@@ -0,0 +1,234 @@
+==========================================
+Configuring the iSCSI Target using Ansible
+==========================================
+
+The Ceph iSCSI gateway is the iSCSI target node and also a Ceph client
+node. The Ceph iSCSI gateway can be provisioned on dedicated node
+or be colocated on a Ceph Object Store Disk (OSD) node. The following steps will
+install and configure the Ceph iSCSI gateway for basic operation.
+
+**Requirements:**
+
+- A running Ceph Luminous (12.2.x) cluster or newer
+
+- Red Hat Enterprise Linux/CentOS 7.5 (or newer); Linux kernel v4.16 (or newer)
+
+- The ``ceph-iscsi`` package installed on all the iSCSI gateway nodes
+
+**Installation:**
+
+#. On the Ansible installer node, which could be either the administration node
+ or a dedicated deployment node, perform the following steps:
+
+ #. As ``root``, install the ``ceph-ansible`` package:
+
+ ::
+
+ # yum install ceph-ansible
+
+ #. Add an entry in ``/etc/ansible/hosts`` file for the gateway group:
+
+ ::
+
+ [iscsigws]
+ ceph-igw-1
+ ceph-igw-2
+
+.. note::
+ If co-locating the iSCSI gateway with an OSD node, then add the OSD node to the
+ ``[iscsigws]`` section.
+
+**Configuration:**
+
+The ``ceph-ansible`` package places a file in the ``/usr/share/ceph-ansible/group_vars/``
+directory called ``iscsigws.yml.sample``. Create a copy of this sample file named
+``iscsigws.yml``. Review the following Ansible variables and descriptions,
+and update accordingly. See the ``iscsigws.yml.sample`` for a full list of
+advanced variables.
+
++--------------------------------------+--------------------------------------+
+| Variable | Meaning/Purpose |
++======================================+======================================+
+| ``seed_monitor`` | Each gateway needs access to the |
+| | ceph cluster for rados and rbd |
+| | calls. This means the iSCSI gateway |
+| | must have an appropriate |
+| | ``/etc/ceph/`` directory defined. |
+| | The ``seed_monitor`` host is used to |
+| | populate the iSCSI gateway’s |
+| | ``/etc/ceph/`` directory. |
++--------------------------------------+--------------------------------------+
+| ``cluster_name`` | Define a custom storage cluster |
+| | name. |
++--------------------------------------+--------------------------------------+
+| ``gateway_keyring`` | Define a custom keyring name. |
++--------------------------------------+--------------------------------------+
+| ``deploy_settings`` | If set to ``true``, then deploy the |
+| | settings when the playbook is ran. |
++--------------------------------------+--------------------------------------+
+| ``perform_system_checks`` | This is a boolean value that checks |
+| | for multipath and lvm configuration |
+| | settings on each gateway. It must be |
+| | set to true for at least the first |
+| | run to ensure multipathd and lvm are |
+| | configured properly. |
++--------------------------------------+--------------------------------------+
+| ``api_user`` | The user name for the API. The |
+| | default is `admin`. |
++--------------------------------------+--------------------------------------+
+| ``api_password`` | The password for using the API. The |
+| | default is `admin`. |
++--------------------------------------+--------------------------------------+
+| ``api_port`` | The TCP port number for using the |
+| | API. The default is `5000`. |
++--------------------------------------+--------------------------------------+
+| ``api_secure`` | True if TLS must be used. The |
+| | default is `false`. If true the user |
+| | must create the necessary |
+| | certificate and key files. See the |
+| | gwcli man file for details. |
++--------------------------------------+--------------------------------------+
+| ``trusted_ip_list`` | A list of IPv4 or IPv6 addresses |
+| | who have access to the API. By |
+| | default, only the iSCSI gateway |
+| | nodes have access. |
++--------------------------------------+--------------------------------------+
+
+**Deployment:**
+
+Perform the followint steps on the Ansible installer node.
+
+#. As ``root``, execute the Ansible playbook:
+
+ ::
+
+ # cd /usr/share/ceph-ansible
+ # ansible-playbook site.yml --limit iscsigws
+
+ .. note::
+ The Ansible playbook will handle RPM dependencies, setting up daemons,
+ and installing gwcli so it can be used to create iSCSI targets and export
+ RBD images as LUNs. In past versions, ``iscsigws.yml`` could define the
+ iSCSI target and other objects like clients, images and LUNs, but this is
+ no longer supported.
+
+#. Verify the configuration from an iSCSI gateway node:
+
+ ::
+
+ # gwcli ls
+
+ .. note::
+ See the `Configuring the iSCSI Target using the Command Line Interface`_
+ section to create gateways, LUNs, and clients using the `gwcli` tool.
+
+ .. important::
+ Attempting to use the ``targetcli`` tool to change the configuration will
+ cause problems including ALUA misconfiguration and path failover
+ issues. There is the potential to corrupt data, to have mismatched
+ configuration across iSCSI gateways, and to have mismatched WWN information,
+ leading to client multipath problems.
+
+**Service Management:**
+
+The ``ceph-iscsi`` package installs the configuration management
+logic and a Systemd service called ``rbd-target-api``. When the Systemd
+service is enabled, the ``rbd-target-api`` will start at boot time and
+will restore the Linux IO state. The Ansible playbook disables the
+target service during the deployment. Below are the outcomes of when
+interacting with the ``rbd-target-api`` Systemd service.
+
+::
+
+ # systemctl <start|stop|restart|reload> rbd-target-api
+
+- ``reload``
+
+ A reload request will force ``rbd-target-api`` to reread the
+ configuration and apply it to the current running environment. This
+ is normally not required, since changes are deployed in parallel from
+ Ansible to all iSCSI gateway nodes
+
+- ``stop``
+
+ A stop request will close the gateway’s portal interfaces, dropping
+ connections to clients and wipe the current LIO configuration from
+ the kernel. This returns the iSCSI gateway to a clean state. When
+ clients are disconnected, active I/O is rescheduled to the other
+ iSCSI gateways by the client side multipathing layer.
+
+**Removing the Configuration:**
+
+The ``ceph-ansible`` package provides an Ansible playbook to
+remove the iSCSI gateway configuration and related RBD images. The
+Ansible playbook is ``/usr/share/ceph-ansible/purge_gateways.yml``. When
+this Ansible playbook is ran a prompted for the type of purge to
+perform:
+
+*lio* :
+
+In this mode the LIO configuration is purged on all iSCSI gateways that
+are defined. Disks that were created are left untouched within the Ceph
+storage cluster.
+
+*all* :
+
+When ``all`` is chosen, the LIO configuration is removed together with
+**all** RBD images that were defined within the iSCSI gateway
+environment, other unrelated RBD images will not be removed. Ensure the
+correct mode is chosen, this operation will delete data.
+
+.. warning::
+ A purge operation is destructive action against your iSCSI gateway
+ environment.
+
+.. warning::
+ A purge operation will fail, if RBD images have snapshots or clones
+ and are exported through the Ceph iSCSI gateway.
+
+::
+
+ [root@rh7-iscsi-client ceph-ansible]# ansible-playbook purge_gateways.yml
+ Which configuration elements should be purged? (all, lio or abort) [abort]: all
+
+
+ PLAY [Confirm removal of the iSCSI gateway configuration] *********************
+
+
+ GATHERING FACTS ***************************************************************
+ ok: [localhost]
+
+
+ TASK: [Exit playbook if user aborted the purge] *******************************
+ skipping: [localhost]
+
+
+ TASK: [set_fact ] *************************************************************
+ ok: [localhost]
+
+
+ PLAY [Removing the gateway configuration] *************************************
+
+
+ GATHERING FACTS ***************************************************************
+ ok: [ceph-igw-1]
+ ok: [ceph-igw-2]
+
+
+ TASK: [igw_purge | purging the gateway configuration] *************************
+ changed: [ceph-igw-1]
+ changed: [ceph-igw-2]
+
+
+ TASK: [igw_purge | deleting configured rbd devices] ***************************
+ changed: [ceph-igw-1]
+ changed: [ceph-igw-2]
+
+
+ PLAY RECAP ********************************************************************
+ ceph-igw-1 : ok=3 changed=2 unreachable=0 failed=0
+ ceph-igw-2 : ok=3 changed=2 unreachable=0 failed=0
+ localhost : ok=2 changed=0 unreachable=0 failed=0
+
+
+.. _Configuring the iSCSI Target using the Command Line Interface: ../iscsi-target-cli
diff --git a/doc/rbd/iscsi-target-cli-manual-install.rst b/doc/rbd/iscsi-target-cli-manual-install.rst
new file mode 100644
index 000000000..ccc422e0d
--- /dev/null
+++ b/doc/rbd/iscsi-target-cli-manual-install.rst
@@ -0,0 +1,190 @@
+==============================
+Manual ceph-iscsi Installation
+==============================
+
+**Requirements**
+
+To complete the installation of ceph-iscsi, there are 4 steps:
+
+1. Install common packages from your Linux distribution's software repository
+2. Install Git to fetch the remaining packages directly from their Git repositories
+3. Ensure a compatible kernel is used
+4. Install all the components of ceph-iscsi and start associated daemons:
+
+ - tcmu-runner
+ - rtslib-fb
+ - configshell-fb
+ - targetcli-fb
+ - ceph-iscsi
+
+
+1. Install Common Packages
+==========================
+
+The following packages will be used by ceph-iscsi and target tools.
+They must be installed from your Linux distribution's software repository
+on each machine that will be a iSCSI gateway:
+
+- libnl3
+- libkmod
+- librbd1
+- pyparsing
+- python kmod
+- python pyudev
+- python gobject
+- python urwid
+- python pyparsing
+- python rados
+- python rbd
+- python netifaces
+- python crypto
+- python requests
+- python flask
+- pyOpenSSL
+
+
+2. Install Git
+==============
+
+In order to install all the packages needed to run iSCSI with Ceph, you need to download them directly from their repository by using Git.
+On CentOS/RHEL execute:
+
+::
+
+ > sudo yum install git
+
+On Debian/Ubuntu execute:
+
+::
+
+ > sudo apt install git
+
+To know more about Git and how it works, please, visit https://git-scm.com
+
+
+3. Ensure a compatible kernel is used
+=====================================
+
+Ensure you use a supported kernel that contains the required Ceph iSCSI patches:
+
+- all Linux distribution with a kernel v4.16 or newer, or
+- Red Hat Enterprise Linux or CentOS 7.5 or later (in these distributions ceph-iscsi support is backported)
+
+If you are already using a compatible kernel, you can go to next step.
+However, if you are NOT using a compatible kernel then check your distro's
+documentation for specific instructions on how to build this kernel. The only
+Ceph iSCSI specific requirements are that the following build options must be
+enabled:
+
+ ::
+
+ CONFIG_TARGET_CORE=m
+ CONFIG_TCM_USER2=m
+ CONFIG_ISCSI_TARGET=m
+
+
+4. Install ceph-iscsi
+========================================================
+
+Finally, the remaining tools can be fetched directly from their Git repositories and their associated services started
+
+
+tcmu-runner
+-----------
+
+ Installation:
+
+ ::
+
+ > git clone https://github.com/open-iscsi/tcmu-runner
+ > cd tcmu-runner
+
+ Run the following command to install all the needed dependencies:
+
+ ::
+
+ > ./extra/install_dep.sh
+
+ Now you can build the tcmu-runner.
+ To do so, use the following build command:
+
+ ::
+
+ > cmake -Dwith-glfs=false -Dwith-qcow=false -DSUPPORT_SYSTEMD=ON -DCMAKE_INSTALL_PREFIX=/usr
+ > make install
+
+ Enable and start the daemon:
+
+ ::
+
+ > systemctl daemon-reload
+ > systemctl enable tcmu-runner
+ > systemctl start tcmu-runner
+
+
+rtslib-fb
+---------
+
+ Installation:
+
+ ::
+
+ > git clone https://github.com/open-iscsi/rtslib-fb.git
+ > cd rtslib-fb
+ > python setup.py install
+
+configshell-fb
+--------------
+
+ Installation:
+
+ ::
+
+ > git clone https://github.com/open-iscsi/configshell-fb.git
+ > cd configshell-fb
+ > python setup.py install
+
+targetcli-fb
+------------
+
+ Installation:
+
+ ::
+
+ > git clone https://github.com/open-iscsi/targetcli-fb.git
+ > cd targetcli-fb
+ > python setup.py install
+ > mkdir /etc/target
+ > mkdir /var/target
+
+ .. warning:: The ceph-iscsi tools assume they are managing all targets
+ on the system. If targets have been setup and are being managed by
+ targetcli the target service must be disabled.
+
+ceph-iscsi
+-----------------
+
+ Installation:
+
+ ::
+
+ > git clone https://github.com/ceph/ceph-iscsi.git
+ > cd ceph-iscsi
+ > python setup.py install --install-scripts=/usr/bin
+ > cp usr/lib/systemd/system/rbd-target-gw.service /lib/systemd/system
+ > cp usr/lib/systemd/system/rbd-target-api.service /lib/systemd/system
+
+ Enable and start the daemon:
+
+ ::
+
+ > systemctl daemon-reload
+ > systemctl enable rbd-target-gw
+ > systemctl start rbd-target-gw
+ > systemctl enable rbd-target-api
+ > systemctl start rbd-target-api
+
+Installation is complete. Proceed to the setup section in the
+`main ceph-iscsi CLI page`_.
+
+.. _`main ceph-iscsi CLI page`: ../iscsi-target-cli
diff --git a/doc/rbd/iscsi-target-cli.rst b/doc/rbd/iscsi-target-cli.rst
new file mode 100644
index 000000000..d888a34b0
--- /dev/null
+++ b/doc/rbd/iscsi-target-cli.rst
@@ -0,0 +1,244 @@
+=============================================================
+Configuring the iSCSI Target using the Command Line Interface
+=============================================================
+
+The Ceph iSCSI gateway is both an iSCSI target and a Ceph client;
+think of it as a "translator" between Ceph's RBD interface
+and the iSCSI standard. The Ceph iSCSI gateway can run on a
+standalone node or be colocated with other daemons eg. on
+a Ceph Object Store Disk (OSD) node. When co-locating, ensure
+that sufficient CPU and memory are available to share.
+The following steps install and configure the Ceph iSCSI gateway for basic operation.
+
+**Requirements:**
+
+- A running Ceph Luminous or later storage cluster
+
+- Red Hat Enterprise Linux/CentOS 7.5 (or newer); Linux kernel v4.16 (or newer)
+
+- The following packages must be installed from your Linux distribution's software repository:
+
+ - ``targetcli-2.1.fb47`` or newer package
+
+ - ``python-rtslib-2.1.fb68`` or newer package
+
+ - ``tcmu-runner-1.4.0`` or newer package
+
+ - ``ceph-iscsi-3.2`` or newer package
+
+ .. important::
+ If previous versions of these packages exist, then they must
+ be removed first before installing the newer versions.
+
+Do the following steps on the Ceph iSCSI gateway node before proceeding
+to the *Installing* section:
+
+#. If the Ceph iSCSI gateway is not colocated on an OSD node, then copy
+ the Ceph configuration files, located in ``/etc/ceph/``, from a
+ running Ceph node in the storage cluster to the iSCSI Gateway node.
+ The Ceph configuration files must exist on the iSCSI gateway node
+ under ``/etc/ceph/``.
+
+#. Install and configure the `Ceph Command-line Interface`_
+
+#. If needed, open TCP ports 3260 and 5000 on the firewall.
+
+ .. note::
+ Access to port 5000 should be restricted to a trusted internal network or
+ only the individual hosts where ``gwcli`` is used or ``ceph-mgr`` daemons
+ are running.
+
+#. Create a new or use an existing RADOS Block Device (RBD).
+
+**Installing:**
+
+If you are using the upstream ceph-iscsi package follow the
+`manual install instructions`_.
+
+.. _`manual install instructions`: ../iscsi-target-cli-manual-install
+
+.. toctree::
+ :hidden:
+
+ iscsi-target-cli-manual-install
+
+For rpm based instructions execute the following commands:
+
+#. As ``root``, on all iSCSI gateway nodes, install the
+ ``ceph-iscsi`` package:
+
+ ::
+
+ # yum install ceph-iscsi
+
+#. As ``root``, on all iSCSI gateway nodes, install the ``tcmu-runner``
+ package:
+
+ ::
+
+ # yum install tcmu-runner
+
+**Setup:**
+
+#. gwcli requires a pool with the name ``rbd``, so it can store metadata
+ like the iSCSI configuration. To check if this pool has been created
+ run:
+
+ ::
+
+ # ceph osd lspools
+
+ If it does not exist instructions for creating pools can be found on the
+ `RADOS pool operations page
+ <http://docs.ceph.com/en/latest/rados/operations/pools/>`_.
+
+#. As ``root``, on a iSCSI gateway node, create a file named
+ ``iscsi-gateway.cfg`` in the ``/etc/ceph/`` directory:
+
+ ::
+
+ # touch /etc/ceph/iscsi-gateway.cfg
+
+ #. Edit the ``iscsi-gateway.cfg`` file and add the following lines:
+
+ ::
+
+ [config]
+ # Name of the Ceph storage cluster. A suitable Ceph configuration file allowing
+ # access to the Ceph storage cluster from the gateway node is required, if not
+ # colocated on an OSD node.
+ cluster_name = ceph
+
+ # Place a copy of the ceph cluster's admin keyring in the gateway's /etc/ceph
+ # drectory and reference the filename here
+ gateway_keyring = ceph.client.admin.keyring
+
+
+ # API settings.
+ # The API supports a number of options that allow you to tailor it to your
+ # local environment. If you want to run the API under https, you will need to
+ # create cert/key files that are compatible for each iSCSI gateway node, that is
+ # not locked to a specific node. SSL cert and key files *must* be called
+ # 'iscsi-gateway.crt' and 'iscsi-gateway.key' and placed in the '/etc/ceph/' directory
+ # on *each* gateway node. With the SSL files in place, you can use 'api_secure = true'
+ # to switch to https mode.
+
+ # To support the API, the bare minimum settings are:
+ api_secure = false
+
+ # Additional API configuration options are as follows, defaults shown.
+ # api_user = admin
+ # api_password = admin
+ # api_port = 5001
+ # trusted_ip_list = 192.168.0.10,192.168.0.11
+
+ .. note::
+ trusted_ip_list is a list of IP addresses on each iSCSI gateway that
+ will be used for management operations like target creation, LUN
+ exporting, etc. The IP can be the same that will be used for iSCSI
+ data, like READ/WRITE commands to/from the RBD image, but using
+ separate IPs is recommended.
+
+ .. important::
+ The ``iscsi-gateway.cfg`` file must be identical on all iSCSI gateway nodes.
+
+ #. As ``root``, copy the ``iscsi-gateway.cfg`` file to all iSCSI
+ gateway nodes.
+
+#. As ``root``, on all iSCSI gateway nodes, enable and start the API
+ service:
+
+ ::
+
+ # systemctl daemon-reload
+
+ # systemctl enable rbd-target-gw
+ # systemctl start rbd-target-gw
+
+ # systemctl enable rbd-target-api
+ # systemctl start rbd-target-api
+
+
+**Configuring:**
+
+gwcli will create and configure the iSCSI target and RBD images and copy the
+configuration across the gateways setup in the last section. Lower level
+tools including targetcli and rbd can be used to query the local configuration,
+but should not be used to modify it. This next section will demonstrate how
+to create a iSCSI target and export a RBD image as LUN 0.
+
+#. As ``root``, on a iSCSI gateway node, start the iSCSI gateway
+ command-line interface:
+
+ ::
+
+ # gwcli
+
+#. Go to iscsi-targets and create a target with the name
+ iqn.2003-01.com.redhat.iscsi-gw:iscsi-igw:
+
+ ::
+
+ > /> cd /iscsi-target
+ > /iscsi-target> create iqn.2003-01.com.redhat.iscsi-gw:iscsi-igw
+
+#. Create the iSCSI gateways. The IPs used below are the ones that will be
+ used for iSCSI data like READ and WRITE commands. They can be the
+ same IPs used for management operations listed in trusted_ip_list,
+ but it is recommended that different IPs are used.
+
+ ::
+
+ > /iscsi-target> cd iqn.2003-01.com.redhat.iscsi-gw:iscsi-igw/gateways
+ > /iscsi-target...-igw/gateways> create ceph-gw-1 10.172.19.21
+ > /iscsi-target...-igw/gateways> create ceph-gw-2 10.172.19.22
+
+ If not using RHEL/CentOS or using an upstream or ceph-iscsi-test kernel,
+ the skipchecks=true argument must be used. This will avoid the Red Hat kernel
+ and rpm checks:
+
+ ::
+
+ > /iscsi-target> cd iqn.2003-01.com.redhat.iscsi-gw:iscsi-igw/gateways
+ > /iscsi-target...-igw/gateways> create ceph-gw-1 10.172.19.21 skipchecks=true
+ > /iscsi-target...-igw/gateways> create ceph-gw-2 10.172.19.22 skipchecks=true
+
+#. Add a RBD image with the name disk_1 in the pool rbd:
+
+ ::
+
+ > /iscsi-target...-igw/gateways> cd /disks
+ > /disks> create pool=rbd image=disk_1 size=90G
+
+#. Create a client with the initiator name iqn.1994-05.com.redhat:rh7-client:
+
+ ::
+
+ > /disks> cd /iscsi-target/iqn.2003-01.com.redhat.iscsi-gw:iscsi-igw/hosts
+ > /iscsi-target...eph-igw/hosts> create iqn.1994-05.com.redhat:rh7-client
+
+#. Set the client's CHAP username to myiscsiusername and password to
+ myiscsipassword:
+
+ ::
+
+ > /iscsi-target...at:rh7-client> auth username=myiscsiusername password=myiscsipassword
+
+ .. warning::
+ CHAP must always be configured. Without CHAP, the target will
+ reject any login requests.
+
+#. Add the disk to the client:
+
+ ::
+
+ > /iscsi-target...at:rh7-client> disk add rbd/disk_1
+
+The next step is to configure the iSCSI initiators.
+
+.. _`Ceph Command-line Interface`: ../../start/quick-rbd/#install-ceph
+
+.. toctree::
+ :hidden:
+
+ ../../start/quick-rbd
diff --git a/doc/rbd/iscsi-targets.rst b/doc/rbd/iscsi-targets.rst
new file mode 100644
index 000000000..d2a035283
--- /dev/null
+++ b/doc/rbd/iscsi-targets.rst
@@ -0,0 +1,27 @@
+=============
+iSCSI Targets
+=============
+
+Traditionally, block-level access to a Ceph storage cluster has been
+limited to QEMU and ``librbd``, which is a key enabler for adoption
+within OpenStack environments. Starting with the Ceph Luminous release,
+block-level access is expanding to offer standard iSCSI support allowing
+wider platform usage, and potentially opening new use cases.
+
+- Red Hat Enterprise Linux/CentOS 7.5 (or newer); Linux kernel v4.16 (or newer)
+
+- A working Ceph Storage cluster, deployed with ``ceph-ansible`` or using the command-line interface
+
+- iSCSI gateways nodes, which can either be colocated with OSD nodes or on dedicated nodes
+
+- Separate network subnets for iSCSI front-end traffic and Ceph back-end traffic
+
+A choice of using Ansible or the command-line interface are the
+available deployment methods for installing and configuring the Ceph
+iSCSI gateway:
+
+.. toctree::
+ :maxdepth: 1
+
+ Using Ansible <iscsi-target-ansible>
+ Using the Command Line Interface <iscsi-target-cli>
diff --git a/doc/rbd/libvirt.rst b/doc/rbd/libvirt.rst
new file mode 100644
index 000000000..e3523f8a8
--- /dev/null
+++ b/doc/rbd/libvirt.rst
@@ -0,0 +1,323 @@
+=================================
+ Using libvirt with Ceph RBD
+=================================
+
+.. index:: Ceph Block Device; livirt
+
+The ``libvirt`` library creates a virtual machine abstraction layer between
+hypervisor interfaces and the software applications that use them. With
+``libvirt``, developers and system administrators can focus on a common
+management framework, common API, and common shell interface (i.e., ``virsh``)
+to many different hypervisors, including:
+
+- QEMU/KVM
+- XEN
+- LXC
+- VirtualBox
+- etc.
+
+Ceph block devices support QEMU/KVM. You can use Ceph block devices with
+software that interfaces with ``libvirt``. The following stack diagram
+illustrates how ``libvirt`` and QEMU use Ceph block devices via ``librbd``.
+
+
+.. ditaa::
+
+ +---------------------------------------------------+
+ | libvirt |
+ +------------------------+--------------------------+
+ |
+ | configures
+ v
+ +---------------------------------------------------+
+ | QEMU |
+ +---------------------------------------------------+
+ | librbd |
+ +---------------------------------------------------+
+ | librados |
+ +------------------------+-+------------------------+
+ | OSDs | | Monitors |
+ +------------------------+ +------------------------+
+
+
+The most common ``libvirt`` use case involves providing Ceph block devices to
+cloud solutions like OpenStack or CloudStack. The cloud solution uses
+``libvirt`` to interact with QEMU/KVM, and QEMU/KVM interacts with Ceph block
+devices via ``librbd``. See `Block Devices and OpenStack`_ and `Block Devices
+and CloudStack`_ for details. See `Installation`_ for installation details.
+
+You can also use Ceph block devices with ``libvirt``, ``virsh`` and the
+``libvirt`` API. See `libvirt Virtualization API`_ for details.
+
+
+To create VMs that use Ceph block devices, use the procedures in the following
+sections. In the exemplary embodiment, we have used ``libvirt-pool`` for the pool
+name, ``client.libvirt`` for the user name, and ``new-libvirt-image`` for the
+image name. You may use any value you like, but ensure you replace those values
+when executing commands in the subsequent procedures.
+
+
+Configuring Ceph
+================
+
+To configure Ceph for use with ``libvirt``, perform the following steps:
+
+#. `Create a pool`_. The following example uses the
+ pool name ``libvirt-pool``.::
+
+ ceph osd pool create libvirt-pool
+
+ Verify the pool exists. ::
+
+ ceph osd lspools
+
+#. Use the ``rbd`` tool to initialize the pool for use by RBD::
+
+ rbd pool init <pool-name>
+
+#. `Create a Ceph User`_ (or use ``client.admin`` for version 0.9.7 and
+ earlier). The following example uses the Ceph user name ``client.libvirt``
+ and references ``libvirt-pool``. ::
+
+ ceph auth get-or-create client.libvirt mon 'profile rbd' osd 'profile rbd pool=libvirt-pool'
+
+ Verify the name exists. ::
+
+ ceph auth ls
+
+ **NOTE**: ``libvirt`` will access Ceph using the ID ``libvirt``,
+ not the Ceph name ``client.libvirt``. See `User Management - User`_ and
+ `User Management - CLI`_ for a detailed explanation of the difference
+ between ID and name.
+
+#. Use QEMU to `create an image`_ in your RBD pool.
+ The following example uses the image name ``new-libvirt-image``
+ and references ``libvirt-pool``. ::
+
+ qemu-img create -f rbd rbd:libvirt-pool/new-libvirt-image 2G
+
+ Verify the image exists. ::
+
+ rbd -p libvirt-pool ls
+
+ **NOTE:** You can also use `rbd create`_ to create an image, but we
+ recommend ensuring that QEMU is working properly.
+
+.. tip:: Optionally, if you wish to enable debug logs and the admin socket for
+ this client, you can add the following section to ``/etc/ceph/ceph.conf``::
+
+ [client.libvirt]
+ log file = /var/log/ceph/qemu-guest-$pid.log
+ admin socket = /var/run/ceph/$cluster-$type.$id.$pid.$cctid.asok
+
+ The ``client.libvirt`` section name should match the cephx user you created
+ above.
+ If SELinux or AppArmor is enabled, note that this could prevent the client
+ process (qemu via libvirt) from doing some operations, such as writing logs
+ or operate the images or admin socket to the destination locations (``/var/
+ log/ceph`` or ``/var/run/ceph``). Additionally, make sure that the libvirt
+ and qemu users have appropriate access to the specified directory.
+
+
+Preparing the VM Manager
+========================
+
+You may use ``libvirt`` without a VM manager, but you may find it simpler to
+create your first domain with ``virt-manager``.
+
+#. Install a virtual machine manager. See `KVM/VirtManager`_ for details. ::
+
+ sudo apt-get install virt-manager
+
+#. Download an OS image (if necessary).
+
+#. Launch the virtual machine manager. ::
+
+ sudo virt-manager
+
+
+
+Creating a VM
+=============
+
+To create a VM with ``virt-manager``, perform the following steps:
+
+#. Press the **Create New Virtual Machine** button.
+
+#. Name the new virtual machine domain. In the exemplary embodiment, we
+ use the name ``libvirt-virtual-machine``. You may use any name you wish,
+ but ensure you replace ``libvirt-virtual-machine`` with the name you
+ choose in subsequent commandline and configuration examples. ::
+
+ libvirt-virtual-machine
+
+#. Import the image. ::
+
+ /path/to/image/recent-linux.img
+
+ **NOTE:** Import a recent image. Some older images may not rescan for
+ virtual devices properly.
+
+#. Configure and start the VM.
+
+#. You may use ``virsh list`` to verify the VM domain exists. ::
+
+ sudo virsh list
+
+#. Login to the VM (root/root)
+
+#. Stop the VM before configuring it for use with Ceph.
+
+
+Configuring the VM
+==================
+
+When configuring the VM for use with Ceph, it is important to use ``virsh``
+where appropriate. Additionally, ``virsh`` commands often require root
+privileges (i.e., ``sudo``) and will not return appropriate results or notify
+you that root privileges are required. For a reference of ``virsh``
+commands, refer to `Virsh Command Reference`_.
+
+
+#. Open the configuration file with ``virsh edit``. ::
+
+ sudo virsh edit {vm-domain-name}
+
+ Under ``<devices>`` there should be a ``<disk>`` entry. ::
+
+ <devices>
+ <emulator>/usr/bin/kvm</emulator>
+ <disk type='file' device='disk'>
+ <driver name='qemu' type='raw'/>
+ <source file='/path/to/image/recent-linux.img'/>
+ <target dev='vda' bus='virtio'/>
+ <address type='drive' controller='0' bus='0' unit='0'/>
+ </disk>
+
+
+ Replace ``/path/to/image/recent-linux.img`` with the path to the OS image.
+ The minimum kernel for using the faster ``virtio`` bus is 2.6.25. See
+ `Virtio`_ for details.
+
+ **IMPORTANT:** Use ``sudo virsh edit`` instead of a text editor. If you edit
+ the configuration file under ``/etc/libvirt/qemu`` with a text editor,
+ ``libvirt`` may not recognize the change. If there is a discrepancy between
+ the contents of the XML file under ``/etc/libvirt/qemu`` and the result of
+ ``sudo virsh dumpxml {vm-domain-name}``, then your VM may not work
+ properly.
+
+
+#. Add the Ceph RBD image you created as a ``<disk>`` entry. ::
+
+ <disk type='network' device='disk'>
+ <source protocol='rbd' name='libvirt-pool/new-libvirt-image'>
+ <host name='{monitor-host}' port='6789'/>
+ </source>
+ <target dev='vdb' bus='virtio'/>
+ </disk>
+
+ Replace ``{monitor-host}`` with the name of your host, and replace the
+ pool and/or image name as necessary. You may add multiple ``<host>``
+ entries for your Ceph monitors. The ``dev`` attribute is the logical
+ device name that will appear under the ``/dev`` directory of your
+ VM. The optional ``bus`` attribute indicates the type of disk device to
+ emulate. The valid settings are driver specific (e.g., "ide", "scsi",
+ "virtio", "xen", "usb" or "sata").
+
+ See `Disks`_ for details of the ``<disk>`` element, and its child elements
+ and attributes.
+
+#. Save the file.
+
+#. If your Ceph Storage Cluster has `Ceph Authentication`_ enabled (it does by
+ default), you must generate a secret. ::
+
+ cat > secret.xml <<EOF
+ <secret ephemeral='no' private='no'>
+ <usage type='ceph'>
+ <name>client.libvirt secret</name>
+ </usage>
+ </secret>
+ EOF
+
+#. Define the secret. ::
+
+ sudo virsh secret-define --file secret.xml
+ {uuid of secret}
+
+#. Get the ``client.libvirt`` key and save the key string to a file. ::
+
+ ceph auth get-key client.libvirt | sudo tee client.libvirt.key
+
+#. Set the UUID of the secret. ::
+
+ sudo virsh secret-set-value --secret {uuid of secret} --base64 $(cat client.libvirt.key) && rm client.libvirt.key secret.xml
+
+ You must also set the secret manually by adding the following ``<auth>``
+ entry to the ``<disk>`` element you entered earlier (replacing the
+ ``uuid`` value with the result from the command line example above). ::
+
+ sudo virsh edit {vm-domain-name}
+
+ Then, add ``<auth></auth>`` element to the domain configuration file::
+
+ ...
+ </source>
+ <auth username='libvirt'>
+ <secret type='ceph' uuid='{uuid of secret}'/>
+ </auth>
+ <target ...
+
+
+ **NOTE:** The exemplary ID is ``libvirt``, not the Ceph name
+ ``client.libvirt`` as generated at step 2 of `Configuring Ceph`_. Ensure
+ you use the ID component of the Ceph name you generated. If for some reason
+ you need to regenerate the secret, you will have to execute
+ ``sudo virsh secret-undefine {uuid}`` before executing
+ ``sudo virsh secret-set-value`` again.
+
+
+Summary
+=======
+
+Once you have configured the VM for use with Ceph, you can start the VM.
+To verify that the VM and Ceph are communicating, you may perform the
+following procedures.
+
+
+#. Check to see if Ceph is running::
+
+ ceph health
+
+#. Check to see if the VM is running. ::
+
+ sudo virsh list
+
+#. Check to see if the VM is communicating with Ceph. Replace
+ ``{vm-domain-name}`` with the name of your VM domain::
+
+ sudo virsh qemu-monitor-command --hmp {vm-domain-name} 'info block'
+
+#. Check to see if the device from ``<target dev='vdb' bus='virtio'/>`` exists::
+
+ virsh domblklist {vm-domain-name} --details
+
+If everything looks okay, you may begin using the Ceph block device
+within your VM.
+
+
+.. _Installation: ../../install
+.. _libvirt Virtualization API: http://www.libvirt.org
+.. _Block Devices and OpenStack: ../rbd-openstack
+.. _Block Devices and CloudStack: ../rbd-cloudstack
+.. _Create a pool: ../../rados/operations/pools#create-a-pool
+.. _Create a Ceph User: ../../rados/operations/user-management#add-a-user
+.. _create an image: ../qemu-rbd#creating-images-with-qemu
+.. _Virsh Command Reference: http://www.libvirt.org/virshcmdref.html
+.. _KVM/VirtManager: https://help.ubuntu.com/community/KVM/VirtManager
+.. _Ceph Authentication: ../../rados/configuration/auth-config-ref
+.. _Disks: http://www.libvirt.org/formatdomain.html#elementsDisks
+.. _rbd create: ../rados-rbd-cmds#creating-a-block-device-image
+.. _User Management - User: ../../rados/operations/user-management#user
+.. _User Management - CLI: ../../rados/operations/user-management#command-line-usage
+.. _Virtio: http://www.linux-kvm.org/page/Virtio
diff --git a/doc/rbd/man/index.rst b/doc/rbd/man/index.rst
new file mode 100644
index 000000000..110273acc
--- /dev/null
+++ b/doc/rbd/man/index.rst
@@ -0,0 +1,16 @@
+============================
+ Ceph Block Device Manpages
+============================
+
+.. toctree::
+ :maxdepth: 1
+
+ rbd <../../man/8/rbd>
+ rbd-fuse <../../man/8/rbd-fuse>
+ rbd-nbd <../../man/8/rbd-nbd>
+ rbd-ggate <../../man/8/rbd-ggate>
+ rbd-map <../../man/8/rbdmap>
+ ceph-rbdnamer <../../man/8/ceph-rbdnamer>
+ rbd-replay-prep <../../man/8/rbd-replay-prep>
+ rbd-replay <../../man/8/rbd-replay>
+ rbd-replay-many <../../man/8/rbd-replay-many>
diff --git a/doc/rbd/qemu-rbd.rst b/doc/rbd/qemu-rbd.rst
new file mode 100644
index 000000000..f5925d6c3
--- /dev/null
+++ b/doc/rbd/qemu-rbd.rst
@@ -0,0 +1,220 @@
+========================
+ QEMU and Block Devices
+========================
+
+.. index:: Ceph Block Device; QEMU KVM
+
+The most frequent Ceph Block Device use case involves providing block device
+images to virtual machines. For example, a user may create a "golden" image
+with an OS and any relevant software in an ideal configuration. Then the user
+takes a snapshot of the image. Finally the user clones the snapshot (potentially
+many times). See `Snapshots`_ for details. The ability to make copy-on-write
+clones of a snapshot means that Ceph can provision block device images to
+virtual machines quickly, because the client doesn't have to download the entire
+image each time it spins up a new virtual machine.
+
+
+.. ditaa::
+
+ +---------------------------------------------------+
+ | QEMU |
+ +---------------------------------------------------+
+ | librbd |
+ +---------------------------------------------------+
+ | librados |
+ +------------------------+-+------------------------+
+ | OSDs | | Monitors |
+ +------------------------+ +------------------------+
+
+
+Ceph Block Devices attach to QEMU virtual machines. For details on
+QEMU, see `QEMU Open Source Processor Emulator`_. For QEMU documentation, see
+`QEMU Manual`_. For installation details, see `Installation`_.
+
+.. important:: To use Ceph Block Devices with QEMU, you must have access to a
+ running Ceph cluster.
+
+
+Usage
+=====
+
+The QEMU command line expects you to specify the Ceph pool and image name. You
+may also specify a snapshot.
+
+QEMU will assume that Ceph configuration resides in the default
+location (e.g., ``/etc/ceph/$cluster.conf``) and that you are executing
+commands as the default ``client.admin`` user unless you expressly specify
+another Ceph configuration file path or another user. When specifying a user,
+QEMU uses the ``ID`` rather than the full ``TYPE:ID``. See `User Management -
+User`_ for details. Do not prepend the client type (i.e., ``client.``) to the
+beginning of the user ``ID``, or you will receive an authentication error. You
+should have the key for the ``admin`` user or the key of another user you
+specify with the ``:id={user}`` option in a keyring file stored in default path
+(i.e., ``/etc/ceph`` or the local directory with appropriate file ownership and
+permissions. Usage takes the following form::
+
+ qemu-img {command} [options] rbd:{pool-name}/{image-name}[@snapshot-name][:option1=value1][:option2=value2...]
+
+For example, specifying the ``id`` and ``conf`` options might look like the following::
+
+ qemu-img {command} [options] rbd:glance-pool/maipo:id=glance:conf=/etc/ceph/ceph.conf
+
+.. tip:: Configuration values containing ``:``, ``@``, or ``=`` can be escaped with a
+ leading ``\`` character.
+
+
+Creating Images with QEMU
+=========================
+
+You can create a block device image from QEMU. You must specify ``rbd``, the
+pool name, and the name of the image you wish to create. You must also specify
+the size of the image. ::
+
+ qemu-img create -f raw rbd:{pool-name}/{image-name} {size}
+
+For example::
+
+ qemu-img create -f raw rbd:data/foo 10G
+
+.. important:: The ``raw`` data format is really the only sensible
+ ``format`` option to use with RBD. Technically, you could use other
+ QEMU-supported formats (such as ``qcow2`` or ``vmdk``), but doing
+ so would add additional overhead, and would also render the volume
+ unsafe for virtual machine live migration when caching (see below)
+ is enabled.
+
+
+Resizing Images with QEMU
+=========================
+
+You can resize a block device image from QEMU. You must specify ``rbd``,
+the pool name, and the name of the image you wish to resize. You must also
+specify the size of the image. ::
+
+ qemu-img resize rbd:{pool-name}/{image-name} {size}
+
+For example::
+
+ qemu-img resize rbd:data/foo 10G
+
+
+Retrieving Image Info with QEMU
+===============================
+
+You can retrieve block device image information from QEMU. You must
+specify ``rbd``, the pool name, and the name of the image. ::
+
+ qemu-img info rbd:{pool-name}/{image-name}
+
+For example::
+
+ qemu-img info rbd:data/foo
+
+
+Running QEMU with RBD
+=====================
+
+QEMU can pass a block device from the host on to a guest, but since
+QEMU 0.15, there's no need to map an image as a block device on
+the host. Instead, QEMU attaches an image as a virtual block
+device directly via ``librbd``. This strategy increases performance
+by avoiding context switches and taking advantage of `RBD caching`_.
+
+You can use ``qemu-img`` to convert existing virtual machine images to Ceph
+block device images. For example, if you have a qcow2 image, you could run::
+
+ qemu-img convert -f qcow2 -O raw debian_squeeze.qcow2 rbd:data/squeeze
+
+To run a virtual machine booting from that image, you could run::
+
+ qemu -m 1024 -drive format=raw,file=rbd:data/squeeze
+
+`RBD caching`_ can significantly improve performance.
+Since QEMU 1.2, QEMU's cache options control ``librbd`` caching::
+
+ qemu -m 1024 -drive format=rbd,file=rbd:data/squeeze,cache=writeback
+
+If you have an older version of QEMU, you can set the ``librbd`` cache
+configuration (like any Ceph configuration option) as part of the
+'file' parameter::
+
+ qemu -m 1024 -drive format=raw,file=rbd:data/squeeze:rbd_cache=true,cache=writeback
+
+.. important:: If you set rbd_cache=true, you must set cache=writeback
+ or risk data loss. Without cache=writeback, QEMU will not send
+ flush requests to librbd. If QEMU exits uncleanly in this
+ configuration, file systems on top of rbd can be corrupted.
+
+.. _RBD caching: ../rbd-config-ref/#rbd-cache-config-settings
+
+
+.. index:: Ceph Block Device; discard trim and libvirt
+
+Enabling Discard/TRIM
+=====================
+
+Since Ceph version 0.46 and QEMU version 1.1, Ceph Block Devices support the
+discard operation. This means that a guest can send TRIM requests to let a Ceph
+block device reclaim unused space. This can be enabled in the guest by mounting
+``ext4`` or ``XFS`` with the ``discard`` option.
+
+For this to be available to the guest, it must be explicitly enabled
+for the block device. To do this, you must specify a
+``discard_granularity`` associated with the drive::
+
+ qemu -m 1024 -drive format=raw,file=rbd:data/squeeze,id=drive1,if=none \
+ -device driver=ide-hd,drive=drive1,discard_granularity=512
+
+Note that this uses the IDE driver. The virtio driver does not
+support discard.
+
+If using libvirt, edit your libvirt domain's configuration file using ``virsh
+edit`` to include the ``xmlns:qemu`` value. Then, add a ``qemu:commandline``
+block as a child of that domain. The following example shows how to set two
+devices with ``qemu id=`` to different ``discard_granularity`` values.
+
+.. code-block:: xml
+
+ <domain type='kvm' xmlns:qemu='http://libvirt.org/schemas/domain/qemu/1.0'>
+ <qemu:commandline>
+ <qemu:arg value='-set'/>
+ <qemu:arg value='block.scsi0-0-0.discard_granularity=4096'/>
+ <qemu:arg value='-set'/>
+ <qemu:arg value='block.scsi0-0-1.discard_granularity=65536'/>
+ </qemu:commandline>
+ </domain>
+
+
+.. index:: Ceph Block Device; cache options
+
+QEMU Cache Options
+==================
+
+QEMU's cache options correspond to the following Ceph `RBD Cache`_ settings.
+
+Writeback::
+
+ rbd_cache = true
+
+Writethrough::
+
+ rbd_cache = true
+ rbd_cache_max_dirty = 0
+
+None::
+
+ rbd_cache = false
+
+QEMU's cache settings override Ceph's cache settings (including settings that
+are explicitly set in the Ceph configuration file).
+
+.. note:: Prior to QEMU v2.4.0, if you explicitly set `RBD Cache`_ settings
+ in the Ceph configuration file, your Ceph settings override the QEMU cache
+ settings.
+
+.. _QEMU Open Source Processor Emulator: http://wiki.qemu.org/Main_Page
+.. _QEMU Manual: http://wiki.qemu.org/Manual
+.. _RBD Cache: ../rbd-config-ref/
+.. _Snapshots: ../rbd-snapshot/
+.. _Installation: ../../install
+.. _User Management - User: ../../rados/operations/user-management#user
diff --git a/doc/rbd/rados-rbd-cmds.rst b/doc/rbd/rados-rbd-cmds.rst
new file mode 100644
index 000000000..0bbcb2611
--- /dev/null
+++ b/doc/rbd/rados-rbd-cmds.rst
@@ -0,0 +1,326 @@
+=============================
+ Basic Block Device Commands
+=============================
+
+.. index:: Ceph Block Device; image management
+
+The ``rbd`` command enables you to create, list, introspect and remove block
+device images. You can also use it to clone images, create snapshots,
+rollback an image to a snapshot, view a snapshot, etc. For details on using
+the ``rbd`` command, see `RBD – Manage RADOS Block Device (RBD) Images`_ for
+details.
+
+.. important:: To use Ceph Block Device commands, you must have access to
+ a running Ceph cluster.
+
+Create a Block Device Pool
+==========================
+
+#. Use the ``ceph`` tool to `create a pool`_.
+
+#. Use the ``rbd`` tool to initialize the pool for use by RBD:
+
+ .. prompt:: bash $
+
+ rbd pool init <pool-name>
+
+ .. note:: The ``rbd`` tool assumes a default pool name of 'rbd' if no pool
+ name is specified in the command.
+
+
+Create a Block Device User
+==========================
+
+Unless otherwise specified, the ``rbd`` command uses the Ceph user ID ``admin``
+to access the Ceph cluster. The ``admin`` Ceph user ID allows full
+administrative access to the cluster. We recommend that you acess the Ceph
+cluster with a Ceph user ID that has fewer permissions than the ``admin`` Ceph
+user ID does. We call this non-``admin`` Ceph user ID a "block device user" or
+"Ceph user".
+
+To `create a Ceph user`_, use the ``ceph auth get-or-create`` command to
+specify the Ceph user ID name, monitor caps (capabilities), and OSD caps
+(capabilities):
+
+.. prompt:: bash $
+
+ ceph auth get-or-create client.{ID} mon 'profile rbd' osd 'profile {profile name} [pool={pool-name}][, profile ...]' mgr 'profile rbd [pool={pool-name}]'
+
+For example: to create a Ceph user ID named ``qemu`` that has read-write access
+to the pool ``vms`` and read-only access to the pool ``images``, run the
+following command:
+
+.. prompt:: bash $
+
+ ceph auth get-or-create client.qemu mon 'profile rbd' osd 'profile rbd pool=vms, profile rbd-read-only pool=images' mgr 'profile rbd pool=images'
+
+The output from the ``ceph auth get-or-create`` command is the keyring for the
+specified Ceph user ID, which can be written to
+``/etc/ceph/ceph.client.{ID}.keyring``.
+
+.. note:: Specify the Ceph user ID by providing the ``--id {id} argument when
+ using the ``rbd`` command. This argument is optional.
+
+Creating a Block Device Image
+=============================
+
+Before you can add a block device to a node, you must create an image for it in
+the :term:`Ceph Storage Cluster`. To create a block device image, run a command of this form:
+
+.. prompt:: bash $
+
+ rbd create --size {megabytes} {pool-name}/{image-name}
+
+For example, to create a 1GB image named ``bar`` that stores information in a
+pool named ``swimmingpool``, run this command:
+
+.. prompt:: bash $
+
+ rbd create --size 1024 swimmingpool/bar
+
+If you don't specify a pool when you create an image, then the image will be
+stored in the default pool ``rbd``. For example, if you ran this command, you
+would create a 1GB image named ``foo`` that is stored in the default pool
+``rbd``:
+
+.. prompt:: bash $
+
+ rbd create --size 1024 foo
+
+.. note:: You must create a pool before you can specify it as a source. See
+ `Storage Pools`_ for details.
+
+Listing Block Device Images
+===========================
+
+To list block devices in the ``rbd`` pool, run the following command:
+
+.. prompt:: bash $
+
+ rbd ls
+
+.. note:: ``rbd`` is the default pool name, and ``rbd ls`` lists the commands
+ in the default pool.
+
+To list block devices in a particular pool, run the following command, but
+replace ``{poolname}`` with the name of the pool:
+
+.. prompt:: bash $
+
+ rbd ls {poolname}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd ls swimmingpool
+
+To list "deferred delete" block devices in the ``rbd`` pool, run the
+following command:
+
+.. prompt:: bash $
+
+ rbd trash ls
+
+To list "deferred delete" block devices in a particular pool, run the
+following command, but replace ``{poolname}`` with the name of the pool:
+
+.. prompt:: bash $
+
+ rbd trash ls {poolname}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd trash ls swimmingpool
+
+Retrieving Image Information
+============================
+
+To retrieve information from a particular image, run the following command, but
+replace ``{image-name}`` with the name for the image:
+
+.. prompt:: bash $
+
+ rbd info {image-name}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd info foo
+
+To retrieve information from an image within a pool, run the following command,
+but replace ``{image-name}`` with the name of the image and replace
+``{pool-name}`` with the name of the pool:
+
+.. prompt:: bash $
+
+ rbd info {pool-name}/{image-name}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd info swimmingpool/bar
+
+.. note:: Other naming conventions are possible, and might conflict with the
+ naming convention described here. For example, ``userid/<uuid>`` is a
+ possible name for an RBD image, and such a name might (at the least) be
+ confusing.
+
+Resizing a Block Device Image
+=============================
+
+:term:`Ceph Block Device` images are thin provisioned. They don't actually use
+any physical storage until you begin saving data to them. However, they do have
+a maximum capacity that you set with the ``--size`` option. If you want to
+increase (or decrease) the maximum size of a Ceph Block Device image, run one
+of the following commands:
+
+Increasing the Size of a Block Device Image
+-------------------------------------------
+
+.. prompt:: bash $
+
+ rbd resize --size 2048 foo
+
+Decreasing the Size of a Block Device Image
+-------------------------------------------
+
+.. prompt:: bash $
+
+ rbd resize --size 2048 foo --allow-shrink
+
+
+Removing a Block Device Image
+=============================
+
+To remove a block device, run the following command, but replace
+``{image-name}`` with the name of the image you want to remove:
+
+.. prompt:: bash $
+
+ rbd rm {image-name}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd rm foo
+
+Removing a Block Device from a Pool
+-----------------------------------
+
+To remove a block device from a pool, run the following command but replace
+``{image-name}`` with the name of the image to be removed, and replace
+``{pool-name}`` with the name of the pool from which the image is to be
+removed:
+
+.. prompt:: bash $
+
+ rbd rm {pool-name}/{image-name}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd rm swimmingpool/bar
+
+"Defer Deleting" a Block Device from a Pool
+-------------------------------------------
+
+To defer delete a block device from a pool (which entails moving it to the
+"trash" and deleting it later), run the following command but replace
+``{image-name}`` with the name of the image to be moved to the trash and
+replace ``{pool-name}`` with the name of the pool:
+
+.. prompt:: bash $
+
+ rbd trash mv {pool-name}/{image-name}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd trash mv swimmingpool/bar
+
+Removing a Deferred Block Device from a Pool
+--------------------------------------------
+
+To remove a deferred block device from a pool, run the following command but
+replace ``{image-}`` with the ID of the image to be removed, and replace
+``{pool-name}`` with the name of the pool from which the image is to be
+removed:
+
+.. prompt:: bash $
+
+ rbd trash rm {pool-name}/{image-}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd trash rm swimmingpool/2bf4474b0dc51
+
+.. note::
+
+ * You can move an image to the trash even if it has snapshot(s) or is
+ actively in use by clones. However, you cannot remove it from the trash
+ under those conditions.
+
+ * You can use ``--expires-at`` to set the deferment time (default is
+ ``now``). If the deferment time has not yet arrived, you cannot remove the
+ image unless you use ``--force``.
+
+Restoring a Block Device Image
+==============================
+
+To restore a deferred delete block device in the rbd pool, run the
+following command but replace ``{image-id}`` with the ID of the image:
+
+.. prompt:: bash $
+
+ rbd trash restore {image-id}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd trash restore 2bf4474b0dc51
+
+Restoring a Block Device Image in a Specific Pool
+-------------------------------------------------
+
+To restore a deferred delete block device in a particular pool, run the
+following command but replace ``{image-id}`` with the ID of the image and
+replace ``{pool-name}`` with the name of the pool:
+
+.. prompt:: bash $
+
+ rbd trash restore {pool-name}/{image-id}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd trash restore swimmingpool/2bf4474b0dc51
+
+
+Renaming an Image While Restoring It
+------------------------------------
+
+You can also use ``--image`` to rename the image while restoring it.
+
+For example:
+
+.. prompt:: bash $
+
+ rbd trash restore swimmingpool/2bf4474b0dc51 --image new-name
+
+
+.. _create a pool: ../../rados/operations/pools/#create-a-pool
+.. _Storage Pools: ../../rados/operations/pools
+.. _RBD – Manage RADOS Block Device (RBD) Images: ../../man/8/rbd/
+.. _create a Ceph user: ../../rados/operations/user-management#add-a-user
diff --git a/doc/rbd/rbd-cloudstack.rst b/doc/rbd/rbd-cloudstack.rst
new file mode 100644
index 000000000..1b961234b
--- /dev/null
+++ b/doc/rbd/rbd-cloudstack.rst
@@ -0,0 +1,157 @@
+=============================
+ Block Devices and CloudStack
+=============================
+
+You may use Ceph Block Device images with CloudStack 4.0 and higher through
+``libvirt``, which configures the QEMU interface to ``librbd``. Ceph stripes
+block device images as objects across the cluster, which means that large Ceph
+Block Device images have better performance than a standalone server!
+
+To use Ceph Block Devices with CloudStack 4.0 and higher, you must install QEMU,
+``libvirt``, and CloudStack first. We recommend using a separate physical host
+for your CloudStack installation. CloudStack recommends a minimum of 4GB of RAM
+and a dual-core processor, but more CPU and RAM will perform better. The
+following diagram depicts the CloudStack/Ceph technology stack.
+
+
+.. ditaa::
+
+ +---------------------------------------------------+
+ | CloudStack |
+ +---------------------------------------------------+
+ | libvirt |
+ +------------------------+--------------------------+
+ |
+ | configures
+ v
+ +---------------------------------------------------+
+ | QEMU |
+ +---------------------------------------------------+
+ | librbd |
+ +---------------------------------------------------+
+ | librados |
+ +------------------------+-+------------------------+
+ | OSDs | | Monitors |
+ +------------------------+ +------------------------+
+
+.. important:: To use Ceph Block Devices with CloudStack, you must have
+ access to a running Ceph Storage Cluster.
+
+CloudStack integrates with Ceph's block devices to provide CloudStack with a
+back end for CloudStack's Primary Storage. The instructions below detail the
+setup for CloudStack Primary Storage.
+
+.. note:: We recommend installing with Ubuntu 14.04 or later so that
+ you can use package installation instead of having to compile
+ libvirt from source.
+
+Installing and configuring QEMU for use with CloudStack doesn't require any
+special handling. Ensure that you have a running Ceph Storage Cluster. Install
+QEMU and configure it for use with Ceph; then, install ``libvirt`` version
+0.9.13 or higher (you may need to compile from source) and ensure it is running
+with Ceph.
+
+
+.. note:: Ubuntu 14.04 and CentOS 7.2 will have ``libvirt`` with RBD storage
+ pool support enabled by default.
+
+.. index:: pools; CloudStack
+
+Create a Pool
+=============
+
+By default, Ceph block devices use the ``rbd`` pool. Create a pool for
+CloudStack NFS Primary Storage. Ensure your Ceph cluster is running, then create
+the pool. ::
+
+ ceph osd pool create cloudstack
+
+See `Create a Pool`_ for details on specifying the number of placement groups
+for your pools, and `Placement Groups`_ for details on the number of placement
+groups you should set for your pools.
+
+A newly created pool must be initialized prior to use. Use the ``rbd`` tool
+to initialize the pool::
+
+ rbd pool init cloudstack
+
+Create a Ceph User
+==================
+
+To access the Ceph cluster we require a Ceph user which has the correct
+credentials to access the ``cloudstack`` pool we just created. Although we could
+use ``client.admin`` for this, it's recommended to create a user with only
+access to the ``cloudstack`` pool. ::
+
+ ceph auth get-or-create client.cloudstack mon 'profile rbd' osd 'profile rbd pool=cloudstack'
+
+Use the information returned by the command in the next step when adding the
+Primary Storage.
+
+See `User Management`_ for additional details.
+
+Add Primary Storage
+===================
+
+To add a Ceph block device as Primary Storage, the steps include:
+
+#. Log in to the CloudStack UI.
+#. Click **Infrastructure** on the left side navigation bar.
+#. Select **View All** under **Primary Storage**.
+#. Click the **Add Primary Storage** button on the top right hand side.
+#. Fill in the following information, according to your infrastructure setup:
+
+ - Scope (i.e. Cluster or Zone-Wide).
+
+ - Zone.
+
+ - Pod.
+
+ - Cluster.
+
+ - Name of Primary Storage.
+
+ - For **Protocol**, select ``RBD``.
+
+ - For **Provider**, select the appropriate provider type (i.e. DefaultPrimary, SolidFire, SolidFireShared, or CloudByte). Depending on the provider chosen, fill out the information pertinent to your setup.
+
+#. Add cluster information (``cephx`` is supported).
+
+ - For **RADOS Monitor**, provide the IP address of a Ceph monitor node.
+
+ - For **RADOS Pool**, provide the name of an RBD pool.
+
+ - For **RADOS User**, provide a user that has sufficient rights to the RBD pool. Note: Do not include the ``client.`` part of the user.
+
+ - For **RADOS Secret**, provide the secret the user's secret.
+
+ - **Storage Tags** are optional. Use tags at your own discretion. For more information about storage tags in CloudStack, refer to `Storage Tags`_.
+
+#. Click **OK**.
+
+Create a Disk Offering
+======================
+
+To create a new disk offering, refer to `Create a New Disk Offering`_.
+Create a disk offering so that it matches the ``rbd`` tag.
+The ``StoragePoolAllocator`` will choose the ``rbd``
+pool when searching for a suitable storage pool. If the disk offering doesn't
+match the ``rbd`` tag, the ``StoragePoolAllocator`` may select the pool you
+created (e.g., ``cloudstack``).
+
+
+Limitations
+===========
+
+- CloudStack will only bind to one monitor (You can however create a Round Robin DNS record over multiple monitors)
+
+
+
+.. _Create a Pool: ../../rados/operations/pools#createpool
+.. _Placement Groups: ../../rados/operations/placement-groups
+.. _Install and Configure QEMU: ../qemu-rbd
+.. _Install and Configure libvirt: ../libvirt
+.. _KVM Hypervisor Host Installation: http://docs.cloudstack.apache.org/en/latest/installguide/hypervisor/kvm.html
+.. _Storage Tags: http://docs.cloudstack.apache.org/en/latest/adminguide/storage.html#storage-tags
+.. _Create a New Disk Offering: http://docs.cloudstack.apache.org/en/latest/adminguide/service_offerings.html#creating-a-new-disk-offering
+.. _User Management: ../../rados/operations/user-management
diff --git a/doc/rbd/rbd-config-ref.rst b/doc/rbd/rbd-config-ref.rst
new file mode 100644
index 000000000..777894cd6
--- /dev/null
+++ b/doc/rbd/rbd-config-ref.rst
@@ -0,0 +1,476 @@
+=======================
+ Config Settings
+=======================
+
+See `Block Device`_ for additional details.
+
+Generic IO Settings
+===================
+
+``rbd_compression_hint``
+
+:Description: Hint to send to the OSDs on write operations. If set to
+ ``compressible`` and the OSD ``bluestore_compression_mode``
+ setting is ``passive``, the OSD will attempt to compress data
+ If set to ``incompressible`` and the OSD compression setting
+ is ``aggressive``, the OSD will not attempt to compress data.
+:Type: Enum
+:Required: No
+:Default: ``none``
+:Values: ``none``, ``compressible``, ``incompressible``
+
+
+``rbd_read_from_replica_policy``
+
+:Description: Policy for determining which OSD will receive read operations.
+ If set to ``default``, each PG's primary OSD will always be used
+ for read operations. If set to ``balance``, read operations will
+ be sent to a randomly selected OSD within the replica set. If set
+ to ``localize``, read operations will be sent to the closest OSD
+ as determined by the CRUSH map. Note: this feature requires the
+ cluster to be configured with a minimum compatible OSD release of
+ Octopus.
+:Type: Enum
+:Required: No
+:Default: ``default``
+:Values: ``default``, ``balance``, ``localize``
+
+Cache Settings
+=======================
+
+.. sidebar:: Kernel Caching
+
+ The kernel driver for Ceph block devices can use the Linux page cache to
+ improve performance.
+
+The user space implementation of the Ceph block device (i.e., ``librbd``) cannot
+take advantage of the Linux page cache, so it includes its own in-memory
+caching, called "RBD caching." RBD caching behaves just like well-behaved hard
+disk caching. When the OS sends a barrier or a flush request, all dirty data is
+written to the OSDs. This means that using write-back caching is just as safe as
+using a well-behaved physical hard disk with a VM that properly sends flushes
+(i.e. Linux kernel >= 2.6.32). The cache uses a Least Recently Used (LRU)
+algorithm, and in write-back mode it can coalesce contiguous requests for
+better throughput.
+
+The librbd cache is enabled by default and supports three different cache
+policies: write-around, write-back, and write-through. Writes return
+immediately under both the write-around and write-back policies, unless there
+are more than ``rbd_cache_max_dirty`` unwritten bytes to the storage cluster.
+The write-around policy differs from the write-back policy in that it does
+not attempt to service read requests from the cache, unlike the write-back
+policy, and is therefore faster for high performance write workloads. Under the
+write-through policy, writes return only when the data is on disk on all
+replicas, but reads may come from the cache.
+
+Prior to receiving a flush request, the cache behaves like a write-through cache
+to ensure safe operation for older operating systems that do not send flushes to
+ensure crash consistent behavior.
+
+If the librbd cache is disabled, writes and
+reads go directly to the storage cluster, and writes return only when the data
+is on disk on all replicas.
+
+.. note::
+ The cache is in memory on the client, and each RBD image has
+ its own. Since the cache is local to the client, there's no coherency
+ if there are others accessing the image. Running GFS or OCFS on top of
+ RBD will not work with caching enabled.
+
+
+Option settings for RBD should be set in the ``[client]``
+section of your configuration file or the central config store. These settings
+include:
+
+``rbd_cache``
+
+:Description: Enable caching for RADOS Block Device (RBD).
+:Type: Boolean
+:Required: No
+:Default: ``true``
+
+
+``rbd_cache_policy``
+
+:Description: Select the caching policy for librbd.
+:Type: Enum
+:Required: No
+:Default: ``writearound``
+:Values: ``writearound``, ``writeback``, ``writethrough``
+
+
+``rbd_cache_writethrough_until_flush``
+
+:Description: Start out in ``writethrough`` mode, and switch to ``writeback``
+ after the first flush request is received. Enabling is a
+ conservative but safe strategy in case VMs running on RBD volumes
+ are too old to send flushes, like the ``virtio`` driver in Linux
+ kernels older than 2.6.32.
+:Type: Boolean
+:Required: No
+:Default: ``true``
+
+
+``rbd_cache_size``
+
+:Description: The per-volume RBD client cache size in bytes.
+:Type: 64-bit Integer
+:Required: No
+:Default: ``32 MiB``
+:Policies: write-back and write-through
+
+
+``rbd_cache_max_dirty``
+
+:Description: The ``dirty`` limit in bytes at which the cache triggers write-back. If ``0``, uses write-through caching.
+:Type: 64-bit Integer
+:Required: No
+:Constraint: Must be less than ``rbd_cache_size``.
+:Default: ``24 MiB``
+:Policies: write-around and write-back
+
+
+``rbd_cache_target_dirty``
+
+:Description: The ``dirty target`` before the cache begins writing data to the data storage. Does not block writes to the cache.
+:Type: 64-bit Integer
+:Required: No
+:Constraint: Must be less than ``rbd_cache_max_dirty``.
+:Default: ``16 MiB``
+:Policies: write-back
+
+
+``rbd_cache_max_dirty_age``
+
+:Description: The number of seconds dirty data is in the cache before writeback starts.
+:Type: Float
+:Required: No
+:Default: ``1.0``
+:Policies: write-back
+
+
+.. _Block Device: ../../rbd
+
+
+Read-ahead Settings
+=======================
+
+librbd supports read-ahead/prefetching to optimize small, sequential reads.
+This should normally be handled by the guest OS in the case of a VM,
+but boot loaders may not issue efficient reads. Read-ahead is automatically
+disabled if caching is disabled or if the policy is write-around.
+
+
+``rbd_readahead_trigger_requests``
+
+:Description: Number of sequential read requests necessary to trigger read-ahead.
+:Type: Integer
+:Required: No
+:Default: ``10``
+
+
+``rbd_readahead_max_bytes``
+
+:Description: Maximum size of a read-ahead request. If zero, read-ahead is disabled.
+:Type: 64-bit Integer
+:Required: No
+:Default: ``512 KiB``
+
+
+``rbd_readahead_disable_after_bytes``
+
+:Description: After this many bytes have been read from an RBD image, read-ahead
+ is disabled for that image until it is closed. This allows the
+ guest OS to take over read-ahead once it is booted. If zero,
+ read-ahead stays enabled.
+:Type: 64-bit Integer
+:Required: No
+:Default: ``50 MiB``
+
+
+Image Features
+==============
+
+RBD supports advanced features which can be specified via the command line when
+creating images or the default features can be configured via
+``rbd_default_features = <sum of feature numeric values>`` or
+``rbd_default_features = <comma-delimited list of CLI values>``.
+
+``Layering``
+
+:Description: Layering enables cloning.
+:Internal value: 1
+:CLI value: layering
+:Added in: v0.52 (Bobtail)
+:KRBD support: since v3.10
+:Default: yes
+
+``Striping v2``
+
+:Description: Striping spreads data across multiple objects. Striping helps with
+ parallelism for sequential read/write workloads.
+:Internal value: 2
+:CLI value: striping
+:Added in: v0.55 (Bobtail)
+:KRBD support: since v3.10 (default striping only, "fancy" striping added in v4.17)
+:Default: yes
+
+``Exclusive locking``
+
+:Description: When enabled, it requires a client to acquire a lock on an object
+ before making a write. Exclusive lock should only be enabled when
+ a single client is accessing an image at any given time.
+:Internal value: 4
+:CLI value: exclusive-lock
+:Added in: v0.92 (Hammer)
+:KRBD support: since v4.9
+:Default: yes
+
+``Object map``
+
+:Description: Object map support depends on exclusive lock support. Block
+ devices are thin provisioned, which means that they only store
+ data that actually has been written, ie. they are *sparse*. Object
+ map support helps track which objects actually exist (have data
+ stored on a device). Enabling object map support speeds up I/O
+ operations for cloning, importing and exporting a sparsely
+ populated image, and deleting.
+:Internal value: 8
+:CLI value: object-map
+:Added in: v0.93 (Hammer)
+:KRBD support: since v5.3
+:Default: yes
+
+
+``Fast-diff``
+
+:Description: Fast-diff support depends on object map support and exclusive lock
+ support. It adds another property to the object map, which makes
+ it much faster to generate diffs between snapshots of an image.
+ It is also much faster to calculate the actual data usage of a
+ snapshot or volume (``rbd du``).
+:Internal value: 16
+:CLI value: fast-diff
+:Added in: v9.0.1 (Infernalis)
+:KRBD support: since v5.3
+:Default: yes
+
+
+``Deep-flatten``
+
+:Description: Deep-flatten enables ``rbd flatten`` to work on all snapshots of
+ an image, in addition to the image itself. Without it, snapshots
+ of an image will still rely on the parent, so the parent cannot be
+ deleted until the snapshots are first deleted. Deep-flatten makes
+ a parent independent of its clones, even if they have snapshots,
+ at the expense of using additional OSD device space.
+:Internal value: 32
+:CLI value: deep-flatten
+:Added in: v9.0.2 (Infernalis)
+:KRBD support: since v5.1
+:Default: yes
+
+
+``Journaling``
+
+:Description: Journaling support depends on exclusive lock support. Journaling
+ records all modifications to an image in the order they occur. RBD
+ mirroring can utilize the journal to replicate a crash-consistent
+ image to a remote cluster. It is best to let ``rbd-mirror``
+ manage this feature only as needed, as enabling it long term may
+ result in substantial additional OSD space consumption.
+:Internal value: 64
+:CLI value: journaling
+:Added in: v10.0.1 (Jewel)
+:KRBD support: no
+:Default: no
+
+
+``Data pool``
+
+:Description: On erasure-coded pools, the image data block objects need to be stored on a separate pool from the image metadata.
+:Internal value: 128
+:Added in: v11.1.0 (Kraken)
+:KRBD support: since v4.11
+:Default: no
+
+
+``Operations``
+
+:Description: Used to restrict older clients from performing certain maintenance operations against an image (e.g. clone, snap create).
+:Internal value: 256
+:Added in: v13.0.2 (Mimic)
+:KRBD support: since v4.16
+
+
+``Migrating``
+
+:Description: Used to restrict older clients from opening an image when it is in migration state.
+:Internal value: 512
+:Added in: v14.0.1 (Nautilus)
+:KRBD support: no
+
+``Non-primary``
+
+:Description: Used to restrict changes to non-primary images using snapshot-based mirroring.
+:Internal value: 1024
+:Added in: v15.2.0 (Octopus)
+:KRBD support: no
+
+
+QOS Settings
+============
+
+librbd supports limiting per-image IO, controlled by the following
+settings.
+
+``rbd_qos_iops_limit``
+
+:Description: The desired limit of IO operations per second.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``0``
+
+
+``rbd_qos_bps_limit``
+
+:Description: The desired limit of IO bytes per second.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``0``
+
+
+``rbd_qos_read_iops_limit``
+
+:Description: The desired limit of read operations per second.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``0``
+
+
+``rbd_qos_write_iops_limit``
+
+:Description: The desired limit of write operations per second.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``0``
+
+
+``rbd_qos_read_bps_limit``
+
+:Description: The desired limit of read bytes per second.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``0``
+
+
+``rbd_qos_writ_bps_limit``
+
+:Description: The desired limit of write bytes per second.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``0``
+
+
+``rbd_qos_iops_burst``
+
+:Description: The desired burst limit of IO operations.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``0``
+
+
+``rbd_qos_bps_burst``
+
+:Description: The desired burst limit of IO bytes.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``0``
+
+
+``rbd_qos_read_iops_burst``
+
+:Description: The desired burst limit of read operations.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``0``
+
+
+``rbd_qos_write_iops_burst``
+
+:Description: The desired burst limit of write operations.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``0``
+
+
+``rbd_qos_read_bps_burst``
+
+:Description: The desired burst limit of read bytes per second.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``0``
+
+
+``rbd_qos_write_bps_burst``
+
+:Description: The desired burst limit of write bytes per second.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``0``
+
+
+``rbd_qos_iops_burst_seconds``
+
+:Description: The desired burst duration in seconds of IO operations.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``1``
+
+
+``rbd_qos_bps_burst_seconds``
+
+:Description: The desired burst duration in seconds.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``1``
+
+
+``rbd_qos_read_iops_burst_seconds``
+
+:Description: The desired burst duration in seconds of read operations.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``1``
+
+
+``rbd_qos_write_iops_burst_seconds``
+
+:Description: The desired burst duration in seconds of write operations.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``1``
+
+
+``rbd_qos_read_bps_burst_seconds``
+
+:Description: The desired burst duration in seconds of read bytes.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``1``
+
+
+``rbd_qos_write_bps_burst_seconds``
+
+:Description: The desired burst duration in seconds of write bytes.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``1``
+
+
+``rbd_qos_schedule_tick_min``
+
+:Description: The minimum schedule tick (in milliseconds) for QoS.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``50``
diff --git a/doc/rbd/rbd-encryption.rst b/doc/rbd/rbd-encryption.rst
new file mode 100644
index 000000000..0628f3781
--- /dev/null
+++ b/doc/rbd/rbd-encryption.rst
@@ -0,0 +1,148 @@
+======================
+ Image Encryption
+======================
+
+.. index:: Ceph Block Device; encryption
+
+Starting with the Pacific release, image-level encryption can be handled
+internally by RBD clients. This means you can set a secret key that will be
+used to encrypt a specific RBD image. This page describes the scope of the
+RBD encryption feature.
+
+.. note::
+ The ``krbd`` kernel module does not support encryption at this time.
+
+.. note::
+ External tools (e.g. dm-crypt, QEMU) can be used as well to encrypt
+ an RBD image, and the feature set and limitation set for that use may be
+ different than described here.
+
+Encryption Format
+=================
+
+By default, RBD images are not encrypted. To encrypt an RBD image, it needs to
+be formatted to one of the supported encryption formats. The format operation
+persists encryption metadata to the image. The encryption metadata usually
+includes information such as the encryption format and version, cipher
+algorithm and mode specification, as well as information used to secure the
+encryption key. The encryption key itself is protected by a user-kept secret
+(usually a passphrase), which is never persisted. The basic encryption format
+operation will require specifying the encryption format and a secret.
+
+Some of the encryption metadata may be stored as part of the image data,
+typically an encryption header will be written to the beginning of the raw
+image data. This means that the effective image size of the encrypted image may
+be lower than the raw image size. See the `Supported Formats`_ section for more
+details.
+
+.. note::
+ Currently only flat images (i.e. not cloned) can be formatted.
+ Clones of an encrypted image are inherently encrypted using the same format
+ and secret.
+
+.. note::
+ Any data written to the image prior to its format may become unreadable,
+ though it may still occupy storage resources.
+
+.. note::
+ Images with the `journal feature`_ enabled cannot be formatted and encrypted
+ by RBD clients.
+
+Encryption Load
+=================
+
+Formatting an image is a necessary pre-requisite for enabling encryption.
+However, formatted images will still be treated as raw unencrypted images by
+all of the RBD APIs. In particular, an encrypted RBD image can be opened
+by the same APIs as any other image, and raw unencrypted data can be
+read / written. Such raw IOs may risk the integrity of the encryption format,
+for example by overriding encryption metadata located at the beginning of the
+image.
+
+In order to safely perform encrypted IO on the formatted image, an additional
+*encryption load* operation should be applied after opening the image. The
+encryption load operation requires supplying the encryption format and a secret
+for unlocking the encryption key. Following a successful encryption load
+operation, all IOs for the opened image will be encrypted / decrypted.
+For a cloned image, this includes IOs for ancestor images as well. The
+encryption key will be stored in-memory by the RBD client until the image is
+closed.
+
+.. note::
+ Once encryption has been loaded, no other encryption load / format
+ operations can be applied to the context of the opened image.
+
+.. note::
+ Once encryption has been loaded, API calls for retrieving the image size
+ using the opened image context will return the effective image size.
+
+.. note::
+ Encryption load can be automatically applied when mounting RBD images as
+ block devices via `rbd-nbd`_.
+
+Supported Formats
+=================
+
+LUKS
+~~~~~~~
+
+Both LUKS1 and LUKS2 are supported. The data layout is fully compliant with the
+LUKS specification. Thus, images formatted by RBD can be loaded using external
+LUKS-supporting tools such as dm-crypt or QEMU. Furthermore, existing LUKS
+data, created outside of RBD, can be imported (by copying the raw LUKS data
+into the image) and loaded by RBD encryption.
+
+.. note::
+ The LUKS formats are supported on Linux-based systems only.
+
+.. note::
+ Currently, only AES-128 and AES-256 encryption algorithms are supported.
+ Additionally, xts-plain64 is currently the only supported encryption mode.
+
+To use the LUKS format, start by formatting the image::
+
+ $ rbd encryption format {pool-name}/{image-name} {luks1|luks2} {passphrase-file} [–cipher-alg {aes-128 | aes-256}]
+
+The encryption format operation generates a LUKS header and writes it to the
+beginning of the image. The header is appended with a single keyslot holding a
+randomly-generated encryption key, and is protected by the passphrase read from
+`passphrase-file`.
+
+.. note::
+ If the content of `passphrase-file` ends with a newline character, it will
+ be stripped off.
+
+By default, AES-256 in xts-plain64 mode (which is the current recommended mode,
+and the usual default for other tools) will be used. The format operation
+allows selecting AES-128 as well. Adding / removing passphrases is currently
+not supported by RBD, but can be applied to the raw RBD data using compatible
+tools such as cryptsetup.
+
+The LUKS header size can vary (upto 136MiB in LUKS2), but is usually upto
+16MiB, depending on the version of `libcryptsetup` installed. For optimal
+performance, the encryption format will set the data offset to be aligned with
+the image object size. For example expect a minimum overhead of 8MiB if using
+an imageconfigured with an 8MiB object size.
+
+In LUKS1, sectors, which are the minimal encryption units, are fixed at 512
+bytes. LUKS2 supports larger sectors, and for better performance we set
+the default sector size to the maximum of 4KiB. Writes which are either smaller
+than a sector, or are not aligned to a sector start, will trigger a guarded
+read-modify-write chain on the client, with a considerable latency penalty.
+A batch of such unaligned writes can lead to IO races which will further
+deteriorate performance. Thus it is advisable to avoid using RBD encryption
+in cases where incoming writes cannot be guaranteed to be sector-aligned.
+
+To mount a LUKS-encrypted image run::
+
+ $ rbd -p {pool-name} device map -t nbd -o encryption-format={luks1|luks2},encryption-passphrase-file={passphrase-file}
+
+Note that for security reasons, both the encryption format and encryption load
+operations are CPU-intensive, and may take a few seconds to complete. For the
+encryption operations of actual image IO, assuming AES-NI is enabled,
+a relative small microseconds latency should be added, as well as a small
+increase in CPU utilization.
+
+.. _journal feature: ../rbd-mirroring/#enable-image-journaling-feature
+.. _Supported Formats: #supported-formats
+.. _rbd-nbd: ../../man/8/rbd-nbd
diff --git a/doc/rbd/rbd-exclusive-locks.rst b/doc/rbd/rbd-exclusive-locks.rst
new file mode 100644
index 000000000..03609c74f
--- /dev/null
+++ b/doc/rbd/rbd-exclusive-locks.rst
@@ -0,0 +1,86 @@
+.. _rbd-exclusive-locks:
+
+====================
+ RBD Exclusive Locks
+====================
+
+.. index:: Ceph Block Device; RBD exclusive locks; exclusive-lock
+
+Exclusive locks are mechanisms designed to prevent multiple processes from
+accessing the same Rados Block Device (RBD) in an uncoordinated fashion.
+Exclusive locks are used heavily in virtualization (where they prevent VMs from
+clobbering each other's writes) and in RBD mirroring (where they are a
+prerequisite for journaling).
+
+By default, exclusive locks are enabled on newly created images. This default
+can be overridden via the ``rbd_default_features`` configuration option or the
+``--image-feature`` option for ``rbd create``.
+
+.. note::
+ Many image features, including ``object-map`` and ``fast-diff``, depend upon
+ exclusive locking. Disabling the ``exclusive-lock`` feature will negatively
+ affect the performance of some operations.
+
+In order to ensure proper exclusive locking operations, any client using an RBD
+image whose ``exclusive-lock`` feature is enabled must have a CephX identity
+whose capabilities include ``profile rbd``.
+
+Exclusive locking is mostly transparent to the user.
+
+#. Whenever any ``librbd`` client process or kernel RBD client
+ starts using an RBD image on which exclusive locking has been
+ enabled, it obtains an exclusive lock on the image before the first
+ write.
+
+#. Whenever any such client process terminates gracefully, the process
+ relinquishes the lock automatically.
+
+#. This graceful termination enables another, subsequent, process to acquire
+ the lock and to write to the image.
+
+.. note::
+ It is possible for two or more concurrently running processes to open the
+ image and to read from it. The client acquires the exclusive lock only when
+ attempting to write to the image. To disable transparent lock transitions
+ between multiple clients, the client must acquire the lock by using the
+ ``RBD_LOCK_MODE_EXCLUSIVE`` flag.
+
+
+Blacklisting
+============
+
+Sometimes a client process (or, in case of a krbd client, a client node's
+kernel) that previously held an exclusive lock on an image does not terminate
+gracefully, but dies abruptly. This may be because the client process received
+a ``KILL`` or ``ABRT`` signal, or because the client node underwent a hard
+reboot or suffered a power failure. In cases like this, the exclusive lock is
+never gracefully released. This means that any new process that starts and
+attempts to use the device must break the previously held exclusive lock.
+
+However, a process (or kernel thread) may hang or merely lose network
+connectivity to the Ceph cluster for some amount of time. In that case,
+breaking the lock would be potentially catastrophic: the hung process or
+connectivity issue could resolve itself and the original process might then
+compete with one that started in the interim, thus accessing RBD data in an
+uncoordinated and destructive manner.
+
+In the event that a lock cannot be acquired in the standard graceful manner,
+the overtaking process not only breaks the lock but also blocklists the
+previous lock holder. This is negotiated between the new client process and the
+Ceph Monitor.
+
+* Upon receiving the blocklist request, the monitor instructs the relevant OSDs
+ to no longer serve requests from the old client process;
+* after the associated OSD map update is complete, the new client can break the
+ previously held lock;
+* after the new client has acquired the lock, it can commence writing
+ to the image.
+
+Blocklisting is thus a form of storage-level resource `fencing`_.
+
+In order for blocklisting to work, the client must have the ``osd
+blocklist`` capability. This capability is included in the ``profile
+rbd`` capability profile, which should be set generally on all Ceph
+:ref:`client identities <user-management>` using RBD.
+
+.. _fencing: https://en.wikipedia.org/wiki/Fencing_(computing)
diff --git a/doc/rbd/rbd-integrations.rst b/doc/rbd/rbd-integrations.rst
new file mode 100644
index 000000000..50d788d53
--- /dev/null
+++ b/doc/rbd/rbd-integrations.rst
@@ -0,0 +1,15 @@
+=========================================
+ Ceph Block Device 3rd Party Integration
+=========================================
+
+.. toctree::
+ :maxdepth: 1
+
+ Kernel Modules <rbd-ko>
+ QEMU <qemu-rbd>
+ libvirt <libvirt>
+ Kubernetes <rbd-kubernetes>
+ OpenStack <rbd-openstack>
+ CloudStack <rbd-cloudstack>
+ LIO iSCSI Gateway <iscsi-overview>
+ Windows <rbd-windows>
diff --git a/doc/rbd/rbd-ko.rst b/doc/rbd/rbd-ko.rst
new file mode 100644
index 000000000..70c407839
--- /dev/null
+++ b/doc/rbd/rbd-ko.rst
@@ -0,0 +1,59 @@
+==========================
+ Kernel Module Operations
+==========================
+
+.. index:: Ceph Block Device; kernel module
+
+.. important:: To use kernel module operations, you must have a running Ceph cluster.
+
+Get a List of Images
+====================
+
+To mount a block device image, first return a list of the images. ::
+
+ rbd list
+
+Map a Block Device
+==================
+
+Use ``rbd`` to map an image name to a kernel module. You must specify the
+image name, the pool name, and the user name. ``rbd`` will load RBD kernel
+module on your behalf if it's not already loaded. ::
+
+ sudo rbd device map {pool-name}/{image-name} --id {user-name}
+
+For example::
+
+ sudo rbd device map rbd/myimage --id admin
+
+If you use `cephx`_ authentication, you must also specify a secret. It may come
+from a keyring or a file containing the secret. ::
+
+ sudo rbd device map rbd/myimage --id admin --keyring /path/to/keyring
+ sudo rbd device map rbd/myimage --id admin --keyfile /path/to/file
+
+
+Show Mapped Block Devices
+=========================
+
+To show block device images mapped to kernel modules with the ``rbd``,
+specify ``device list`` arguments. ::
+
+ rbd device list
+
+
+Unmapping a Block Device
+========================
+
+To unmap a block device image with the ``rbd`` command, specify the
+``device unmap`` arguments and the device name (i.e., by convention the
+same as the block device image name). ::
+
+ sudo rbd device unmap /dev/rbd/{poolname}/{imagename}
+
+For example::
+
+ sudo rbd device unmap /dev/rbd/rbd/foo
+
+
+.. _cephx: ../../rados/operations/user-management/
diff --git a/doc/rbd/rbd-kubernetes.rst b/doc/rbd/rbd-kubernetes.rst
new file mode 100644
index 000000000..caaf77d64
--- /dev/null
+++ b/doc/rbd/rbd-kubernetes.rst
@@ -0,0 +1,314 @@
+==============================
+ Block Devices and Kubernetes
+==============================
+
+You may use Ceph Block Device images with Kubernetes v1.13 and later through
+`ceph-csi`_, which dynamically provisions RBD images to back Kubernetes
+`volumes`_ and maps these RBD images as block devices (optionally mounting
+a file system contained within the image) on worker nodes running
+`pods`_ that reference an RBD-backed volume. Ceph stripes block device images as
+objects across the cluster, which means that large Ceph Block Device images have
+better performance than a standalone server!
+
+To use Ceph Block Devices with Kubernetes v1.13 and higher, you must install
+and configure ``ceph-csi`` within your Kubernetes environment. The following
+diagram depicts the Kubernetes/Ceph technology stack.
+
+.. ditaa::
+ +---------------------------------------------------+
+ | Kubernetes |
+ +---------------------------------------------------+
+ | ceph--csi |
+ +------------------------+--------------------------+
+ |
+ | configures
+ v
+ +------------------------+ +------------------------+
+ | | | rbd--nbd |
+ | Kernel Modules | +------------------------+
+ | | | librbd |
+ +------------------------+-+------------------------+
+ | RADOS Protocol |
+ +------------------------+-+------------------------+
+ | OSDs | | Monitors |
+ +------------------------+ +------------------------+
+
+
+.. important::
+ ``ceph-csi`` uses the RBD kernel modules by default which may not support all
+ Ceph `CRUSH tunables`_ or `RBD image features`_.
+
+Create a Pool
+=============
+
+By default, Ceph block devices use the ``rbd`` pool. Create a pool for
+Kubernetes volume storage. Ensure your Ceph cluster is running, then create
+the pool. ::
+
+ $ ceph osd pool create kubernetes
+
+See `Create a Pool`_ for details on specifying the number of placement groups
+for your pools, and `Placement Groups`_ for details on the number of placement
+groups you should set for your pools.
+
+A newly created pool must be initialized prior to use. Use the ``rbd`` tool
+to initialize the pool::
+
+ $ rbd pool init kubernetes
+
+Configure ceph-csi
+==================
+
+Setup Ceph Client Authentication
+--------------------------------
+
+Create a new user for Kubernetes and `ceph-csi`. Execute the following and
+record the generated key::
+
+ $ ceph auth get-or-create client.kubernetes mon 'profile rbd' osd 'profile rbd pool=kubernetes' mgr 'profile rbd pool=kubernetes'
+ [client.kubernetes]
+ key = AQD9o0Fd6hQRChAAt7fMaSZXduT3NWEqylNpmg==
+
+Generate `ceph-csi` `ConfigMap`
+-------------------------------
+
+The `ceph-csi` requires a `ConfigMap` object stored in Kubernetes to define the
+the Ceph monitor addresses for the Ceph cluster. Collect both the Ceph cluster
+unique `fsid` and the monitor addresses::
+
+ $ ceph mon dump
+ <...>
+ fsid b9127830-b0cc-4e34-aa47-9d1a2e9949a8
+ <...>
+ 0: [v2:192.168.1.1:3300/0,v1:192.168.1.1:6789/0] mon.a
+ 1: [v2:192.168.1.2:3300/0,v1:192.168.1.2:6789/0] mon.b
+ 2: [v2:192.168.1.3:3300/0,v1:192.168.1.3:6789/0] mon.c
+
+.. note::
+ ``ceph-csi`` currently only supports the `legacy V1 protocol`_.
+
+Generate a `csi-config-map.yaml` file similar to the example below, substituting
+the `fsid` for "clusterID", and the monitor addresses for "monitors"::
+
+ $ cat <<EOF > csi-config-map.yaml
+ ---
+ apiVersion: v1
+ kind: ConfigMap
+ data:
+ config.json: |-
+ [
+ {
+ "clusterID": "b9127830-b0cc-4e34-aa47-9d1a2e9949a8",
+ "monitors": [
+ "192.168.1.1:6789",
+ "192.168.1.2:6789",
+ "192.168.1.3:6789"
+ ]
+ }
+ ]
+ metadata:
+ name: ceph-csi-config
+ EOF
+
+Once generated, store the new `ConfigMap` object in Kubernetes::
+
+ $ kubectl apply -f csi-config-map.yaml
+
+Generate `ceph-csi` cephx `Secret`
+----------------------------------
+
+`ceph-csi` requires the cephx credentials for communicating with the Ceph
+cluster. Generate a `csi-rbd-secret.yaml` file similar to the example below,
+using the newly created Kubernetes user id and cephx key::
+
+ $ cat <<EOF > csi-rbd-secret.yaml
+ ---
+ apiVersion: v1
+ kind: Secret
+ metadata:
+ name: csi-rbd-secret
+ namespace: default
+ stringData:
+ userID: kubernetes
+ userKey: AQD9o0Fd6hQRChAAt7fMaSZXduT3NWEqylNpmg==
+ EOF
+
+Once generated, store the new `Secret` object in Kubernetes::
+
+ $ kubectl apply -f csi-rbd-secret.yaml
+
+Configure `ceph-csi` Plugins
+----------------------------
+
+Create the required `ServiceAccount` and RBAC `ClusterRole`/`ClusterRoleBinding`
+Kubernetes objects. These objects do not necessarily need to be customized for
+your Kubernetes environment and therefore can be used as-is from the `ceph-csi`
+deployment YAMLs::
+
+ $ kubectl apply -f https://raw.githubusercontent.com/ceph/ceph-csi/master/deploy/rbd/kubernetes/csi-provisioner-rbac.yaml
+ $ kubectl apply -f https://raw.githubusercontent.com/ceph/ceph-csi/master/deploy/rbd/kubernetes/csi-nodeplugin-rbac.yaml
+
+Finally, create the `ceph-csi` provisioner and node plugins. With the
+possible exception of the `ceph-csi` container release version, these objects do
+not necessarily need to be customized for your Kubernetes environment and
+therefore can be used as-is from the `ceph-csi` deployment YAMLs::
+
+ $ wget https://raw.githubusercontent.com/ceph/ceph-csi/master/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml
+ $ kubectl apply -f csi-rbdplugin-provisioner.yaml
+ $ wget https://raw.githubusercontent.com/ceph/ceph-csi/master/deploy/rbd/kubernetes/csi-rbdplugin.yaml
+ $ kubectl apply -f csi-rbdplugin.yaml
+
+.. important::
+ The provisioner and node plugin YAMLs will, by default, pull the development
+ release of the `ceph-csi` container (quay.io/cephcsi/cephcsi:canary).
+ The YAMLs should be updated to use a release version container for
+ production workloads.
+
+Using Ceph Block Devices
+========================
+
+Create a `StorageClass`
+-----------------------
+
+The Kubernetes `StorageClass` defines a class of storage. Multiple `StorageClass`
+objects can be created to map to different quality-of-service levels (i.e. NVMe
+vs HDD-based pools) and features.
+
+For example, to create a `ceph-csi` `StorageClass` that maps to the `kubernetes`
+pool created above, the following YAML file can be used after ensuring that the
+"clusterID" property matches your Ceph cluster's `fsid`::
+
+ $ cat <<EOF > csi-rbd-sc.yaml
+ ---
+ apiVersion: storage.k8s.io/v1
+ kind: StorageClass
+ metadata:
+ name: csi-rbd-sc
+ provisioner: rbd.csi.ceph.com
+ parameters:
+ clusterID: b9127830-b0cc-4e34-aa47-9d1a2e9949a8
+ pool: kubernetes
+ csi.storage.k8s.io/provisioner-secret-name: csi-rbd-secret
+ csi.storage.k8s.io/provisioner-secret-namespace: default
+ csi.storage.k8s.io/node-stage-secret-name: csi-rbd-secret
+ csi.storage.k8s.io/node-stage-secret-namespace: default
+ reclaimPolicy: Delete
+ mountOptions:
+ - discard
+ EOF
+ $ kubectl apply -f csi-rbd-sc.yaml
+
+Create a `PersistentVolumeClaim`
+--------------------------------
+
+A `PersistentVolumeClaim` is a request for abstract storage resources by a user.
+The `PersistentVolumeClaim` would then be associated to a `Pod` resource to
+provision a `PersistentVolume`, which would be backed by a Ceph block image.
+An optional `volumeMode` can be included to select between a mounted file system
+(default) or raw block device-based volume.
+
+Using `ceph-csi`, specifying `Filesystem` for `volumeMode` can support both
+`ReadWriteOnce` and `ReadOnlyMany` `accessMode` claims, and specifying `Block`
+for `volumeMode` can support `ReadWriteOnce`, `ReadWriteMany`, and
+`ReadOnlyMany` `accessMode` claims.
+
+For example, to create a block-based `PersistentVolumeClaim` that utilizes
+the `ceph-csi`-based `StorageClass` created above, the following YAML can be
+used to request raw block storage from the `csi-rbd-sc` `StorageClass`::
+
+ $ cat <<EOF > raw-block-pvc.yaml
+ ---
+ apiVersion: v1
+ kind: PersistentVolumeClaim
+ metadata:
+ name: raw-block-pvc
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ volumeMode: Block
+ resources:
+ requests:
+ storage: 1Gi
+ storageClassName: csi-rbd-sc
+ EOF
+ $ kubectl apply -f raw-block-pvc.yaml
+
+The following demonstrates and example of binding the above
+`PersistentVolumeClaim` to a `Pod` resource as a raw block device::
+
+ $ cat <<EOF > raw-block-pod.yaml
+ ---
+ apiVersion: v1
+ kind: Pod
+ metadata:
+ name: pod-with-raw-block-volume
+ spec:
+ containers:
+ - name: fc-container
+ image: fedora:26
+ command: ["/bin/sh", "-c"]
+ args: ["tail -f /dev/null"]
+ volumeDevices:
+ - name: data
+ devicePath: /dev/xvda
+ volumes:
+ - name: data
+ persistentVolumeClaim:
+ claimName: raw-block-pvc
+ EOF
+ $ kubectl apply -f raw-block-pod.yaml
+
+To create a file-system-based `PersistentVolumeClaim` that utilizes the
+`ceph-csi`-based `StorageClass` created above, the following YAML can be used to
+request a mounted file system (backed by an RBD image) from the `csi-rbd-sc`
+`StorageClass`::
+
+ $ cat <<EOF > pvc.yaml
+ ---
+ apiVersion: v1
+ kind: PersistentVolumeClaim
+ metadata:
+ name: rbd-pvc
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ volumeMode: Filesystem
+ resources:
+ requests:
+ storage: 1Gi
+ storageClassName: csi-rbd-sc
+ EOF
+ $ kubectl apply -f pvc.yaml
+
+The following demonstrates and example of binding the above
+`PersistentVolumeClaim` to a `Pod` resource as a mounted file system::
+
+ $ cat <<EOF > pod.yaml
+ ---
+ apiVersion: v1
+ kind: Pod
+ metadata:
+ name: csi-rbd-demo-pod
+ spec:
+ containers:
+ - name: web-server
+ image: nginx
+ volumeMounts:
+ - name: mypvc
+ mountPath: /var/lib/www/html
+ volumes:
+ - name: mypvc
+ persistentVolumeClaim:
+ claimName: rbd-pvc
+ readOnly: false
+ EOF
+ $ kubectl apply -f pod.yaml
+
+.. _ceph-csi: https://github.com/ceph/ceph-csi/
+.. _volumes: https://kubernetes.io/docs/concepts/storage/volumes/
+.. _pods: https://kubernetes.io/docs/concepts/workloads/pods/pod-overview/
+.. _Create a Pool: ../../rados/operations/pools#createpool
+.. _Placement Groups: ../../rados/operations/placement-groups
+.. _CRUSH tunables: ../../rados/operations/crush-map/#tunables
+.. _RBD image features: ../rbd-config-ref/#image-features
+.. _legacy V1 protocol: ../../rados/configuration/msgr2/#address-formats
diff --git a/doc/rbd/rbd-live-migration.rst b/doc/rbd/rbd-live-migration.rst
new file mode 100644
index 000000000..c3e09193d
--- /dev/null
+++ b/doc/rbd/rbd-live-migration.rst
@@ -0,0 +1,367 @@
+======================
+ Image Live-Migration
+======================
+
+.. index:: Ceph Block Device; live-migration
+
+RBD images can be live-migrated between different pools within the same cluster;
+between different image formats and layouts; or from external data sources.
+When started, the source will be deep-copied to the destination image, pulling
+all snapshot history while preserving the sparse allocation of data where
+possible.
+
+By default, when live-migrating RBD images within the same Ceph cluster, the
+source image will be marked read-only and all clients will instead redirect
+IOs to the new target image. In addition, this mode can optionally preserve the
+link to the source image's parent to preserve sparseness, or it can flatten the
+image during the migration to remove the dependency on the source image's
+parent.
+
+The live-migration process can also be used in an import-only mode where the
+source image remains unmodified and the target image can be linked to an
+external data source such as a backing file, HTTP(s) file, or S3 object.
+
+The live-migration copy process can safely run in the background while the new
+target image is in use. There is currently a requirement to temporarily stop
+using the source image before preparing a migration when not using the
+import-only mode of operation. This helps to ensure that the client using the
+image is updated to point to the new target image.
+
+.. note::
+ Image live-migration requires the Ceph Nautilus release or later. Support for
+ external data sources requires the Ceph Pacific release of later. The
+ ``krbd`` kernel module does not support live-migration at this time.
+
+
+.. ditaa::
+
+ +-------------+ +-------------+
+ | {s} c999 | | {s} |
+ | Live | Target refers | Live |
+ | migration |<-------------*| migration |
+ | source | to Source | target |
+ | | | |
+ | (read only) | | (writable) |
+ +-------------+ +-------------+
+
+ Source Target
+
+The live-migration process is comprised of three steps:
+
+#. **Prepare Migration:** The initial step creates the new target image and
+ links the target image to the source. When not configured in the import-only
+ mode, the source image will also be linked to the target image and marked
+ read-only.
+
+ Similar to `layered images`_, attempts to read uninitialized data extents
+ within the target image will internally redirect the read to the source
+ image, and writes to uninitialized extents within the target will internally
+ deep-copy the overlapping source image block to the target image.
+
+
+#. **Execute Migration:** This is a background operation that deep-copies all
+ initialized blocks from the source image to the target. This step can be
+ run while clients are actively using the new target image.
+
+
+#. **Finish Migration:** Once the background migration process has completed,
+ the migration can be committed or aborted. Committing the migration will
+ remove the cross-links between the source and target images, and will
+ remove the source image if not configured in the import-only mode. Aborting
+ the migration will remove the cross-links, and will remove the target image.
+
+Prepare Migration
+=================
+
+The default live-migration process for images within the same Ceph cluster is
+initiated by running the `rbd migration prepare` command, providing the source
+and target images::
+
+ $ rbd migration prepare migration_source [migration_target]
+
+The `rbd migration prepare` command accepts all the same layout optionals as the
+`rbd create` command, which allows changes to the immutable image on-disk
+layout. The `migration_target` can be skipped if the goal is only to change the
+on-disk layout, keeping the original image name.
+
+All clients using the source image must be stopped prior to preparing a
+live-migration. The prepare step will fail if it finds any running clients with
+the image open in read/write mode. Once the prepare step is complete, the
+clients can be restarted using the new target image name. Attempting to restart
+the clients using the source image name will result in failure.
+
+The `rbd status` command will show the current state of the live-migration::
+
+ $ rbd status migration_target
+ Watchers: none
+ Migration:
+ source: rbd/migration_source (5e2cba2f62e)
+ destination: rbd/migration_target (5e2ed95ed806)
+ state: prepared
+
+Note that the source image will be moved to the RBD trash to avoid mistaken
+usage during the migration process::
+
+ $ rbd info migration_source
+ rbd: error opening image migration_source: (2) No such file or directory
+ $ rbd trash ls --all
+ 5e2cba2f62e migration_source
+
+
+Prepare Import-Only Migration
+=============================
+
+The import-only live-migration process is initiated by running the same
+`rbd migration prepare` command, but adding the `--import-only` optional
+and providing a JSON-encoded ``source-spec`` to describe how to access
+the source image data. This ``source-spec`` can either be passed
+directly via the `--source-spec` optional, or via a file or STDIN via the
+`--source-spec-path` optional::
+
+ $ rbd migration prepare --import-only --source-spec "<JSON>" migration_target
+
+The `rbd migration prepare` command accepts all the same layout optionals as the
+`rbd create` command.
+
+The `rbd status` command will show the current state of the live-migration::
+
+ $ rbd status migration_target
+ Watchers: none
+ Migration:
+ source: {"stream":{"file_path":"/mnt/image.raw","type":"file"},"type":"raw"}
+ destination: rbd/migration_target (ac69113dc1d7)
+ state: prepared
+
+The general format for the ``source-spec`` JSON is as follows::
+
+ {
+ "type": "<format-type>",
+ <format unique parameters>
+ "stream": {
+ "type": "<stream-type>",
+ <stream unique parameters>
+ }
+ }
+
+The following formats are currently supported: ``native``, ``qcow``, and
+``raw``. The following streams are currently supported: ``file``, ``http``, and
+``s3``.
+
+Formats
+~~~~~~~
+
+The ``native`` format can be used to describe a native RBD image within a
+Ceph cluster as the source image. Its ``source-spec`` JSON is encoded
+as follows::
+
+ {
+ "type": "native",
+ "pool_name": "<pool-name>",
+ ["pool_id": <pool-id>,] (optional alternative to "pool_name")
+ ["pool_namespace": "<pool-namespace",] (optional)
+ "image_name": "<image-name>",
+ ["image_id": "<image-id>",] (optional if image in trash)
+ "snap_name": "<snap-name>",
+ ["snap_id": "<snap-id>",] (optional alternative to "snap_name")
+ }
+
+Note that the ``native`` format does not include the ``stream`` object since
+it utilizes native Ceph operations. For example, to import from the image
+``rbd/ns1/image1@snap1``, the ``source-spec`` could be encoded as::
+
+ {
+ "type": "native",
+ "pool_name": "rbd",
+ "pool_namespace": "ns1",
+ "image_name": "image1",
+ "snap_name": "snap1"
+ }
+
+The ``qcow`` format can be used to describe a QCOW (QEMU copy-on-write) block
+device. Both the QCOW (v1) and QCOW2 formats are currently supported with the
+exception of advanced features such as compression, encryption, backing
+files, and external data files. Support for these missing features may be added
+in a future release. The ``qcow`` format data can be linked to any supported
+stream source described below. For example, its base ``source-spec`` JSON is
+encoded as follows::
+
+ {
+ "type": "qcow",
+ "stream": {
+ <stream unique parameters>
+ }
+ }
+
+The ``raw`` format can be used to describe a thick-provisioned, raw block device
+export (i.e. `rbd export --export-format 1 <snap-spec>`). The ``raw`` format
+data can be linked to any supported stream source described below. For example,
+its base ``source-spec`` JSON is encoded as follows::
+
+ {
+ "type": "raw",
+ "stream": {
+ <stream unique parameters for HEAD, non-snapshot revision>
+ },
+ "snapshots": [
+ {
+ "type": "raw",
+ "name": "<snapshot-name>",
+ "stream": {
+ <stream unique parameters for snapshot>
+ }
+ },
+ ] (optional oldest to newest ordering of snapshots)
+ }
+
+The inclusion of the ``snapshots`` array is optional and currently only supports
+thick-provisioned ``raw`` snapshot exports.
+
+Additional formats such as RBD export-format v2 and RBD export-diff
+snapshots will be added in a future release.
+
+Streams
+~~~~~~~
+
+The ``file`` stream can be used to import from a locally accessible POSIX file
+source. Its ``source-spec`` JSON is encoded as follows::
+
+ {
+ <format unique parameters>
+ "stream": {
+ "type": "file",
+ "file_path": "<file-path>"
+ }
+ }
+
+For example, to import a raw-format image from a file located at
+"/mnt/image.raw", its ``source-spec`` JSON is encoded as follows::
+
+ {
+ "type": "raw",
+ "stream": {
+ "type": "file",
+ "file_path": "/mnt/image.raw"
+ }
+ }
+
+The ``http`` stream can be used to import from a remote HTTP or HTTPS web
+server. Its ``source-spec`` JSON is encoded as follows::
+
+ {
+ <format unique parameters>
+ "stream": {
+ "type": "http",
+ "url": "<url-path>"
+ }
+ }
+
+For example, to import a raw-format image from a file located at
+``http://download.ceph.com/image.raw``, its ``source-spec`` JSON is encoded
+as follows::
+
+ {
+ "type": "raw",
+ "stream": {
+ "type": "http",
+ "url": "http://download.ceph.com/image.raw"
+ }
+ }
+
+The ``s3`` stream can be used to import from a remote S3 bucket. Its
+``source-spec`` JSON is encoded as follows::
+
+ {
+ <format unique parameters>
+ "stream": {
+ "type": "s3",
+ "url": "<url-path>",
+ "access_key": "<access-key>",
+ "secret_key": "<secret-key>"
+ }
+ }
+
+For example, to import a raw-format image from a file located at
+`http://s3.ceph.com/bucket/image.raw`, its ``source-spec`` JSON is encoded
+as follows::
+
+ {
+ "type": "raw",
+ "stream": {
+ "type": "s3",
+ "url": "http://s3.ceph.com/bucket/image.raw",
+ "access_key": "NX5QOQKC6BH2IDN8HC7A",
+ "secret_key": "LnEsqNNqZIpkzauboDcLXLcYaWwLQ3Kop0zAnKIn"
+ }
+ }
+
+.. note::
+ The ``access_key`` and ``secret_key`` parameters support storing the keys in
+ the MON config-key store by prefixing the key values with ``config://``
+ followed by the path in the MON config-key store to the value. Values can be
+ stored in the config-key store via ``ceph config-key set <key-path> <value>``
+ (e.g. ``ceph config-key set rbd/s3/access_key NX5QOQKC6BH2IDN8HC7A``).
+
+Execute Migration
+=================
+
+After preparing the live-migration, the image blocks from the source image
+must be copied to the target image. This is accomplished by running the
+`rbd migration execute` command::
+
+ $ rbd migration execute migration_target
+ Image migration: 100% complete...done.
+
+The `rbd status` command will also provide feedback on the progress of the
+migration block deep-copy process::
+
+ $ rbd status migration_target
+ Watchers:
+ watcher=1.2.3.4:0/3695551461 client.123 cookie=123
+ Migration:
+ source: rbd/migration_source (5e2cba2f62e)
+ destination: rbd/migration_target (5e2ed95ed806)
+ state: executing (32% complete)
+
+
+Commit Migration
+================
+
+Once the live-migration has completed deep-copying all data blocks from the
+source image to the target, the migration can be committed::
+
+ $ rbd status migration_target
+ Watchers: none
+ Migration:
+ source: rbd/migration_source (5e2cba2f62e)
+ destination: rbd/migration_target (5e2ed95ed806)
+ state: executed
+ $ rbd migration commit migration_target
+ Commit image migration: 100% complete...done.
+
+If the `migration_source` image is a parent of one or more clones, the `--force`
+option will need to be specified after ensuring all descendent clone images are
+not in use.
+
+Committing the live-migration will remove the cross-links between the source
+and target images, and will remove the source image::
+
+ $ rbd trash list --all
+
+
+Abort Migration
+===============
+
+If you wish to revert the prepare or execute step, run the `rbd migration abort`
+command to revert the migration process::
+
+ $ rbd migration abort migration_target
+ Abort image migration: 100% complete...done.
+
+Aborting the migration will result in the target image being deleted and access
+to the original source image being restored::
+
+ $ rbd ls
+ migration_source
+
+
+.. _layered images: ../rbd-snapshot/#layering
diff --git a/doc/rbd/rbd-mirroring.rst b/doc/rbd/rbd-mirroring.rst
new file mode 100644
index 000000000..d5d1191d6
--- /dev/null
+++ b/doc/rbd/rbd-mirroring.rst
@@ -0,0 +1,529 @@
+===============
+ RBD Mirroring
+===============
+
+.. index:: Ceph Block Device; mirroring
+
+RBD images can be asynchronously mirrored between two Ceph clusters. This
+capability is available in two modes:
+
+* **Journal-based**: This mode uses the RBD journaling image feature to ensure
+ point-in-time, crash-consistent replication between clusters. Every write to
+ the RBD image is first recorded to the associated journal before modifying the
+ actual image. The remote cluster will read from this associated journal and
+ replay the updates to its local copy of the image. Since each write to the
+ RBD image will result in two writes to the Ceph cluster, expect write
+ latencies to nearly double while using the RBD journaling image feature.
+
+* **Snapshot-based**: This mode uses periodically scheduled or manually
+ created RBD image mirror-snapshots to replicate crash-consistent RBD images
+ between clusters. The remote cluster will determine any data or metadata
+ updates between two mirror-snapshots and copy the deltas to its local copy of
+ the image. With the help of the RBD ``fast-diff`` image feature, updated data
+ blocks can be quickly determined without the need to scan the full RBD image.
+ Since this mode is not as fine-grained as journaling, the complete delta
+ between two snapshots will need to be synced prior to use during a failover
+ scenario. Any partially applied set of deltas will be rolled back at moment
+ of failover.
+
+.. note:: journal-based mirroring requires the Ceph Jewel release or later;
+ snapshot-based mirroring requires the Ceph Octopus release or later.
+
+Mirroring is configured on a per-pool basis within peer clusters and can be
+configured on a specific subset of images within the pool. You can also mirror
+all images within a given pool when using journal-based
+mirroring. Mirroring is configured using the ``rbd`` command. The
+``rbd-mirror`` daemon is responsible for pulling image updates from the remote
+peer cluster and applying them to the image within the local cluster.
+
+Depending on the desired needs for replication, RBD mirroring can be configured
+for either one- or two-way replication:
+
+* **One-way Replication**: When data is only mirrored from a primary cluster to
+ a secondary cluster, the ``rbd-mirror`` daemon runs only on the secondary
+ cluster.
+
+* **Two-way Replication**: When data is mirrored from primary images on one
+ cluster to non-primary images on another cluster (and vice-versa), the
+ ``rbd-mirror`` daemon runs on both clusters.
+
+.. important:: Each instance of the ``rbd-mirror`` daemon must be able to
+ connect to both the local and remote Ceph clusters simultaneously (i.e.
+ all monitor and OSD hosts). Additionally, the network must have sufficient
+ bandwidth between the two data centers to handle mirroring workload.
+
+Pool Configuration
+==================
+
+The following procedures demonstrate how to perform the basic administrative
+tasks to configure mirroring using the ``rbd`` command. Mirroring is
+configured on a per-pool basis.
+
+These pool configuration steps should be performed on both peer clusters. These
+procedures assume that both clusters, named "site-a" and "site-b", are accessible
+from a single host for clarity.
+
+See the `rbd`_ manpage for additional details of how to connect to different
+Ceph clusters.
+
+.. note:: The cluster name in the following examples corresponds to a Ceph
+ configuration file of the same name (e.g. /etc/ceph/site-b.conf). See the
+ `ceph-conf`_ documentation for how to configure multiple clusters. Note
+ that ``rbd-mirror`` does **not** require the source and destination clusters
+ to have unique internal names; both can and should call themselves ``ceph``.
+ The config `files` that ``rbd-mirror`` needs for local and remote clusters
+ can be named arbitrarily, and containerizing the daemon is one strategy
+ for maintaining them outside of ``/etc/ceph`` to avoid confusion.
+
+Enable Mirroring
+----------------
+
+To enable mirroring on a pool with ``rbd``, issue the ``mirror pool enable``
+subcommand with the pool name, and the mirroring mode::
+
+ rbd mirror pool enable {pool-name} {mode}
+
+The mirroring mode can either be ``image`` or ``pool``:
+
+* **image**: When configured in ``image`` mode, mirroring must
+ `explicitly enabled`_ on each image.
+* **pool** (default): When configured in ``pool`` mode, all images in the pool
+ with the journaling feature enabled are mirrored.
+
+For example::
+
+ $ rbd --cluster site-a mirror pool enable image-pool image
+ $ rbd --cluster site-b mirror pool enable image-pool image
+
+Disable Mirroring
+-----------------
+
+To disable mirroring on a pool with ``rbd``, specify the ``mirror pool disable``
+command and the pool name::
+
+ rbd mirror pool disable {pool-name}
+
+When mirroring is disabled on a pool in this way, mirroring will also be
+disabled on any images (within the pool) for which mirroring was enabled
+explicitly.
+
+For example::
+
+ $ rbd --cluster site-a mirror pool disable image-pool
+ $ rbd --cluster site-b mirror pool disable image-pool
+
+Bootstrap Peers
+---------------
+
+In order for the ``rbd-mirror`` daemon to discover its peer cluster, the peer
+must be registered and a user account must be created.
+This process can be automated with ``rbd`` and the
+``mirror pool peer bootstrap create`` and ``mirror pool peer bootstrap import``
+commands.
+
+To manually create a new bootstrap token with ``rbd``, issue the
+``mirror pool peer bootstrap create`` subcommand, a pool name, and an
+optional friendly site name to describe the local cluster::
+
+ rbd mirror pool peer bootstrap create [--site-name {local-site-name}] {pool-name}
+
+The output of ``mirror pool peer bootstrap create`` will be a token that should
+be provided to the ``mirror pool peer bootstrap import`` command. For example,
+on site-a::
+
+ $ rbd --cluster site-a mirror pool peer bootstrap create --site-name site-a image-pool
+ eyJmc2lkIjoiOWY1MjgyZGItYjg5OS00NTk2LTgwOTgtMzIwYzFmYzM5NmYzIiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFBUnczOWQwdkhvQmhBQVlMM1I4RmR5dHNJQU50bkFTZ0lOTVE9PSIsIm1vbl9ob3N0IjoiW3YyOjE5Mi4xNjguMS4zOjY4MjAsdjE6MTkyLjE2OC4xLjM6NjgyMV0ifQ==
+
+To manually import the bootstrap token created by another cluster with ``rbd``,
+specify the ``mirror pool peer bootstrap import`` command, the pool name, a file
+path to the created token (or '-' to read from standard input), along with an
+optional friendly site name to describe the local cluster and a mirroring
+direction (defaults to rx-tx for bidirectional mirroring, but can also be set
+to rx-only for unidirectional mirroring)::
+
+ rbd mirror pool peer bootstrap import [--site-name {local-site-name}] [--direction {rx-only or rx-tx}] {pool-name} {token-path}
+
+For example, on site-b::
+
+ $ cat <<EOF > token
+ eyJmc2lkIjoiOWY1MjgyZGItYjg5OS00NTk2LTgwOTgtMzIwYzFmYzM5NmYzIiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFBUnczOWQwdkhvQmhBQVlMM1I4RmR5dHNJQU50bkFTZ0lOTVE9PSIsIm1vbl9ob3N0IjoiW3YyOjE5Mi4xNjguMS4zOjY4MjAsdjE6MTkyLjE2OC4xLjM6NjgyMV0ifQ==
+ EOF
+ $ rbd --cluster site-b mirror pool peer bootstrap import --site-name site-b image-pool token
+
+Add Cluster Peer Manually
+-------------------------
+
+Cluster peers can be specified manually if desired or if the above bootstrap
+commands are not available with the currently installed Ceph release.
+
+The remote ``rbd-mirror`` daemon will need access to the local cluster to
+perform mirroring. A new local Ceph user should be created for the remote
+daemon to use. To `create a Ceph user`_, with ``ceph`` specify the
+``auth get-or-create`` command, user name, monitor caps, and OSD caps::
+
+ ceph auth get-or-create client.rbd-mirror-peer mon 'profile rbd' osd 'profile rbd'
+
+The resulting keyring should be copied to the other cluster's ``rbd-mirror``
+daemon hosts if not using the Ceph monitor ``config-key`` store described below.
+
+To manually add a mirroring peer Ceph cluster with ``rbd``, specify the
+``mirror pool peer add`` command, the pool name, and a cluster specification::
+
+ rbd mirror pool peer add {pool-name} {client-name}@{cluster-name}
+
+For example::
+
+ $ rbd --cluster site-a mirror pool peer add image-pool client.rbd-mirror-peer@site-b
+ $ rbd --cluster site-b mirror pool peer add image-pool client.rbd-mirror-peer@site-a
+
+By default, the ``rbd-mirror`` daemon needs to have access to a Ceph
+configuration file located at ``/etc/ceph/{cluster-name}.conf`` that provides
+the addresses of the peer cluster's monitors, in addition to a keyring for
+``{client-name}`` located in the default or configured keyring search paths
+(e.g. ``/etc/ceph/{cluster-name}.{client-name}.keyring``).
+
+Alternatively, the peer cluster's monitor and/or client key can be securely
+stored within the local Ceph monitor ``config-key`` store. To specify the
+peer cluster connection attributes when adding a mirroring peer, use the
+``--remote-mon-host`` and ``--remote-key-file`` optionals. For example::
+
+ $ cat <<EOF > remote-key-file
+ AQAeuZdbMMoBChAAcj++/XUxNOLFaWdtTREEsw==
+ EOF
+ $ rbd --cluster site-a mirror pool peer add image-pool client.rbd-mirror-peer@site-b --remote-mon-host 192.168.1.1,192.168.1.2 --remote-key-file remote-key-file
+ $ rbd --cluster site-a mirror pool info image-pool --all
+ Mode: pool
+ Peers:
+ UUID NAME CLIENT MON_HOST KEY
+ 587b08db-3d33-4f32-8af8-421e77abb081 site-b client.rbd-mirror-peer 192.168.1.1,192.168.1.2 AQAeuZdbMMoBChAAcj++/XUxNOLFaWdtTREEsw==
+
+Remove Cluster Peer
+-------------------
+
+To remove a mirroring peer Ceph cluster with ``rbd``, specify the
+``mirror pool peer remove`` command, the pool name, and the peer UUID
+(available from the ``rbd mirror pool info`` command)::
+
+ rbd mirror pool peer remove {pool-name} {peer-uuid}
+
+For example::
+
+ $ rbd --cluster site-a mirror pool peer remove image-pool 55672766-c02b-4729-8567-f13a66893445
+ $ rbd --cluster site-b mirror pool peer remove image-pool 60c0e299-b38f-4234-91f6-eed0a367be08
+
+Data Pools
+----------
+
+When creating images in the destination cluster, ``rbd-mirror`` selects a data
+pool as follows:
+
+#. If the destination cluster has a default data pool configured (with the
+ ``rbd_default_data_pool`` configuration option), it will be used.
+#. Otherwise, if the source image uses a separate data pool, and a pool with the
+ same name exists on the destination cluster, that pool will be used.
+#. If neither of the above is true, no data pool will be set.
+
+Image Configuration
+===================
+
+Unlike pool configuration, image configuration only needs to be performed
+against a single mirroring peer Ceph cluster.
+
+Mirrored RBD images are designated as either primary or non-primary. This is a
+property of the image and not the pool. Images that are designated as
+non-primary cannot be modified.
+
+Images are automatically promoted to primary when mirroring is first enabled on
+an image (either implicitly if the pool mirror mode was ``pool`` and the image
+has the journaling image feature enabled, or `explicitly enabled`_ by the
+``rbd`` command if the pool mirror mode was ``image``).
+
+Enable Image Mirroring
+----------------------
+
+If mirroring is configured in ``image`` mode for the image's pool, then it
+is necessary to explicitly enable mirroring for each image within the pool.
+To enable mirroring for a specific image with ``rbd``, specify the
+``mirror image enable`` command along with the pool, image name, and mode::
+
+ rbd mirror image enable {pool-name}/{image-name} {mode}
+
+The mirror image mode can either be ``journal`` or ``snapshot``:
+
+* **journal** (default): When configured in ``journal`` mode, mirroring will
+ utilize the RBD journaling image feature to replicate the image contents. If
+ the RBD journaling image feature is not yet enabled on the image, it will be
+ automatically enabled.
+
+* **snapshot**: When configured in ``snapshot`` mode, mirroring will utilize
+ RBD image mirror-snapshots to replicate the image contents. Once enabled, an
+ initial mirror-snapshot will automatically be created. Additional RBD image
+ `mirror-snapshots`_ can be created by the ``rbd`` command.
+
+For example::
+
+ $ rbd --cluster site-a mirror image enable image-pool/image-1 snapshot
+ $ rbd --cluster site-a mirror image enable image-pool/image-2 journal
+
+Enable Image Journaling Feature
+-------------------------------
+
+RBD journal-based mirroring uses the RBD image journaling feature to ensure that
+the replicated image always remains crash-consistent. When using the ``image``
+mirroring mode, the journaling feature will be automatically enabled when
+mirroring is enabled on the image. When using the ``pool`` mirroring mode,
+before an image can be mirrored to a peer cluster, the RBD image journaling
+feature must be enabled. The feature can be enabled at image creation time by
+providing the ``--image-feature exclusive-lock,journaling`` option to the
+``rbd`` command.
+
+Alternatively, the journaling feature can be dynamically enabled on
+pre-existing RBD images. To enable journaling with ``rbd``, specify
+the ``feature enable`` command, the pool and image name, and the feature name::
+
+ rbd feature enable {pool-name}/{image-name} {feature-name}
+
+For example::
+
+ $ rbd --cluster site-a feature enable image-pool/image-1 journaling
+
+.. note:: The journaling feature is dependent on the exclusive-lock feature. If
+ the exclusive-lock feature is not already enabled, it should be enabled prior
+ to enabling the journaling feature.
+
+.. tip:: You can enable journaling on all new images by default by adding
+ ``rbd default features = 125`` to your Ceph configuration file.
+
+.. tip:: ``rbd-mirror`` tunables are set by default to values suitable for
+ mirroring an entire pool. When using ``rbd-mirror`` to migrate single
+ volumes been clusters you may achieve substantial performance gains
+ by setting ``rbd_mirror_journal_max_fetch_bytes=33554432`` and
+ ``rbd_journal_max_payload_bytes=8388608`` within the ``[client]`` config
+ section of the local or centralized configuration. Note that these
+ settings may allow ``rbd-mirror`` to present a substantial write workload
+ to the destination cluster: monitor cluster performance closely during
+ migrations and test carefuly before running multiple migrations in parallel.
+
+Create Image Mirror-Snapshots
+-----------------------------
+
+When using snapshot-based mirroring, mirror-snapshots will need to be created
+whenever it is desired to mirror the changed contents of the RBD image. To
+create a mirror-snapshot manually with ``rbd``, specify the
+``mirror image snapshot`` command along with the pool and image name::
+
+ rbd mirror image snapshot {pool-name}/{image-name}
+
+For example::
+
+ $ rbd --cluster site-a mirror image snapshot image-pool/image-1
+
+By default up to ``5`` mirror-snapshots will be created per-image. The most
+recent mirror-snapshot is automatically pruned if the limit is reached.
+The limit can be overridden via the ``rbd_mirroring_max_mirroring_snapshots``
+configuration option if required. Additionally, mirror-snapshots are
+automatically deleted when the image is removed or when mirroring is disabled.
+
+Mirror-snapshots can also be automatically created on a periodic basis if
+mirror-snapshot schedules are defined. The mirror-snapshot can be scheduled
+globally, per-pool, or per-image levels. Multiple mirror-snapshot schedules can
+be defined at any level, but only the most-specific snapshot schedules that
+match an individual mirrored image will run.
+
+To create a mirror-snapshot schedule with ``rbd``, specify the
+``mirror snapshot schedule add`` command along with an optional pool or
+image name; interval; and optional start time::
+
+ rbd mirror snapshot schedule add [--pool {pool-name}] [--image {image-name}] {interval} [{start-time}]
+
+The ``interval`` can be specified in days, hours, or minutes using ``d``, ``h``,
+``m`` suffix respectively. The optional ``start-time`` can be specified using
+the ISO 8601 time format. For example::
+
+ $ rbd --cluster site-a mirror snapshot schedule add --pool image-pool 24h 14:00:00-05:00
+ $ rbd --cluster site-a mirror snapshot schedule add --pool image-pool --image image1 6h
+
+To remove a mirror-snapshot schedules with ``rbd``, specify the
+``mirror snapshot schedule remove`` command with options that match the
+corresponding ``add`` schedule command.
+
+To list all snapshot schedules for a specific level (global, pool, or image)
+with ``rbd``, specify the ``mirror snapshot schedule ls`` command along with
+an optional pool or image name. Additionally, the ``--recursive`` option can
+be specified to list all schedules at the specified level and below. For
+example::
+
+ $ rbd --cluster site-a mirror schedule ls --pool image-pool --recursive
+ POOL NAMESPACE IMAGE SCHEDULE
+ image-pool - - every 1d starting at 14:00:00-05:00
+ image-pool image1 every 6h
+
+To view the status for when the next snapshots will be created for
+snapshot-based mirroring RBD images with ``rbd``, specify the
+``mirror snapshot schedule status`` command along with an optional pool or
+image name::
+
+ rbd mirror snapshot schedule status [--pool {pool-name}] [--image {image-name}]
+
+For example::
+
+ $ rbd --cluster site-a mirror schedule status
+ SCHEDULE TIME IMAGE
+ 2020-02-26 18:00:00 image-pool/image1
+
+Disable Image Mirroring
+-----------------------
+
+To disable mirroring for a specific image with ``rbd``, specify the
+``mirror image disable`` command along with the pool and image name::
+
+ rbd mirror image disable {pool-name}/{image-name}
+
+For example::
+
+ $ rbd --cluster site-a mirror image disable image-pool/image-1
+
+Image Promotion and Demotion
+----------------------------
+
+In a failover scenario where the primary designation needs to be moved to the
+image in the peer Ceph cluster, access to the primary image should be stopped
+(e.g. power down the VM or remove the associated drive from a VM), demote the
+current primary image, promote the new primary image, and resume access to the
+image on the alternate cluster.
+
+.. note:: RBD only provides the necessary tools to facilitate an orderly
+ failover of an image. An external mechanism is required to coordinate the
+ full failover process (e.g. closing the image before demotion).
+
+To demote a specific image to non-primary with ``rbd``, specify the
+``mirror image demote`` command along with the pool and image name::
+
+ rbd mirror image demote {pool-name}/{image-name}
+
+For example::
+
+ $ rbd --cluster site-a mirror image demote image-pool/image-1
+
+To demote all primary images within a pool to non-primary with ``rbd``, specify
+the ``mirror pool demote`` command along with the pool name::
+
+ rbd mirror pool demote {pool-name}
+
+For example::
+
+ $ rbd --cluster site-a mirror pool demote image-pool
+
+To promote a specific image to primary with ``rbd``, specify the
+``mirror image promote`` command along with the pool and image name::
+
+ rbd mirror image promote [--force] {pool-name}/{image-name}
+
+For example::
+
+ $ rbd --cluster site-b mirror image promote image-pool/image-1
+
+To promote all non-primary images within a pool to primary with ``rbd``, specify
+the ``mirror pool promote`` command along with the pool name::
+
+ rbd mirror pool promote [--force] {pool-name}
+
+For example::
+
+ $ rbd --cluster site-a mirror pool promote image-pool
+
+.. tip:: Since the primary / non-primary status is per-image, it is possible to
+ have two clusters split the IO load and stage failover / failback.
+
+.. note:: Promotion can be forced using the ``--force`` option. Forced
+ promotion is needed when the demotion cannot be propagated to the peer
+ Ceph cluster (e.g. Ceph cluster failure, communication outage). This will
+ result in a split-brain scenario between the two peers and the image will no
+ longer be in-sync until a `force resync command`_ is issued.
+
+Force Image Resync
+------------------
+
+If a split-brain event is detected by the ``rbd-mirror`` daemon, it will not
+attempt to mirror the affected image until corrected. To resume mirroring for an
+image, first `demote the image`_ determined to be out-of-date and then request a
+resync to the primary image. To request an image resync with ``rbd``, specify
+the ``mirror image resync`` command along with the pool and image name::
+
+ rbd mirror image resync {pool-name}/{image-name}
+
+For example::
+
+ $ rbd mirror image resync image-pool/image-1
+
+.. note:: The ``rbd`` command only flags the image as requiring a resync. The
+ local cluster's ``rbd-mirror`` daemon process is responsible for performing
+ the resync asynchronously.
+
+Mirror Status
+=============
+
+The peer cluster replication status is stored for every primary mirrored image.
+This status can be retrieved using the ``mirror image status`` and
+``mirror pool status`` commands.
+
+To request the mirror image status with ``rbd``, specify the
+``mirror image status`` command along with the pool and image name::
+
+ rbd mirror image status {pool-name}/{image-name}
+
+For example::
+
+ $ rbd mirror image status image-pool/image-1
+
+To request the mirror pool summary status with ``rbd``, specify the
+``mirror pool status`` command along with the pool name::
+
+ rbd mirror pool status {pool-name}
+
+For example::
+
+ $ rbd mirror pool status image-pool
+
+.. note:: Adding ``--verbose`` option to the ``mirror pool status`` command will
+ additionally output status details for every mirroring image in the pool.
+
+rbd-mirror Daemon
+=================
+
+The two ``rbd-mirror`` daemons are responsible for watching image journals on
+the remote, peer cluster and replaying the journal events against the local
+cluster. The RBD image journaling feature records all modifications to the
+image in the order they occur. This ensures that a crash-consistent mirror of
+the remote image is available locally.
+
+The ``rbd-mirror`` daemon is available within the optional ``rbd-mirror``
+distribution package.
+
+.. important:: Each ``rbd-mirror`` daemon requires the ability to connect
+ to both clusters simultaneously.
+.. warning:: Pre-Luminous releases: only run a single ``rbd-mirror`` daemon per
+ Ceph cluster.
+
+Each ``rbd-mirror`` daemon should use a unique Ceph user ID. To
+`create a Ceph user`_, with ``ceph`` specify the ``auth get-or-create``
+command, user name, monitor caps, and OSD caps::
+
+ ceph auth get-or-create client.rbd-mirror.{unique id} mon 'profile rbd-mirror' osd 'profile rbd'
+
+The ``rbd-mirror`` daemon can be managed by ``systemd`` by specifying the user
+ID as the daemon instance::
+
+ systemctl enable ceph-rbd-mirror@rbd-mirror.{unique id}
+
+The ``rbd-mirror`` can also be run in foreground by ``rbd-mirror`` command::
+
+ rbd-mirror -f --log-file={log_path}
+
+.. _rbd: ../../man/8/rbd
+.. _ceph-conf: ../../rados/configuration/ceph-conf/#running-multiple-clusters
+.. _explicitly enabled: #enable-image-mirroring
+.. _force resync command: #force-image-resync
+.. _demote the image: #image-promotion-and-demotion
+.. _create a Ceph user: ../../rados/operations/user-management#add-a-user
+.. _mirror-snapshots: #create-image-mirror-snapshots
diff --git a/doc/rbd/rbd-openstack.rst b/doc/rbd/rbd-openstack.rst
new file mode 100644
index 000000000..3f1b85f30
--- /dev/null
+++ b/doc/rbd/rbd-openstack.rst
@@ -0,0 +1,395 @@
+=============================
+ Block Devices and OpenStack
+=============================
+
+.. index:: Ceph Block Device; OpenStack
+
+You can attach Ceph Block Device images to OpenStack instances through ``libvirt``,
+which configures the QEMU interface to ``librbd``. Ceph stripes block volumes
+across multiple OSDs within the cluster, which means that large volumes can
+realize better performance than local drives on a standalone server!
+
+To use Ceph Block Devices with OpenStack, you must install QEMU, ``libvirt``,
+and OpenStack first. We recommend using a separate physical node for your
+OpenStack installation. OpenStack recommends a minimum of 8GB of RAM and a
+quad-core processor. The following diagram depicts the OpenStack/Ceph
+technology stack.
+
+
+.. ditaa::
+
+ +---------------------------------------------------+
+ | OpenStack |
+ +---------------------------------------------------+
+ | libvirt |
+ +------------------------+--------------------------+
+ |
+ | configures
+ v
+ +---------------------------------------------------+
+ | QEMU |
+ +---------------------------------------------------+
+ | librbd |
+ +---------------------------------------------------+
+ | librados |
+ +------------------------+-+------------------------+
+ | OSDs | | Monitors |
+ +------------------------+ +------------------------+
+
+.. important:: To use Ceph Block Devices with OpenStack, you must have
+ access to a running Ceph Storage Cluster.
+
+Three parts of OpenStack integrate with Ceph's block devices:
+
+- **Images**: OpenStack Glance manages images for VMs. Images are immutable.
+ OpenStack treats images as binary blobs and downloads them accordingly.
+
+- **Volumes**: Volumes are block devices. OpenStack uses volumes to boot VMs,
+ or to attach volumes to running VMs. OpenStack manages volumes using
+ Cinder services.
+
+- **Guest Disks**: Guest disks are guest operating system disks. By default,
+ when you boot a virtual machine, its disk appears as a file on the file system
+ of the hypervisor (usually under ``/var/lib/nova/instances/<uuid>/``). Prior
+ to OpenStack Havana, the only way to boot a VM in Ceph was to use the
+ boot-from-volume functionality of Cinder. However, now it is possible to boot
+ every virtual machine inside Ceph directly without using Cinder, which is
+ advantageous because it allows you to perform maintenance operations easily
+ with the live-migration process. Additionally, if your hypervisor dies it is
+ also convenient to trigger ``nova evacuate`` and reinstate the virtual machine
+ elsewhere almost seamlessly. In doing so,
+ :ref:`exclusive locks <rbd-exclusive-locks>` prevent multiple
+ compute nodes from concurrently accessing the guest disk.
+
+
+You can use OpenStack Glance to store images as Ceph Block Devices, and you
+can use Cinder to boot a VM using a copy-on-write clone of an image.
+
+The instructions below detail the setup for Glance, Cinder and Nova, although
+they do not have to be used together. You may store images in Ceph block devices
+while running VMs using a local disk, or vice versa.
+
+.. important:: Using QCOW2 for hosting a virtual machine disk is NOT recommended.
+ If you want to boot virtual machines in Ceph (ephemeral backend or boot
+ from volume), please use the ``raw`` image format within Glance.
+
+.. index:: pools; OpenStack
+
+Create a Pool
+=============
+
+By default, Ceph block devices live within the ``rbd`` pool. You may use any
+suitable pool by specifying it explicitly. We recommend creating a pool for
+Cinder and a pool for Glance. Ensure your Ceph cluster is running, then create the pools. ::
+
+ ceph osd pool create volumes
+ ceph osd pool create images
+ ceph osd pool create backups
+ ceph osd pool create vms
+
+See `Create a Pool`_ for detail on specifying the number of placement groups for
+your pools, and `Placement Groups`_ for details on the number of placement
+groups you should set for your pools.
+
+Newly created pools must be initialized prior to use. Use the ``rbd`` tool
+to initialize the pools::
+
+ rbd pool init volumes
+ rbd pool init images
+ rbd pool init backups
+ rbd pool init vms
+
+.. _Create a Pool: ../../rados/operations/pools#createpool
+.. _Placement Groups: ../../rados/operations/placement-groups
+
+
+Configure OpenStack Ceph Clients
+================================
+
+The nodes running ``glance-api``, ``cinder-volume``, ``nova-compute`` and
+``cinder-backup`` act as Ceph clients. Each requires the ``ceph.conf`` file::
+
+ ssh {your-openstack-server} sudo tee /etc/ceph/ceph.conf </etc/ceph/ceph.conf
+
+
+Install Ceph client packages
+----------------------------
+
+On the ``glance-api`` node, you will need the Python bindings for ``librbd``::
+
+ sudo apt-get install python-rbd
+ sudo yum install python-rbd
+
+On the ``nova-compute``, ``cinder-backup`` and on the ``cinder-volume`` node,
+use both the Python bindings and the client command line tools::
+
+ sudo apt-get install ceph-common
+ sudo yum install ceph-common
+
+
+Setup Ceph Client Authentication
+--------------------------------
+
+If you have `cephx authentication`_ enabled, create a new user for Nova/Cinder
+and Glance. Execute the following::
+
+ ceph auth get-or-create client.glance mon 'profile rbd' osd 'profile rbd pool=images' mgr 'profile rbd pool=images'
+ ceph auth get-or-create client.cinder mon 'profile rbd' osd 'profile rbd pool=volumes, profile rbd pool=vms, profile rbd-read-only pool=images' mgr 'profile rbd pool=volumes, profile rbd pool=vms'
+ ceph auth get-or-create client.cinder-backup mon 'profile rbd' osd 'profile rbd pool=backups' mgr 'profile rbd pool=backups'
+
+Add the keyrings for ``client.cinder``, ``client.glance``, and
+``client.cinder-backup`` to the appropriate nodes and change their ownership::
+
+ ceph auth get-or-create client.glance | ssh {your-glance-api-server} sudo tee /etc/ceph/ceph.client.glance.keyring
+ ssh {your-glance-api-server} sudo chown glance:glance /etc/ceph/ceph.client.glance.keyring
+ ceph auth get-or-create client.cinder | ssh {your-volume-server} sudo tee /etc/ceph/ceph.client.cinder.keyring
+ ssh {your-cinder-volume-server} sudo chown cinder:cinder /etc/ceph/ceph.client.cinder.keyring
+ ceph auth get-or-create client.cinder-backup | ssh {your-cinder-backup-server} sudo tee /etc/ceph/ceph.client.cinder-backup.keyring
+ ssh {your-cinder-backup-server} sudo chown cinder:cinder /etc/ceph/ceph.client.cinder-backup.keyring
+
+Nodes running ``nova-compute`` need the keyring file for the ``nova-compute``
+process::
+
+ ceph auth get-or-create client.cinder | ssh {your-nova-compute-server} sudo tee /etc/ceph/ceph.client.cinder.keyring
+
+They also need to store the secret key of the ``client.cinder`` user in
+``libvirt``. The libvirt process needs it to access the cluster while attaching
+a block device from Cinder.
+
+Create a temporary copy of the secret key on the nodes running
+``nova-compute``::
+
+ ceph auth get-key client.cinder | ssh {your-compute-node} tee client.cinder.key
+
+Then, on the compute nodes, add the secret key to ``libvirt`` and remove the
+temporary copy of the key::
+
+ uuidgen
+ 457eb676-33da-42ec-9a8c-9293d545c337
+
+ cat > secret.xml <<EOF
+ <secret ephemeral='no' private='no'>
+ <uuid>457eb676-33da-42ec-9a8c-9293d545c337</uuid>
+ <usage type='ceph'>
+ <name>client.cinder secret</name>
+ </usage>
+ </secret>
+ EOF
+ sudo virsh secret-define --file secret.xml
+ Secret 457eb676-33da-42ec-9a8c-9293d545c337 created
+ sudo virsh secret-set-value --secret 457eb676-33da-42ec-9a8c-9293d545c337 --base64 $(cat client.cinder.key) && rm client.cinder.key secret.xml
+
+Save the uuid of the secret for configuring ``nova-compute`` later.
+
+.. important:: You don't necessarily need the UUID on all the compute nodes.
+ However from a platform consistency perspective, it's better to keep the
+ same UUID.
+
+.. _cephx authentication: ../../rados/configuration/auth-config-ref/#enabling-disabling-cephx
+
+
+Configure OpenStack to use Ceph
+===============================
+
+Configuring Glance
+------------------
+
+Glance can use multiple back ends to store images. To use Ceph block devices by
+default, configure Glance like the following.
+
+
+Kilo and after
+~~~~~~~~~~~~~~
+
+Edit ``/etc/glance/glance-api.conf`` and add under the ``[glance_store]`` section::
+
+ [glance_store]
+ stores = rbd
+ default_store = rbd
+ rbd_store_pool = images
+ rbd_store_user = glance
+ rbd_store_ceph_conf = /etc/ceph/ceph.conf
+ rbd_store_chunk_size = 8
+
+For more information about the configuration options available in Glance please refer to the OpenStack Configuration Reference: http://docs.openstack.org/.
+
+Enable copy-on-write cloning of images
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Note that this exposes the back end location via Glance's API, so the endpoint
+with this option enabled should not be publicly accessible.
+
+Any OpenStack version except Mitaka
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If you want to enable copy-on-write cloning of images, also add under the ``[DEFAULT]`` section::
+
+ show_image_direct_url = True
+
+Disable cache management (any OpenStack version)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Disable the Glance cache management to avoid images getting cached under ``/var/lib/glance/image-cache/``,
+assuming your configuration file has ``flavor = keystone+cachemanagement``::
+
+ [paste_deploy]
+ flavor = keystone
+
+Image properties
+~~~~~~~~~~~~~~~~
+
+We recommend to use the following properties for your images:
+
+- ``hw_scsi_model=virtio-scsi``: add the virtio-scsi controller and get better performance and support for discard operation
+- ``hw_disk_bus=scsi``: connect every cinder block devices to that controller
+- ``hw_qemu_guest_agent=yes``: enable the QEMU guest agent
+- ``os_require_quiesce=yes``: send fs-freeze/thaw calls through the QEMU guest agent
+
+
+Configuring Cinder
+------------------
+
+OpenStack requires a driver to interact with Ceph block devices. You must also
+specify the pool name for the block device. On your OpenStack node, edit
+``/etc/cinder/cinder.conf`` by adding::
+
+ [DEFAULT]
+ ...
+ enabled_backends = ceph
+ glance_api_version = 2
+ ...
+ [ceph]
+ volume_driver = cinder.volume.drivers.rbd.RBDDriver
+ volume_backend_name = ceph
+ rbd_pool = volumes
+ rbd_ceph_conf = /etc/ceph/ceph.conf
+ rbd_flatten_volume_from_snapshot = false
+ rbd_max_clone_depth = 5
+ rbd_store_chunk_size = 4
+ rados_connect_timeout = -1
+
+If you are using `cephx authentication`_, also configure the user and uuid of
+the secret you added to ``libvirt`` as documented earlier::
+
+ [ceph]
+ ...
+ rbd_user = cinder
+ rbd_secret_uuid = 457eb676-33da-42ec-9a8c-9293d545c337
+
+Note that if you are configuring multiple cinder back ends,
+``glance_api_version = 2`` must be in the ``[DEFAULT]`` section.
+
+
+Configuring Cinder Backup
+-------------------------
+
+OpenStack Cinder Backup requires a specific daemon so don't forget to install it.
+On your Cinder Backup node, edit ``/etc/cinder/cinder.conf`` and add::
+
+ backup_driver = cinder.backup.drivers.ceph
+ backup_ceph_conf = /etc/ceph/ceph.conf
+ backup_ceph_user = cinder-backup
+ backup_ceph_chunk_size = 134217728
+ backup_ceph_pool = backups
+ backup_ceph_stripe_unit = 0
+ backup_ceph_stripe_count = 0
+ restore_discard_excess_bytes = true
+
+
+Configuring Nova to attach Ceph RBD block device
+------------------------------------------------
+
+In order to attach Cinder devices (either normal block or by issuing a boot
+from volume), you must tell Nova (and libvirt) which user and UUID to refer to
+when attaching the device. libvirt will refer to this user when connecting and
+authenticating with the Ceph cluster. ::
+
+ [libvirt]
+ ...
+ rbd_user = cinder
+ rbd_secret_uuid = 457eb676-33da-42ec-9a8c-9293d545c337
+
+These two flags are also used by the Nova ephemeral back end.
+
+
+Configuring Nova
+----------------
+
+In order to boot virtual machines directly from Ceph volumes, you must
+configure the ephemeral backend for Nova.
+
+It is recommended to enable the RBD cache in your Ceph configuration file; this
+has been enabled by default since the Giant release. Moreover, enabling the
+client admin socket allows the collection of metrics and can be invaluable
+for troubleshooting.
+
+This socket can be accessed on the hypvervisor (Nova compute) node::
+
+ ceph daemon /var/run/ceph/ceph-client.cinder.19195.32310016.asok help
+
+To enable RBD cache and admin sockets, ensure that on each hypervisor's
+``ceph.conf`` contains::
+
+ [client]
+ rbd cache = true
+ rbd cache writethrough until flush = true
+ admin socket = /var/run/ceph/guests/$cluster-$type.$id.$pid.$cctid.asok
+ log file = /var/log/qemu/qemu-guest-$pid.log
+ rbd concurrent management ops = 20
+
+Configure permissions for these directories::
+
+ mkdir -p /var/run/ceph/guests/ /var/log/qemu/
+ chown qemu:libvirtd /var/run/ceph/guests /var/log/qemu/
+
+Note that user ``qemu`` and group ``libvirtd`` can vary depending on your system.
+The provided example works for RedHat based systems.
+
+.. tip:: If your virtual machine is already running you can simply restart it to enable the admin socket
+
+
+Restart OpenStack
+=================
+
+To activate the Ceph block device driver and load the block device pool name
+into the configuration, you must restart the related OpenStack services.
+For Debian based systems execute these commands on the appropriate nodes::
+
+ sudo glance-control api restart
+ sudo service nova-compute restart
+ sudo service cinder-volume restart
+ sudo service cinder-backup restart
+
+For Red Hat based systems execute::
+
+ sudo service openstack-glance-api restart
+ sudo service openstack-nova-compute restart
+ sudo service openstack-cinder-volume restart
+ sudo service openstack-cinder-backup restart
+
+Once OpenStack is up and running, you should be able to create a volume
+and boot from it.
+
+
+Booting from a Block Device
+===========================
+
+You can create a volume from an image using the Cinder command line tool::
+
+ cinder create --image-id {id of image} --display-name {name of volume} {size of volume}
+
+You can use `qemu-img`_ to convert from one format to another. For example::
+
+ qemu-img convert -f {source-format} -O {output-format} {source-filename} {output-filename}
+ qemu-img convert -f qcow2 -O raw precise-cloudimg.img precise-cloudimg.raw
+
+When Glance and Cinder are both using Ceph block devices, the image is a
+copy-on-write clone, so new volumes are created quickly. In the OpenStack
+dashboard, you can boot from that volume by performing the following steps:
+
+#. Launch a new instance.
+#. Choose the image associated to the copy-on-write clone.
+#. Select 'boot from volume'.
+#. Select the volume you created.
+
+.. _qemu-img: ../qemu-rbd/#running-qemu-with-rbd
diff --git a/doc/rbd/rbd-operations.rst b/doc/rbd/rbd-operations.rst
new file mode 100644
index 000000000..cc27b5103
--- /dev/null
+++ b/doc/rbd/rbd-operations.rst
@@ -0,0 +1,16 @@
+==============================
+ Ceph Block Device Operations
+==============================
+
+.. toctree::
+ :maxdepth: 1
+
+ Snapshots<rbd-snapshot>
+ Exclusive Locking <rbd-exclusive-locks>
+ Mirroring <rbd-mirroring>
+ Live-Migration <rbd-live-migration>
+ Persistent Read-only Cache <rbd-persistent-read-only-cache>
+ Persistent Write-back Cache <rbd-persistent-write-back-cache>
+ Encryption <rbd-encryption>
+ Config Settings (librbd) <rbd-config-ref/>
+ RBD Replay <rbd-replay>
diff --git a/doc/rbd/rbd-persistent-read-only-cache.rst b/doc/rbd/rbd-persistent-read-only-cache.rst
new file mode 100644
index 000000000..5bef7f592
--- /dev/null
+++ b/doc/rbd/rbd-persistent-read-only-cache.rst
@@ -0,0 +1,201 @@
+===============================
+ RBD Persistent Read-only Cache
+===============================
+
+.. index:: Ceph Block Device; Persistent Read-only Cache
+
+Shared, Read-only Parent Image Cache
+====================================
+
+`Cloned RBD images`_ usually modify only a small fraction of the parent
+image. For example, in a VDI use-case, VMs are cloned from the same
+base image and initially differ only by hostname and IP address. During
+booting, all of these VMs read portions of the same parent
+image data. If we have a local cache of the parent
+image, this speeds up reads on the caching host. We also achieve
+reduction of client-to-cluster network traffic.
+RBD cache must be explicitly enabled in
+``ceph.conf``. The ``ceph-immutable-object-cache`` daemon is responsible for
+caching the parent content on the local disk, and future reads on that data
+will be serviced from the local cache.
+
+.. note:: RBD shared read-only parent image cache requires the Ceph Nautilus release or later.
+
+.. ditaa::
+
+ +--------------------------------------------------------+
+ | QEMU |
+ +--------------------------------------------------------+
+ | librbd (cloned images) |
+ +-------------------+-+----------------------------------+
+ | librados | | ceph--immutable--object--cache |
+ +-------------------+ +----------------------------------+
+ | OSDs/Mons | | local cached parent image |
+ +-------------------+ +----------------------------------+
+
+
+Enable RBD Shared Read-only Parent Image Cache
+----------------------------------------------
+
+To enable RBD shared read-only parent image cache, the following Ceph settings
+need to added in the ``[client]`` `section`_ of your ``ceph.conf`` file::
+
+ rbd parent cache enabled = true
+ rbd plugins = parent_cache
+
+Immutable Object Cache Daemon
+=============================
+
+Introduction and Generic Settings
+---------------------------------
+
+The ``ceph-immutable-object-cache`` daemon is responsible for caching parent
+image content within its local caching directory. Using SSDs as the underlying
+storage is recommended because doing so provides better performance.
+
+The key components of the daemon are:
+
+#. **Domain socket based IPC:** The daemon listens on a local domain socket at
+ startup and waits for connections from librbd clients.
+
+#. **LRU based promotion/demotion policy:** The daemon maintains in-memory
+ statistics of cache hits for each cache file. It demotes the cold cache
+ if capacity reaches the configured threshold.
+
+#. **File-based caching store:** The daemon maintains a simple file-based cache
+ store. On promotion, the RADOS objects are fetched from RADOS cluster and
+ stored in the local caching directory.
+
+When each cloned RBD image is opened, ``librbd`` tries to connect to the cache
+daemon through its Unix domain socket. After ``librbd`` is successfully
+connected, it coordinates with the daemon upon every subsequent read. In the
+case of an uncached read, the daemon promotes the RADOS object to the local
+caching directory and the next read of the object is serviced from the cache.
+The daemon maintains simple LRU statistics, which are used to evict cold cache
+files when required (for example, when the cache is at capacity and under
+pressure).
+
+Here are some important cache configuration settings:
+
+``immutable_object_cache_sock``
+
+:Description: The path to the domain socket used for communication between
+ librbd clients and the ceph-immutable-object-cache daemon.
+:Type: String
+:Required: No
+:Default: ``/var/run/ceph/immutable_object_cache_sock``
+
+
+``immutable_object_cache_path``
+
+:Description: The immutable object cache data directory.
+:Type: String
+:Required: No
+:Default: ``/tmp/ceph_immutable_object_cache``
+
+
+``immutable_object_cache_max_size``
+
+:Description: The max size for immutable cache.
+:Type: Size
+:Required: No
+:Default: ``1G``
+
+
+``immutable_object_cache_watermark``
+
+:Description: The high-water mark for the cache. The value is between (0, 1).
+ If the cache size reaches this threshold the daemon will start
+ to delete cold cache based on LRU statistics.
+:Type: Float
+:Required: No
+:Default: ``0.9``
+
+The ``ceph-immutable-object-cache`` daemon is available within the optional
+``ceph-immutable-object-cache`` distribution package.
+
+.. important:: ``ceph-immutable-object-cache`` daemon requires the ability to
+ connect RADOS clusters.
+
+Running the Immutable Object Cache Daemon
+-----------------------------------------
+
+``ceph-immutable-object-cache`` daemon should use a unique Ceph user ID.
+To `create a Ceph user`_, with ``ceph`` specify the ``auth get-or-create``
+command, user name, monitor caps, and OSD caps::
+
+ ceph auth get-or-create client.ceph-immutable-object-cache.{unique id} mon 'allow r' osd 'profile rbd-read-only'
+
+The ``ceph-immutable-object-cache`` daemon can be managed by ``systemd`` by specifying the user
+ID as the daemon instance::
+
+ systemctl enable ceph-immutable-object-cache@ceph-immutable-object-cache.{unique id}
+
+The ``ceph-immutable-object-cache`` can also be run in foreground by ``ceph-immutable-object-cache`` command::
+
+ ceph-immutable-object-cache -f --log-file={log_path}
+
+QOS Settings
+------------
+
+The immutable object cache supports throttling, controlled by the following settings:
+
+``immutable_object_cache_qos_schedule_tick_min``
+
+:Description: Minimum schedule tick for immutable object cache.
+:Type: Milliseconds
+:Required: No
+:Default: ``50``
+
+
+``immutable_object_cache_qos_iops_limit``
+
+:Description: The desired immutable object cache IO operations limit per second.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``0``
+
+
+``immutable_object_cache_qos_iops_burst``
+
+:Description: The desired burst limit of immutable object cache IO operations.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``0``
+
+
+``immutable_object_cache_qos_iops_burst_seconds``
+
+:Description: The desired burst duration in seconds of immutable object cache IO operations.
+:Type: Seconds
+:Required: No
+:Default: ``1``
+
+
+``immutable_object_cache_qos_bps_limit``
+
+:Description: The desired immutable object cache IO bytes limit per second.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``0``
+
+
+``immutable_object_cache_qos_bps_burst``
+
+:Description: The desired burst limit of immutable object cache IO bytes.
+:Type: Unsigned Integer
+:Required: No
+:Default: ``0``
+
+
+``immutable_object_cache_qos_bps_burst_seconds``
+
+:Description: The desired burst duration in seconds of immutable object cache IO bytes.
+:Type: Seconds
+:Required: No
+:Default: ``1``
+
+.. _Cloned RBD Images: ../rbd-snapshot/#layering
+.. _section: ../../rados/configuration/ceph-conf/#configuration-sections
+.. _create a Ceph user: ../../rados/operations/user-management#add-a-user
+
diff --git a/doc/rbd/rbd-persistent-write-back-cache.rst b/doc/rbd/rbd-persistent-write-back-cache.rst
new file mode 100644
index 000000000..8066014e9
--- /dev/null
+++ b/doc/rbd/rbd-persistent-write-back-cache.rst
@@ -0,0 +1,133 @@
+================================
+ RBD Persistent Write-back Cache
+================================
+
+.. index:: Ceph Block Device; Persistent Write-back Cache
+
+Persistent Write-back Cache
+===========================
+
+The persistent write-back cache provides a persistent, fault-tolerant write-back
+cache for librbd-based RBD clients.
+
+This cache uses a log-ordered write-back design which maintains checkpoints
+internally so that writes that get flushed back to the cluster are always
+crash consistent. Even if the client cache is lost entirely, the disk image is
+still consistent but the data will appear to be stale.
+
+This cache can be used with PMEM or SSD as a cache device.
+
+Usage
+=====
+
+The persistent write-back cache manages the cache data in a persistent device.
+It looks for and creates cache files in a configured directory, and then caches
+data in the file.
+
+The persistent write-back cache can't be enabled without the exclusive-lock
+feature. It tries to enable the write-back cache only when the exclusive lock
+is acquired.
+
+The cache provides two different persistence modes. In persistent-on-write mode,
+the writes are completed only when they are persisted to the cache device and
+will be readable after a crash. In persistent-on-flush mode, the writes are
+completed as soon as it no longer needs the caller's data buffer to complete
+the writes, but does not guarantee that writes will be readable after a crash.
+The data is persisted to the cache device when a flush request is received.
+
+Initially it defaults to the persistent-on-write mode and it switches to
+persistent-on-flush mode after the first flush request is received.
+
+Enable Persistent Write-back Cache
+========================================
+
+To enable the persistent write-back cache, the following Ceph settings
+need to be enabled.::
+
+ rbd persistent cache mode = {cache-mode}
+ rbd plugins = pwl_cache
+
+Value of {cache-mode} can be ``rwl``, ``ssd`` or ``disabled``. By default the
+cache is disabled.
+
+Here are some cache configuration settings:
+
+- ``rbd_persistent_cache_path`` A file folder to cache data. This folder must
+ have DAX enabled (see `DAX`_) when using ``rwl`` mode to avoid performance
+ degradation.
+
+- ``rbd_persistent_cache_size`` The cache size per image. The minimum cache
+ size is 1 GB.
+
+The above configurations can be set per-host, per-pool, per-image etc. Eg, to
+set per-host, add the overrides to the appropriate `section`_ in the host's
+``ceph.conf`` file. To set per-pool, per-image, etc, please refer to the
+``rbd config`` `commands`_.
+
+Cache Status
+------------
+
+The persistent write-back cache is enabled when the exclusive lock is acquired,
+and it is closed when the exclusive lock is released. To check the cache status,
+users may use the command ``rbd status``. ::
+
+ rbd status {pool-name}/{image-name}
+
+The status of the cache is shown, including present, clean, cache size and the
+location as well as some basic metrics.
+
+For example::
+
+ $ rbd status rbd/foo
+ Watchers:
+ watcher=10.10.0.102:0/1061883624 client.25496 cookie=140338056493088
+ Persistent cache state:
+ host: sceph9
+ path: /mnt/nvme0/rbd-pwl.rbd.101e5824ad9a.pool
+ size: 1 GiB
+ mode: ssd
+ stats_timestamp: Sun Apr 10 13:26:32 2022
+ present: true empty: false clean: false
+ allocated: 509 MiB
+ cached: 501 MiB
+ dirty: 338 MiB
+ free: 515 MiB
+ hits_full: 1450 / 61%
+ hits_partial: 0 / 0%
+ misses: 924
+ hit_bytes: 192 MiB / 66%
+ miss_bytes: 97 MiB
+
+Flush Cache
+-----------
+
+To flush a cache file with ``rbd``, specify the ``persistent-cache flush``
+command, the pool name and the image name. ::
+
+ rbd persistent-cache flush {pool-name}/{image-name}
+
+If the application dies unexpectedly, this command can also be used to flush
+the cache back to OSDs.
+
+For example::
+
+ $ rbd persistent-cache flush rbd/foo
+
+Invalidate Cache
+----------------
+
+To invalidate (discard) a cache file with ``rbd``, specify the
+``persistent-cache invalidate`` command, the pool name and the image name. ::
+
+ rbd persistent-cache invalidate {pool-name}/{image-name}
+
+The command removes the cache metadata of the corresponding image, disables
+the cache feature and deletes the local cache file if it exists.
+
+For example::
+
+ $ rbd persistent-cache invalidate rbd/foo
+
+.. _section: ../../rados/configuration/ceph-conf/#configuration-sections
+.. _commands: ../../man/8/rbd#commands
+.. _DAX: https://www.kernel.org/doc/Documentation/filesystems/dax.txt
diff --git a/doc/rbd/rbd-replay.rst b/doc/rbd/rbd-replay.rst
new file mode 100644
index 000000000..b1fc4973f
--- /dev/null
+++ b/doc/rbd/rbd-replay.rst
@@ -0,0 +1,42 @@
+===================
+ RBD Replay
+===================
+
+.. index:: Ceph Block Device; RBD Replay
+
+RBD Replay is a set of tools for capturing and replaying RADOS Block Device
+(RBD) workloads. To capture an RBD workload, ``lttng-tools`` must be installed
+on the client, and ``librbd`` on the client must be the v0.87 (Giant) release
+or later. To replay an RBD workload, ``librbd`` on the client must be the Giant
+release or later.
+
+Capture and replay takes three steps:
+
+#. Capture the trace. Make sure to capture ``pthread_id`` context::
+
+ mkdir -p traces
+ lttng create -o traces librbd
+ lttng enable-event -u 'librbd:*'
+ lttng add-context -u -t pthread_id
+ lttng start
+ # run RBD workload here
+ lttng stop
+
+#. Process the trace with `rbd-replay-prep`_::
+
+ rbd-replay-prep traces/ust/uid/*/* replay.bin
+
+#. Replay the trace with `rbd-replay`_. Use read-only until you know
+ it's doing what you want::
+
+ rbd-replay --read-only replay.bin
+
+.. important:: ``rbd-replay`` will destroy data by default. Do not use against
+ an image you wish to keep, unless you use the ``--read-only`` option.
+
+The replayed workload does not have to be against the same RBD image or even the
+same cluster as the captured workload. To account for differences, you may need
+to use the ``--pool`` and ``--map-image`` options of ``rbd-replay``.
+
+.. _rbd-replay: ../../man/8/rbd-replay
+.. _rbd-replay-prep: ../../man/8/rbd-replay-prep
diff --git a/doc/rbd/rbd-snapshot.rst b/doc/rbd/rbd-snapshot.rst
new file mode 100644
index 000000000..765a1555b
--- /dev/null
+++ b/doc/rbd/rbd-snapshot.rst
@@ -0,0 +1,367 @@
+===========
+ Snapshots
+===========
+
+.. index:: Ceph Block Device; snapshots
+
+A snapshot is a read-only logical copy of an image at a particular point in
+time: a checkpoint. One of the advanced features of Ceph block devices is that
+you can create snapshots of images to retain point-in-time state history. Ceph
+also supports snapshot layering, which allows you to clone images (for example,
+VM images) quickly and easily. Ceph block device snapshots are managed using
+the ``rbd`` command and several higher-level interfaces, including `QEMU`_,
+`libvirt`_, `OpenStack`_, and `CloudStack`_.
+
+.. important:: To use RBD snapshots, you must have a running Ceph cluster.
+
+.. note:: Because RBD is unaware of any file system within an image (volume),
+ snapshots are merely `crash-consistent` unless they are coordinated within
+ the mounting (attaching) operating system. We therefore recommend that you
+ pause or stop I/O before taking a snapshot.
+
+ If the volume contains a file system, the file system should be in an
+ internally consistent state before a snapshot is taken. Snapshots taken
+ without write quiescing could need an `fsck` pass before they are mounted
+ again. To quiesce I/O you can use `fsfreeze` command. See the `fsfreeze(8)`
+ man page for more details.
+
+ For virtual machines, `qemu-guest-agent` can be used to automatically freeze
+ file systems when creating a snapshot.
+
+.. ditaa::
+
+ +------------+ +-------------+
+ | {s} | | {s} c999 |
+ | Active |<-------*| Snapshot |
+ | Image | | of Image |
+ | (stop i/o) | | (read only) |
+ +------------+ +-------------+
+
+
+Cephx Notes
+===========
+
+When `cephx`_ authentication is enabled (it is by default), you must specify a
+user name or ID and a path to the keyring containing the corresponding key. See
+:ref:`User Management <user-management>` for details.
+
+.. prompt:: bash $
+
+ rbd --id {user-ID} --keyring /path/to/secret [commands]
+ rbd --name {username} --keyring /path/to/secret [commands]
+
+For example:
+
+.. prompt:: bash $
+
+ rbd --id admin --keyring /etc/ceph/ceph.keyring [commands]
+ rbd --name client.admin --keyring /etc/ceph/ceph.keyring [commands]
+
+.. tip:: Add the user and secret to the ``CEPH_ARGS`` environment variable to
+ avoid re-entry of these parameters.
+
+
+Snapshot Basics
+===============
+
+The following procedures demonstrate how to create, list, and remove
+snapshots using the ``rbd`` command.
+
+Create Snapshot
+---------------
+
+To create a snapshot, use the ``rbd snap create`` command and specify the pool
+name, the image name, and the snap name:
+
+.. prompt:: bash $
+
+ rbd snap create {pool-name}/{image-name}@{snap-name}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd snap create rbd/foo@snapname
+
+
+List Snapshots
+--------------
+
+To list the snapshots of an image, use the ``rbd snap ls`` command and specify
+the pool name and the image name:
+
+.. prompt:: bash $
+
+ rbd snap ls {pool-name}/{image-name}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd snap ls rbd/foo
+
+
+Roll back Snapshot
+------------------
+
+To roll back to a snapshot, use the ``rbd snap rollback`` command and specify
+the pool name, the image name, and the snap name:
+
+.. prompt:: bash $
+
+ rbd snap rollback {pool-name}/{image-name}@{snap-name}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd snap rollback rbd/foo@snapname
+
+
+.. note:: Rolling back an image to a snapshot means overwriting the current
+ version of the image with data from a snapshot. The time it takes to execute
+ a rollback increases with the size of the image. It is **faster to clone**
+ from a snapshot **than to roll back** an image to a snapshot. Cloning from a
+ snapshot is the preferred method of returning to a pre-existing state.
+
+
+Delete a Snapshot
+-----------------
+
+To delete a snapshot, use the ``rbd snap rm`` command and specify the pool
+name, the image name, and the snap name:
+
+.. prompt:: bash $
+
+ rbd snap rm {pool-name}/{image-name}@{snap-name}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd snap rm rbd/foo@snapname
+
+
+.. note:: Ceph OSDs delete data asynchronously, so deleting a snapshot does
+ not immediately free up the capacity of the underlying OSDs. This process is
+ known as "snaptrim", and is referred to as such in ``ceph status`` output.
+
+Purge Snapshots
+---------------
+
+To delete all snapshots, use the ``rbd snap purge`` command and specify the
+pool name and the image name:
+
+.. prompt:: bash $
+
+ rbd snap purge {pool-name}/{image-name}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd snap purge rbd/foo
+
+
+.. index:: Ceph Block Device; snapshot layering
+
+Layering
+========
+
+Ceph supports the ability to create many copy-on-write (COW) clones of a block
+device snapshot. Snapshot layering enables Ceph block device clients to create
+images very quickly. For example, you might create a block device image with a
+Linux VM written to it, snapshot the image, protect the snapshot, and create as
+many copy-on-write clones as you like. A snapshot is read-only, so cloning a
+snapshot simplifies semantics, making it possible to create clones rapidly.
+
+
+.. ditaa::
+
+ +-------------+ +-------------+
+ | {s} c999 | | {s} |
+ | Snapshot | Child refers | COW Clone |
+ | of Image |<------------*| of Snapshot |
+ | | to Parent | |
+ | (read only) | | (writable) |
+ +-------------+ +-------------+
+
+ Parent Child
+
+.. note:: The terms "parent" and "child" refer to a Ceph block device snapshot
+ (parent) and the corresponding image cloned from the snapshot (child).
+ These terms are important for the command line usage below.
+
+Each cloned image (child) stores a reference to its parent image, which enables
+the cloned image to open the parent snapshot and read it.
+
+A copy-on-write clone of a snapshot behaves exactly like any other Ceph
+block device image. You can read to, write from, clone, and resize cloned
+images. There are no special restrictions with cloned images. However, the
+copy-on-write clone of a snapshot depends on the snapshot, so you must
+protect the snapshot before you clone it. The diagram below depicts this
+process.
+
+.. note:: Ceph supports the cloning of only "RBD format 2" images (that is,
+ images created without specifying ``--image-format 1``). The Linux kernel
+ client supports cloned images beginning with the 3.10 release.
+
+Getting Started with Layering
+-----------------------------
+
+Ceph block device layering is a simple process. You must have an image. You
+must create a snapshot of the image. You must protect the snapshot. After you
+have performed these steps, you can begin cloning the snapshot.
+
+.. ditaa::
+
+ +----------------------------+ +-----------------------------+
+ | | | |
+ | Create Block Device Image |------->| Create a Snapshot |
+ | | | |
+ +----------------------------+ +-----------------------------+
+ |
+ +--------------------------------------+
+ |
+ v
+ +----------------------------+ +-----------------------------+
+ | | | |
+ | Protect the Snapshot |------->| Clone the Snapshot |
+ | | | |
+ +----------------------------+ +-----------------------------+
+
+
+The cloned image has a reference to the parent snapshot, and includes the pool
+ID, the image ID, and the snapshot ID. The inclusion of the pool ID means that
+you may clone snapshots from one pool to images in another pool.
+
+#. **Image Template:** A common use case for block device layering is to create
+ a base image and a snapshot that serves as a template for clones. For
+ example: a user may create an image for a Linux distribution (for example,
+ Ubuntu 22.04) and create a snapshot of it. The user may occasionally update
+ the image and create a new snapshot (by using such commands as ``sudo
+ apt-get update``, ``sudo apt-get upgrade``, or ``sudo apt-get dist-upgrade``
+ followed by ``rbd snap create``). As the image matures, the user can clone
+ any one of the snapshots.
+
+#. **Extended Template:** A more advanced use case includes extending a
+ template image to provide more information than a base image. For
+ example, a user may clone an image (for example, a VM template) and install
+ other software (for example, a database, a content management system, an
+ analytics system) and then snapshot the extended image, which may itself be
+ updated just like the base image.
+
+#. **Template Pool:** One way to use block device layering is to create a pool
+ that contains (1) base images that act as templates and (2) snapshots of
+ those templates. You may then extend read-only privileges to users so that
+ they may clone the snapshots even though they do not have permissions that
+ allow them to write or execute within the pool.
+
+#. **Image Migration/Recovery:** One way to use block device layering is to
+ migrate or recover data from one pool into another pool.
+
+Protecting a Snapshot
+---------------------
+
+Clones access the parent snapshots. All clones would break if a user
+inadvertently deleted the parent snapshot. To prevent data loss, you must
+protect the snapshot before you can clone it:
+
+.. prompt:: bash $
+
+ rbd snap protect {pool-name}/{image-name}@{snapshot-name}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd snap protect rbd/foo@snapname
+
+.. note:: You cannot delete a protected snapshot.
+
+Cloning a Snapshot
+------------------
+
+To clone a snapshot, specify the parent pool, the parent image, and the parent
+snapshot; and also the child pool together with the image name. You must
+protect the snapshot before you can clone it:
+
+.. prompt:: bash $
+
+ rbd clone {pool-name}/{parent-image-name}@{snap-name} {pool-name}/{child-image-name}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd clone rbd/foo@snapname rbd/bar
+
+
+.. note:: You may clone a snapshot from one pool to an image in another pool.
+ For example, you may maintain read-only images and snapshots as templates in
+ one pool, and writeable clones in another pool.
+
+Unprotecting a Snapshot
+-----------------------
+
+Before you can delete a snapshot, you must first unprotect it. Additionally,
+you may *NOT* delete snapshots that have references from clones. You must
+flatten or delete each clone of a snapshot before you can unprotect the
+snapshot:
+
+.. prompt:: bash $
+
+ rbd snap unprotect {pool-name}/{image-name}@{snapshot-name}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd snap unprotect rbd/foo@snapname
+
+
+Listing Children of a Snapshot
+------------------------------
+
+To list the children of a snapshot, use the ``rbd children`` command and
+specify the pool name, the image name, and the snap name:
+
+.. prompt:: bash $
+
+ rbd children {pool-name}/{image-name}@{snapshot-name}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd children rbd/foo@snapname
+
+
+Flattening a Cloned Image
+-------------------------
+
+Cloned images retain a reference to the parent snapshot. When you remove the
+reference to the parent snapshot from the clone, you effectively "flatten" the
+clone by copying the data stored in the snapshot to the clone. The time it
+takes to flatten a clone increases with the size of the snapshot. To delete a
+snapshot, you must first flatten the child images (or delete them):
+
+.. prompt:: bash $
+
+ rbd flatten {pool-name}/{image-name}
+
+For example:
+
+.. prompt:: bash $
+
+ rbd flatten rbd/bar
+
+.. note:: Since a flattened image contains all the data stored in the snapshot,
+ a flattened image takes up more storage space than a layered clone does.
+
+
+.. _cephx: ../../rados/configuration/auth-config-ref/
+.. _QEMU: ../qemu-rbd/
+.. _OpenStack: ../rbd-openstack/
+.. _CloudStack: ../rbd-cloudstack/
+.. _libvirt: ../libvirt/
diff --git a/doc/rbd/rbd-windows.rst b/doc/rbd/rbd-windows.rst
new file mode 100644
index 000000000..271e85592
--- /dev/null
+++ b/doc/rbd/rbd-windows.rst
@@ -0,0 +1,158 @@
+==============
+RBD on Windows
+==============
+
+The ``rbd`` command can be used to create, remove, import, export, map or
+unmap images exactly like it would on Linux. Make sure to check the
+`RBD basic commands`_ guide.
+
+``librbd.dll`` is also available for applications that can natively use Ceph.
+
+Please check the `installation guide`_ to get started.
+
+Windows service
+===============
+On Windows, ``rbd-wnbd`` daemons are managed by a centralized service. This allows
+decoupling the daemons from the Windows session from which they originate. At
+the same time, the service is responsible of recreating persistent mappings,
+usually when the host boots.
+
+Note that only one such service may run per host.
+
+By default, all image mappings are persistent. Non-persistent mappings can be
+requested using the ``-onon-persistent`` ``rbd`` flag.
+
+Persistent mappings are recreated when the service starts, unless explicitly
+unmapped. The service disconnects the mappings when being stopped. This also
+allows adjusting the Windows service start order so that RBD images can be
+mapped before starting services that may depend on it, such as VMMS.
+
+In order to be able to reconnect the images, ``rbd-wnbd`` stores mapping
+information in the Windows registry at the following location:
+``SYSTEM\CurrentControlSet\Services\rbd-wnbd``.
+
+The following command can be used to configure the service. Please update
+the ``rbd-wnbd.exe`` path accordingly::
+
+ New-Service -Name "ceph-rbd" `
+ -Description "Ceph RBD Mapping Service" `
+ -BinaryPathName "c:\ceph\rbd-wnbd.exe service" `
+ -StartupType Automatic
+
+Note that the Ceph MSI installer takes care of creating the ``ceph-rbd``
+Windows service.
+
+Usage
+=====
+
+Integration
+-----------
+
+RBD images can be exposed to the OS and host Windows partitions or they can be
+attached to Hyper-V VMs in the same way as iSCSI disks.
+
+Starting with Openstack Wallaby, the Nova Hyper-V driver can attach RBD Cinder
+volumes to Hyper-V VMs.
+
+Mapping images
+--------------
+
+The workflow and CLI is similar to the Linux counterpart, with a few
+notable differences:
+
+* device paths cannot be requested. The disk number and path will be picked by
+ Windows. If a device path is provided by the used when mapping an image, it
+ will be used as an identifier, which can also be used when unmapping the
+ image.
+* the ``show`` command was added, which describes a specific mapping.
+ This can be used for retrieving the disk path.
+* the ``service`` command was added, allowing ``rbd-wnbd`` to run as a Windows service.
+ All mappings are by default perisistent, being recreated when the service
+ stops, unless explicitly unmapped. The service disconnects the mappings
+ when being stopped.
+* the ``list`` command also includes a ``status`` column.
+
+The purpose of the ``service`` mode is to ensure that mappings survive reboots
+and that the Windows service start order can be adjusted so that RBD images can
+be mapped before starting services that may depend on it, such as VMMS.
+
+The mapped images can either be consumed by the host directly or exposed to
+Hyper-V VMs.
+
+Hyper-V VM disks
+----------------
+
+The following sample imports an RBD image and boots a Hyper-V VM using it::
+
+ # Feel free to use any other image. This one is convenient to use for
+ # testing purposes because it's very small (~15MB) and the login prompt
+ # prints the pre-configured password.
+ wget http://download.cirros-cloud.net/0.5.1/cirros-0.5.1-x86_64-disk.img `
+ -OutFile cirros-0.5.1-x86_64-disk.img
+
+ # We'll need to make sure that the imported images are raw (so no qcow2 or vhdx).
+ # You may get qemu-img from https://cloudbase.it/qemu-img-windows/
+ # You can add the extracted location to $env:Path or update the path accordingly.
+ qemu-img convert -O raw cirros-0.5.1-x86_64-disk.img cirros-0.5.1-x86_64-disk.raw
+
+ rbd import cirros-0.5.1-x86_64-disk.raw
+ # Let's give it a hefty 100MB size.
+ rbd resize cirros-0.5.1-x86_64-disk.raw --size=100MB
+
+ rbd device map cirros-0.5.1-x86_64-disk.raw
+
+ # Let's have a look at the mappings.
+ rbd device list
+ Get-Disk
+
+ $mappingJson = rbd-wnbd show cirros-0.5.1-x86_64-disk.raw --format=json
+ $mappingJson = $mappingJson | ConvertFrom-Json
+
+ $diskNumber = $mappingJson.disk_number
+
+ New-VM -VMName BootFromRBD -MemoryStartupBytes 512MB
+ # The disk must be turned offline before it can be passed to Hyper-V VMs
+ Set-Disk -Number $diskNumber -IsOffline $true
+ Add-VMHardDiskDrive -VMName BootFromRBD -DiskNumber $diskNumber
+ Start-VM -VMName BootFromRBD
+
+Windows partitions
+------------------
+
+The following sample creates an empty RBD image, attaches it to the host and
+initializes a partition::
+
+ rbd create blank_image --size=1G
+ rbd device map blank_image -onon-persistent
+
+ $mappingJson = rbd-wnbd show blank_image --format=json
+ $mappingJson = $mappingJson | ConvertFrom-Json
+
+ $diskNumber = $mappingJson.disk_number
+
+ # The disk must be online before creating or accessing partitions.
+ Set-Disk -Number $diskNumber -IsOffline $false
+
+ # Initialize the disk, partition it and create a fileystem.
+ Get-Disk -Number $diskNumber | `
+ Initialize-Disk -PassThru | `
+ New-Partition -AssignDriveLetter -UseMaximumSize | `
+ Format-Volume -Force -Confirm:$false
+
+Limitations
+-----------
+
+At the moment, the Microsoft Failover Cluster can't use WNBD disks as
+Cluster Shared Volumes (CSVs) underlying storage. The main reason is that
+``WNBD`` and ``rbd-wnbd`` don't support the *SCSI Persistent Reservations*
+feature yet.
+
+Troubleshooting
+===============
+
+Please consult the `Windows troubleshooting`_ page.
+
+.. _Windows troubleshooting: ../../install/windows-troubleshooting
+.. _installation guide: ../../install/windows-install
+.. _RBD basic commands: ../rados-rbd-cmds
+.. _WNBD driver: https://github.com/cloudbase/wnbd