summaryrefslogtreecommitdiffstats
path: root/src/ceph-volume/ceph_volume/devices/lvm
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/__init__.py1
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/activate.py384
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/batch.py654
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/common.py190
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/create.py77
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/deactivate.py88
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/listing.py223
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/main.py54
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/migrate.py693
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/prepare.py441
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/trigger.py70
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/zap.py406
12 files changed, 3281 insertions, 0 deletions
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/__init__.py b/src/ceph-volume/ceph_volume/devices/lvm/__init__.py
new file mode 100644
index 000000000..3c147123e
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/__init__.py
@@ -0,0 +1 @@
+from .main import LVM # noqa
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/activate.py b/src/ceph-volume/ceph_volume/devices/lvm/activate.py
new file mode 100644
index 000000000..4ad117ec0
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/activate.py
@@ -0,0 +1,384 @@
+from __future__ import print_function
+import argparse
+import logging
+import os
+from textwrap import dedent
+from ceph_volume import process, conf, decorators, terminal, __release__, configuration
+from ceph_volume.util import system, disk
+from ceph_volume.util import prepare as prepare_utils
+from ceph_volume.util import encryption as encryption_utils
+from ceph_volume.systemd import systemctl
+from ceph_volume.api import lvm as api
+from .listing import direct_report
+
+
+logger = logging.getLogger(__name__)
+
+
def activate_filestore(osd_lvs, no_systemd=False):
    """Mount a filestore OSD's data LV, wire up its journal, and start it.

    :param osd_lvs: LV objects belonging to a single OSD; one of them must
                    carry the tag ``ceph.type=data``
    :param no_systemd: when True, skip enabling/starting any systemd units
    :raises RuntimeError: when no data LV or no journal device can be found
    :raises KeyError: when required ``ceph.*`` tags are missing on the data LV
    """
    # find the osd
    for osd_lv in osd_lvs:
        if osd_lv.tags.get('ceph.type') == 'data':
            data_lv = osd_lv
            break
    else:
        raise RuntimeError('Unable to find a data LV for filestore activation')

    is_encrypted = data_lv.tags.get('ceph.encrypted', '0') == '1'
    # NOTE(review): kept as the raw tag string (e.g. '0'), not coerced to a
    # boolean -- mount_osd receives it as-is; confirm downstream handling
    is_vdo = data_lv.tags.get('ceph.vdo', '0')

    osd_id = data_lv.tags['ceph.osd_id']
    # load the cluster's ceph.conf so conf.cluster and friends are populated
    configuration.load_ceph_conf_path(data_lv.tags['ceph.cluster_name'])
    configuration.load()
    # it may have a volume with a journal
    for osd_lv in osd_lvs:
        if osd_lv.tags.get('ceph.type') == 'journal':
            osd_journal_lv = osd_lv
            break
    else:
        osd_journal_lv = None

    # TODO: add sensible error reporting if this is ever the case
    # blow up with a KeyError if this doesn't exist
    osd_fsid = data_lv.tags['ceph.osd_fsid']
    if not osd_journal_lv:
        # must be a disk partition, by querying blkid by the uuid we are ensuring that the
        # device path is always correct
        journal_uuid = data_lv.tags['ceph.journal_uuid']
        osd_journal = disk.get_device_from_partuuid(journal_uuid)
    else:
        journal_uuid = osd_journal_lv.lv_uuid
        osd_journal = data_lv.tags['ceph.journal_device']

    if not osd_journal:
        raise RuntimeError('unable to detect an lv or device journal for OSD %s' % osd_id)

    # this is done here, so that previous checks that ensure path availability
    # and correctness can still be enforced, and report if any issues are found
    if is_encrypted:
        lockbox_secret = data_lv.tags['ceph.cephx_lockbox_secret']
        # this keyring writing is idempotent
        encryption_utils.write_lockbox_keyring(osd_id, osd_fsid, lockbox_secret)
        dmcrypt_secret = encryption_utils.get_dmcrypt_key(osd_id, osd_fsid)
        encryption_utils.luks_open(dmcrypt_secret, data_lv.lv_path, data_lv.lv_uuid)
        encryption_utils.luks_open(dmcrypt_secret, osd_journal, journal_uuid)

        # after luks_open, both devices are addressed via their dm mappings
        osd_journal = '/dev/mapper/%s' % journal_uuid
        source = '/dev/mapper/%s' % data_lv.lv_uuid
    else:
        source = data_lv.lv_path

    # mount the osd
    destination = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    if not system.device_is_mounted(source, destination=destination):
        prepare_utils.mount_osd(source, osd_id, is_vdo=is_vdo)

    # ensure that the OSD destination is always chowned properly
    system.chown(destination)

    # always re-do the symlink regardless if it exists, so that the journal
    # device path that may have changed can be mapped correctly every time
    destination = '/var/lib/ceph/osd/%s-%s/journal' % (conf.cluster, osd_id)
    process.run(['ln', '-snf', osd_journal, destination])

    # make sure that the journal has proper permissions
    system.chown(osd_journal)

    if no_systemd is False:
        # enable the ceph-volume unit for this OSD
        systemctl.enable_volume(osd_id, osd_fsid, 'lvm')

        # enable the OSD
        systemctl.enable_osd(osd_id)

        # start the OSD
        systemctl.start_osd(osd_id)
    terminal.success("ceph-volume lvm activate successful for osd ID: %s" % osd_id)
+
+
def get_osd_device_path(osd_lvs, device_type, dmcrypt_secret=None):
    """
    ``device_type`` can be one of ``db``, ``wal`` or ``block`` so that we can
    query LVs on system and fallback to querying the uuid if that is not
    present.

    Return a path if possible, failing to do that a ``None``, since some of
    these devices are optional.

    :param osd_lvs: all LVs belonging to the OSD being activated
    :param device_type: tag suffix to look up (``db``, ``wal`` or ``block``)
    :param dmcrypt_secret: key used to luks_open the device when the OSD is
                           encrypted
    :raises RuntimeError: when a uuid tag exists but no LV or partition
                          matches it
    """
    osd_block_lv = None
    for lv in osd_lvs:
        if lv.tags.get('ceph.type') == 'block':
            osd_block_lv = lv
            break
    if osd_block_lv is None:
        # Bugfix: previously, when no block LV was present, execution fell
        # through and hit a NameError (``device_uuid``/``is_encrypted`` were
        # never assigned). These devices are optional, so without a block LV
        # (and therefore no tags to consult) report "not found" instead.
        return None

    is_encrypted = osd_block_lv.tags.get('ceph.encrypted', '0') == '1'
    logger.debug('Found block device (%s) with encryption: %s', osd_block_lv.name, is_encrypted)
    uuid_tag = 'ceph.%s_uuid' % device_type
    device_uuid = osd_block_lv.tags.get(uuid_tag)
    if not device_uuid:
        return None

    device_lv = None
    for lv in osd_lvs:
        if lv.tags.get('ceph.type') == device_type:
            device_lv = lv
            break
    if device_lv:
        if is_encrypted:
            encryption_utils.luks_open(dmcrypt_secret, device_lv.lv_path, device_uuid)
            return '/dev/mapper/%s' % device_uuid
        return device_lv.lv_path

    # this could be a regular device, so query it with blkid
    physical_device = disk.get_device_from_partuuid(device_uuid)
    if physical_device:
        if is_encrypted:
            encryption_utils.luks_open(dmcrypt_secret, physical_device, device_uuid)
            return '/dev/mapper/%s' % device_uuid
        return physical_device

    raise RuntimeError('could not find %s with uuid %s' % (device_type, device_uuid))
+
+
def activate_bluestore(osd_lvs, no_systemd=False, no_tmpfs=False):
    """Prime and mount a bluestore OSD directory and (optionally) start it.

    :param osd_lvs: LV objects belonging to a single OSD; one of them must
                    carry the tag ``ceph.type=block``
    :param no_systemd: when True, skip enabling/starting any systemd units
    :param no_tmpfs: when True, use a plain directory instead of a tmpfs
                     mount for the OSD data dir
    :raises RuntimeError: when no block LV can be found
    :raises KeyError: when required ``ceph.*`` tags are missing on the block LV
    """
    for lv in osd_lvs:
        if lv.tags.get('ceph.type') == 'block':
            osd_block_lv = lv
            break
    else:
        raise RuntimeError('could not find a bluestore OSD to activate')

    is_encrypted = osd_block_lv.tags.get('ceph.encrypted', '0') == '1'
    dmcrypt_secret = None
    osd_id = osd_block_lv.tags['ceph.osd_id']
    conf.cluster = osd_block_lv.tags['ceph.cluster_name']
    osd_fsid = osd_block_lv.tags['ceph.osd_fsid']
    # load the cluster's ceph.conf so later helpers see the right settings
    configuration.load_ceph_conf_path(osd_block_lv.tags['ceph.cluster_name'])
    configuration.load()

    # mount on tmpfs the osd directory
    osd_path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    if not system.path_is_mounted(osd_path):
        # mkdir -p and mount as tmpfs
        prepare_utils.create_osd_path(osd_id, tmpfs=not no_tmpfs)
    # XXX This needs to be removed once ceph-bluestore-tool can deal with
    # symlinks that exist in the osd dir
    for link_name in ['block', 'block.db', 'block.wal']:
        link_path = os.path.join(osd_path, link_name)
        if os.path.exists(link_path):
            os.unlink(os.path.join(osd_path, link_name))
    # encryption is handled here, before priming the OSD dir
    if is_encrypted:
        osd_lv_path = '/dev/mapper/%s' % osd_block_lv.lv_uuid
        lockbox_secret = osd_block_lv.tags['ceph.cephx_lockbox_secret']
        # keyring writing is idempotent, safe to repeat on each activation
        encryption_utils.write_lockbox_keyring(osd_id, osd_fsid, lockbox_secret)
        dmcrypt_secret = encryption_utils.get_dmcrypt_key(osd_id, osd_fsid)
        encryption_utils.luks_open(dmcrypt_secret, osd_block_lv.lv_path, osd_block_lv.lv_uuid)
    else:
        osd_lv_path = osd_block_lv.lv_path

    db_device_path = get_osd_device_path(osd_lvs, 'db', dmcrypt_secret=dmcrypt_secret)
    wal_device_path = get_osd_device_path(osd_lvs, 'wal', dmcrypt_secret=dmcrypt_secret)

    # Once symlinks are removed, the osd dir can be primed again. chown first,
    # regardless of what currently exists so that ``prime-osd-dir`` can succeed
    # even if permissions are somehow messed up
    system.chown(osd_path)
    prime_command = [
        'ceph-bluestore-tool', '--cluster=%s' % conf.cluster,
        'prime-osd-dir', '--dev', osd_lv_path,
        '--path', osd_path]

    if __release__ != "luminous":
        # mon-config changes are not available in Luminous
        prime_command.append('--no-mon-config')

    process.run(prime_command)
    # always re-do the symlink regardless if it exists, so that the block,
    # block.wal, and block.db devices that may have changed can be mapped
    # correctly every time
    process.run(['ln', '-snf', osd_lv_path, os.path.join(osd_path, 'block')])
    system.chown(os.path.join(osd_path, 'block'))
    system.chown(osd_path)
    if db_device_path:
        destination = os.path.join(osd_path, 'block.db')
        process.run(['ln', '-snf', db_device_path, destination])
        system.chown(db_device_path)
        system.chown(destination)
    if wal_device_path:
        destination = os.path.join(osd_path, 'block.wal')
        process.run(['ln', '-snf', wal_device_path, destination])
        system.chown(wal_device_path)
        system.chown(destination)

    if no_systemd is False:
        # enable the ceph-volume unit for this OSD
        systemctl.enable_volume(osd_id, osd_fsid, 'lvm')

        # enable the OSD
        systemctl.enable_osd(osd_id)

        # start the OSD
        systemctl.start_osd(osd_id)
    terminal.success("ceph-volume lvm activate successful for osd ID: %s" % osd_id)
+
+
class Activate(object):
    """``ceph-volume lvm activate`` subcommand: discover prepared OSD LVs by
    tags and bring the OSD(s) online."""

    # one-line summary shown by the parent ``ceph-volume lvm`` dispatcher
    help = 'Discover and mount the LVM device associated with an OSD ID and start the Ceph OSD'

    def __init__(self, argv):
        # raw CLI arguments; parsing is deferred until main()
        self.argv = argv

    @decorators.needs_root
    def activate_all(self, args):
        """Activate every OSD found on the system (``--all``).

        OSDs whose systemd unit already reports active are skipped, unless
        ``--no-systemd`` was given.
        """
        listed_osds = direct_report()
        osds = {}
        for osd_id, devices in listed_osds.items():
            # the metadata for all devices in each OSD will contain
            # the FSID which is required for activation
            for device in devices:
                fsid = device.get('tags', {}).get('ceph.osd_fsid')
                if fsid:
                    osds[fsid] = osd_id
                    break
        if not osds:
            terminal.warning('Was unable to find any OSDs to activate')
            terminal.warning('Verify OSDs are present with "ceph-volume lvm list"')
            return
        for osd_fsid, osd_id in osds.items():
            if not args.no_systemd and systemctl.osd_is_active(osd_id):
                terminal.warning(
                    'OSD ID %s FSID %s process is active. Skipping activation' % (osd_id, osd_fsid)
                )
            else:
                terminal.info('Activating OSD ID %s FSID %s' % (osd_id, osd_fsid))
                self.activate(args, osd_id=osd_id, osd_fsid=osd_fsid)

    @decorators.needs_root
    def activate(self, args, osd_id=None, osd_fsid=None):
        """
        Activate a single OSD, dispatching to the filestore or bluestore
        implementation based on CLI flags, auto-detection, or LV tags.

        :param args: The parsed arguments coming from the CLI
        :param osd_id: When activating all, this gets populated with an
                       existing OSD ID
        :param osd_fsid: When activating all, this gets populated with an
                         existing OSD FSID
        :raises RuntimeError: on incomplete id/fsid input, or when no LVs
                              match the resulting tag query
        """
        osd_id = osd_id if osd_id else args.osd_id
        osd_fsid = osd_fsid if osd_fsid else args.osd_fsid

        # build the LV tag query; an fsid alone is sufficient, an id alone
        # is ambiguous (ids are only unique within a cluster)
        if osd_id and osd_fsid:
            tags = {'ceph.osd_id': osd_id, 'ceph.osd_fsid': osd_fsid}
        elif not osd_id and osd_fsid:
            tags = {'ceph.osd_fsid': osd_fsid}
        elif osd_id and not osd_fsid:
            raise RuntimeError('could not activate osd.{}, please provide the '
                               'osd_fsid too'.format(osd_id))
        else:
            raise RuntimeError('Please provide both osd_id and osd_fsid')
        lvs = api.get_lvs(tags=tags)
        if not lvs:
            raise RuntimeError('could not find osd.%s with osd_fsid %s' %
                               (osd_id, osd_fsid))

        # This argument is only available when passed in directly or via
        # systemd, not when ``create`` is being used
        if getattr(args, 'auto_detect_objectstore', False):
            logger.info('auto detecting objectstore')
            # may get multiple lvs, so can't do get_the_lvs() calls here
            for lv in lvs:
                has_journal = lv.tags.get('ceph.journal_uuid')
                if has_journal:
                    logger.info('found a journal associated with the OSD, '
                                'assuming filestore')
                    return activate_filestore(lvs, args.no_systemd)

            logger.info('unable to find a journal associated with the OSD, '
                        'assuming bluestore')

            return activate_bluestore(lvs, args.no_systemd)

        # explicit filestore/bluestore flags take precedence
        # NOTE(review): if no flag is set and no ceph.block_device or
        # ceph.data_device tag is present this silently does nothing --
        # confirm that is intended
        if getattr(args, 'bluestore', False):
            activate_bluestore(lvs, args.no_systemd, getattr(args, 'no_tmpfs', False))
        elif getattr(args, 'filestore', False):
            activate_filestore(lvs, args.no_systemd)
        elif any('ceph.block_device' in lv.tags for lv in lvs):
            activate_bluestore(lvs, args.no_systemd, getattr(args, 'no_tmpfs', False))
        elif any('ceph.data_device' in lv.tags for lv in lvs):
            activate_filestore(lvs, args.no_systemd)

    def main(self):
        """Parse ``self.argv`` and run either activate_all() or activate().

        With no arguments at all, print the help text and return.
        """
        sub_command_help = dedent("""
        Activate OSDs by discovering them with LVM and mounting them in their
        appropriate destination:

            ceph-volume lvm activate {ID} {FSID}

        The lvs associated with the OSD need to have been prepared previously,
        so that all needed tags and metadata exist.

        When migrating OSDs, or a multiple-osd activation is needed, the
        ``--all`` flag can be used instead of the individual ID and FSID:

            ceph-volume lvm activate --all

        """)
        parser = argparse.ArgumentParser(
            prog='ceph-volume lvm activate',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=sub_command_help,
        )

        parser.add_argument(
            'osd_id',
            metavar='ID',
            nargs='?',
            help='The ID of the OSD, usually an integer, like 0'
        )
        parser.add_argument(
            'osd_fsid',
            metavar='FSID',
            nargs='?',
            help='The FSID of the OSD, similar to a SHA1'
        )
        parser.add_argument(
            '--auto-detect-objectstore',
            action='store_true',
            help='Autodetect the objectstore by inspecting the OSD',
        )
        parser.add_argument(
            '--bluestore',
            action='store_true',
            help='force bluestore objectstore activation',
        )
        parser.add_argument(
            '--filestore',
            action='store_true',
            help='force filestore objectstore activation',
        )
        parser.add_argument(
            '--all',
            dest='activate_all',
            action='store_true',
            help='Activate all OSDs found in the system',
        )
        parser.add_argument(
            '--no-systemd',
            dest='no_systemd',
            action='store_true',
            help='Skip creating and enabling systemd units and starting OSD services',
        )
        parser.add_argument(
            '--no-tmpfs',
            action='store_true',
            help='Do not use a tmpfs mount for OSD data dir'
        )
        if len(self.argv) == 0:
            print(sub_command_help)
            return
        args = parser.parse_args(self.argv)
        if args.activate_all:
            self.activate_all(args)
        else:
            self.activate(args)
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/batch.py b/src/ceph-volume/ceph_volume/devices/lvm/batch.py
new file mode 100644
index 000000000..c97d3a25b
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/batch.py
@@ -0,0 +1,654 @@
+import argparse
+from collections import namedtuple
+import json
+import logging
+from textwrap import dedent
+from ceph_volume import terminal, decorators
+from ceph_volume.util import disk, prompt_bool, arg_validators, templates
+from ceph_volume.util import prepare
+from . import common
+from .create import Create
+from .prepare import Prepare
+
+mlogger = terminal.MultiLogger(__name__)
+logger = logging.getLogger(__name__)
+
+
device_list_template = """
  * {path: <25} {size: <10} {state}"""


def device_formatter(devices):
    """Render ``(path, details)`` device pairs as bulleted report lines.

    ``details`` must provide ``human_readable_size`` and the lsblk-style
    ``rotational`` flag ('0' meaning solid state).
    """
    def render(dev_path, details):
        spin_state = 'solid' if details['rotational'] == '0' else 'rotational'
        return device_list_template.format(path=dev_path,
                                           size=details['human_readable_size'],
                                           state=spin_state)

    return ''.join(render(path, details) for path, details in devices)
+
+
def ensure_disjoint_device_lists(data, db=None, wal=None, journal=None):
    """Validate that no device was passed in more than one device list.

    :param data: data device paths
    :param db: db device paths (bluestore)
    :param wal: wal device paths (bluestore)
    :param journal: journal device paths (filestore)
    :raises Exception: if any device appears in two lists that must be
        disjoint (data vs. db/wal/journal, and db vs. wal)
    """
    # ``None`` sentinels replace the previous mutable [] defaults, which are
    # a well-known shared-state pitfall; passing [] explicitly still works
    db = db or []
    wal = wal or []
    journal = journal or []
    # check that all device lists are disjoint with each other
    # NOTE: db/journal and wal/journal are deliberately not compared; journal
    # devices are only used with filestore, where db and wal are empty
    if not all([set(data).isdisjoint(set(db)),
                set(data).isdisjoint(set(wal)),
                set(data).isdisjoint(set(journal)),
                set(db).isdisjoint(set(wal))]):
        raise Exception('Device lists are not disjoint')
+
+
def separate_devices_from_lvs(devices):
    """Split ``devices`` into physical devices and logical volumes.

    :param devices: iterable of Device-like objects exposing ``is_device``
    :return: tuple ``(phys, lvm)``, each a list preserving input order
    """
    phys = []
    lvm = []
    for dev in devices:
        # plain if/else instead of the previous side-effecting conditional
        # expression (``a.append(d) if cond else b.append(d)``), which abuses
        # an expression statement for control flow
        if dev.is_device:
            phys.append(dev)
        else:
            lvm.append(dev)
    return phys, lvm
+
+
def get_physical_osds(devices, args):
    '''
    Goes through passed physical devices and assigns OSDs

    :param devices: Device objects representing physical disks (not LVs)
    :param args: parsed CLI namespace; reads ``osds_per_device``,
                 ``data_slots``, ``osd_ids`` and ``dmcrypt``
    :return: list of ``Batch.OSD`` plans, possibly fewer than requested if
             devices run out of free VG space
    '''
    data_slots = args.osds_per_device
    if args.data_slots:
        # --data-slots may reserve more slots than OSDs deployed right now,
        # leaving room for future OSDs of the same size
        data_slots = max(args.data_slots, args.osds_per_device)
    rel_data_size = 1.0 / data_slots
    mlogger.debug('relative data size: {}'.format(rel_data_size))
    ret = []
    for dev in devices:
        if dev.available_lvm:
            # sizing is based on the first VG on the device
            dev_size = dev.vg_size[0]
            abs_size = disk.Size(b=int(dev_size * rel_data_size))
            free_size = dev.vg_free[0]
            for _ in range(args.osds_per_device):
                if abs_size > free_size:
                    # device is full, stop assigning OSDs to it
                    break
                free_size -= abs_size.b
                osd_id = None
                if args.osd_ids:
                    # reuse explicitly passed OSD ids, consumed one at a time
                    osd_id = args.osd_ids.pop()
                ret.append(Batch.OSD(dev.path,
                                     rel_data_size,
                                     abs_size,
                                     args.osds_per_device,
                                     osd_id,
                                     'dmcrypt' if args.dmcrypt else None,
                                     dev.symlink))
    return ret
+
+
def get_lvm_osds(lvs, args):
    '''
    Goes through passed LVs and assigns planned osds

    :param lvs: Device objects wrapping pre-created logical volumes
    :param args: parsed CLI namespace; reads ``osd_ids`` and ``dmcrypt``
    :return: list of ``Batch.OSD`` plans, one per unused LV
    '''
    ret = []
    for lv in lvs:
        if lv.used_by_ceph:
            # LVs already tagged by ceph are skipped entirely
            continue
        osd_id = None
        if args.osd_ids:
            osd_id = args.osd_ids.pop()
        # a pre-made LV is always consumed whole: 100% relative size, 1 slot
        osd = Batch.OSD("{}/{}".format(lv.vg_name, lv.lv_name),
                        100.0,
                        disk.Size(b=int(lv.lvs[0].lv_size)),
                        1,
                        osd_id,
                        'dmcrypt' if args.dmcrypt else None)
        ret.append(osd)
    return ret
+
+
def get_physical_fast_allocs(devices, type_, fast_slots_per_device, new_osds, args):
    '''
    Plan db/wal/journal allocations on the passed physical fast devices.

    :param devices: physical fast Device objects
    :param type_: 'block_db', 'block_wal' or 'journal'; used to look up the
                  matching ``--<type>-slots`` / ``--<type>-size`` arguments
    :param fast_slots_per_device: how many slots each device should provide
    :param new_osds: upper bound of allocations to produce
    :param args: parsed CLI namespace
    :return: list of (device path, relative size, absolute size, slot count)
    '''
    requested_slots = getattr(args, '{}_slots'.format(type_))
    if not requested_slots or requested_slots < fast_slots_per_device:
        if requested_slots:
            mlogger.info('{}_slots argument is too small, ignoring'.format(type_))
        requested_slots = fast_slots_per_device

    requested_size = getattr(args, '{}_size'.format(type_), 0)
    if not requested_size or requested_size == 0:
        # no size argument was specified, check ceph.conf
        get_size_fct = getattr(prepare, 'get_{}_size'.format(type_))
        requested_size = get_size_fct(lv_format=False)

    ret = []
    vg_device_map = group_devices_by_vg(devices)
    for vg_devices in vg_device_map.values():
        for dev in vg_devices:
            if not dev.available_lvm:
                continue
            # any LV present is considered a taken slot
            occupied_slots = len(dev.lvs)
            dev_size = dev.vg_size[0]
            # this only looks at the first vg on device, unsure if there is a better
            # way
            abs_size = disk.Size(b=int(dev_size / requested_slots))
            free_size = dev.vg_free[0]
            relative_size = int(abs_size) / dev_size
            if requested_size:
                if requested_size <= abs_size:
                    # an explicit size wins, as long as the slot can hold it
                    abs_size = requested_size
                    relative_size = int(abs_size) / dev_size
                else:
                    mlogger.error(
                        '{} was requested for {}, but only {} can be fulfilled'.format(
                            requested_size,
                            '{}_size'.format(type_),
                            abs_size,
                        ))
                    exit(1)
            # fill remaining slots while space lasts and more OSDs need one
            while abs_size <= free_size and len(ret) < new_osds and occupied_slots < fast_slots_per_device:
                free_size -= abs_size.b
                occupied_slots += 1
                ret.append((dev.path, relative_size, abs_size, requested_slots))
    return ret
+
def group_devices_by_vg(devices):
    """Group devices by the name of the VG their PV belongs to.

    Devices without any VG are collected under the ``'unused_devices'`` key,
    which is always present (possibly empty).
    """
    grouped = {'unused_devices': []}
    for device in devices:
        if device.vgs:
            # assumption carried over from the rest of the code base: a PV
            # belongs to at most one VG, so only the first entry matters
            grouped.setdefault(device.vgs[0].name, []).append(device)
        else:
            grouped['unused_devices'].append(device)
    return grouped
+
def get_lvm_fast_allocs(lvs):
    """Build (path, rel_size, abs_size, slots) tuples for pre-created fast LVs.

    LVs already used by ceph are skipped; each remaining LV is consumed whole
    (100% relative size, a single slot).
    """
    allocs = []
    for candidate in lvs:
        if candidate.used_by_ceph:
            continue
        lv_path = "{}/{}".format(candidate.vg_name, candidate.lv_name)
        allocs.append((lv_path, 100.0,
                       disk.Size(b=int(candidate.lvs[0].lv_size)), 1))
    return allocs
+
+
class Batch(object):
    """``ceph-volume lvm batch`` subcommand: plan OSD layouts across many
    devices at once and hand each planned OSD to Prepare/Create."""

    help = 'Automatically size devices for multi-OSD provisioning with minimal interaction'

    _help = dedent("""
    Automatically size devices ready for OSD provisioning based on default strategies.

    Usage:

        ceph-volume lvm batch [DEVICE...]

    Devices can be physical block devices or LVs.
    Optional reporting on possible outcomes is enabled with --report

        ceph-volume lvm batch --report [DEVICE...]
    """)

    def __init__(self, argv):
        # Build the argument parser and parse immediately; ``self.args`` is
        # consumed by main() and the planning helpers.
        parser = argparse.ArgumentParser(
            prog='ceph-volume lvm batch',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=self._help,
        )

        parser.add_argument(
            'devices',
            metavar='DEVICES',
            nargs='*',
            type=arg_validators.ValidBatchDataDevice(),
            default=[],
            help='Devices to provision OSDs',
        )
        parser.add_argument(
            '--db-devices',
            nargs='*',
            type=arg_validators.ValidBatchDevice(),
            default=[],
            help='Devices to provision OSDs db volumes',
        )
        parser.add_argument(
            '--wal-devices',
            nargs='*',
            type=arg_validators.ValidBatchDevice(),
            default=[],
            help='Devices to provision OSDs wal volumes',
        )
        parser.add_argument(
            '--journal-devices',
            nargs='*',
            type=arg_validators.ValidBatchDevice(),
            default=[],
            help='Devices to provision OSDs journal volumes',
        )
        parser.add_argument(
            '--auto',
            action='store_true',
            help=('deploy multi-device OSDs if rotational and non-rotational drives '
                  'are passed in DEVICES'),
            default=True
        )
        parser.add_argument(
            '--no-auto',
            action='store_false',
            dest='auto',
            help=('deploy standalone OSDs if rotational and non-rotational drives '
                  'are passed in DEVICES'),
        )
        parser.add_argument(
            '--bluestore',
            action='store_true',
            help='bluestore objectstore (default)',
        )
        parser.add_argument(
            '--filestore',
            action='store_true',
            help='filestore objectstore',
        )
        parser.add_argument(
            '--report',
            action='store_true',
            help='Only report on OSD that would be created and exit',
        )
        parser.add_argument(
            '--yes',
            action='store_true',
            help='Avoid prompting for confirmation when provisioning',
        )
        parser.add_argument(
            '--format',
            help='output format, defaults to "pretty"',
            default='pretty',
            choices=['json', 'json-pretty', 'pretty'],
        )
        parser.add_argument(
            '--dmcrypt',
            action='store_true',
            help='Enable device encryption via dm-crypt',
        )
        parser.add_argument(
            '--crush-device-class',
            dest='crush_device_class',
            help='Crush device class to assign this OSD to',
            default=""
        )
        parser.add_argument(
            '--no-systemd',
            dest='no_systemd',
            action='store_true',
            help='Skip creating and enabling systemd units and starting OSD services',
        )
        parser.add_argument(
            '--osds-per-device',
            type=int,
            default=1,
            help='Provision more than 1 (the default) OSD per device',
        )
        parser.add_argument(
            '--data-slots',
            type=int,
            help=('Provision more than 1 (the default) OSD slot per device'
                  ' if more slots then osds-per-device are specified, slots'
                  'will stay unoccupied'),
        )
        parser.add_argument(
            '--block-db-size',
            type=disk.Size.parse,
            help='Set (or override) the "bluestore_block_db_size" value, in bytes'
        )
        parser.add_argument(
            '--block-db-slots',
            type=int,
            help='Provision slots on DB device, can remain unoccupied'
        )
        parser.add_argument(
            '--block-wal-size',
            type=disk.Size.parse,
            help='Set (or override) the "bluestore_block_wal_size" value, in bytes'
        )
        parser.add_argument(
            '--block-wal-slots',
            type=int,
            help='Provision slots on WAL device, can remain unoccupied'
        )
        def journal_size_in_mb_hack(size):
            # TODO give user time to adjust, then remove this
            if size and size[-1].isdigit():
                mlogger.warning('DEPRECATION NOTICE')
                mlogger.warning('--journal-size as integer is parsed as megabytes')
                mlogger.warning('A future release will parse integers as bytes')
                mlogger.warning('Add a "M" to explicitly pass a megabyte size')
                size += 'M'
            return disk.Size.parse(size)
        parser.add_argument(
            '--journal-size',
            type=journal_size_in_mb_hack,
            help='Override the "osd_journal_size" value, in megabytes'
        )
        parser.add_argument(
            '--journal-slots',
            type=int,
            help='Provision slots on journal device, can remain unoccupied'
        )
        parser.add_argument(
            '--prepare',
            action='store_true',
            help='Only prepare all OSDs, do not activate',
        )
        parser.add_argument(
            '--osd-ids',
            nargs='*',
            default=[],
            help='Reuse existing OSD ids',
            type=arg_validators.valid_osd_id
        )
        self.args = parser.parse_args(argv)
        self.parser = parser
        # pre-create the attributes ``usable``, ``db_usable``, ``wal_usable``
        # and ``journal_usable`` as empty lists
        for dev_list in ['', 'db_', 'wal_', 'journal_']:
            setattr(self, '{}usable'.format(dev_list), [])

    def report(self, plan):
        """Print the planned OSDs in the requested ``--format``."""
        report = self._create_report(plan)
        print(report)

    def _create_report(self, plan):
        """Render ``plan`` (a list of OSD objects) as pretty text or JSON."""
        if self.args.format == 'pretty':
            report = ''
            report += templates.total_osds.format(total_osds=len(plan))

            report += templates.osd_component_titles
            for osd in plan:
                report += templates.osd_header
                report += osd.report()
            return report
        else:
            json_report = []
            for osd in plan:
                json_report.append(osd.report_json())
            if self.args.format == 'json':
                return json.dumps(json_report)
            elif self.args.format == 'json-pretty':
                return json.dumps(json_report, indent=4,
                                  sort_keys=True)

    def _check_slot_args(self):
        '''
        checking if -slots args are consistent with other arguments
        '''
        if self.args.data_slots and self.args.osds_per_device:
            if self.args.data_slots < self.args.osds_per_device:
                raise ValueError('data_slots is smaller then osds_per_device')

    def _sort_rotational_disks(self):
        '''
        Helper for legacy auto behaviour.
        Sorts drives into rotating and non-rotating, the latter being used for
        db or journal.
        '''
        mlogger.warning('DEPRECATION NOTICE')
        mlogger.warning('You are using the legacy automatic disk sorting behavior')
        mlogger.warning('The Pacific release will change the default to --no-auto')
        rotating = []
        ssd = []
        for d in self.args.devices:
            rotating.append(d) if d.rotational else ssd.append(d)
        if ssd and not rotating:
            # no need for additional sorting, we'll only deploy standalone on ssds
            return
        self.args.devices = rotating
        if self.args.filestore:
            self.args.journal_devices = ssd
        else:
            self.args.db_devices = ssd

    @decorators.needs_root
    def main(self):
        """Entry point: validate arguments, build the plan, then report it or
        (after confirmation) execute it."""
        if not self.args.devices:
            return self.parser.print_help()

        # Default to bluestore here since defaulting it in add_argument may
        # cause both to be True
        if not self.args.bluestore and not self.args.filestore:
            self.args.bluestore = True

        if (self.args.auto and not self.args.db_devices and not
            self.args.wal_devices and not self.args.journal_devices):
            self._sort_rotational_disks()

        self._check_slot_args()

        ensure_disjoint_device_lists(self.args.devices,
                                     self.args.db_devices,
                                     self.args.wal_devices,
                                     self.args.journal_devices)

        plan = self.get_plan(self.args)

        if self.args.report:
            self.report(plan)
            return 0

        if not self.args.yes:
            # interactive confirmation before touching any devices
            self.report(plan)
            terminal.info('The above OSDs would be created if the operation continues')
            if not prompt_bool('do you want to proceed? (yes/no)'):
                terminal.error('aborting OSD provisioning')
                raise SystemExit(0)

        self._execute(plan)

    def _execute(self, plan):
        """Run Prepare (with ``--prepare``) or Create for every planned OSD."""
        defaults = common.get_default_args()
        global_args = [
            'bluestore',
            'filestore',
            'dmcrypt',
            'crush_device_class',
            'no_systemd',
        ]
        defaults.update({arg: getattr(self.args, arg) for arg in global_args})
        for osd in plan:
            args = osd.get_args(defaults)
            if self.args.prepare:
                p = Prepare([])
                p.safe_prepare(argparse.Namespace(**args))
            else:
                c = Create([])
                c.create(argparse.Namespace(**args))


    def get_plan(self, args):
        """Return the list of planned OSDs for the selected objectstore.

        NOTE(review): relies on main() having forced one of
        ``args.bluestore``/``args.filestore`` to True; otherwise ``plan``
        would be unbound.
        """
        if args.bluestore:
            plan = self.get_deployment_layout(args, args.devices, args.db_devices,
                                              args.wal_devices)
        elif args.filestore:
            plan = self.get_deployment_layout(args, args.devices, args.journal_devices)
        return plan

    def get_deployment_layout(self, args, devices, fast_devices=[],
                              very_fast_devices=[]):
        '''
        The methods here are mostly just organization, error reporting and
        setting up of (default) args. The heavy lifting code for the deployment
        layout can be found in the static get_*_osds and get_*_fast_allocs
        functions.

        NOTE(review): the mutable default args are never mutated here, so the
        shared-default pitfall does not bite -- confirm before refactoring.
        '''
        plan = []
        phys_devs, lvm_devs = separate_devices_from_lvs(devices)
        mlogger.debug(('passed data devices: {} physical,'
                       ' {} LVM').format(len(phys_devs), len(lvm_devs)))

        plan.extend(get_physical_osds(phys_devs, args))

        plan.extend(get_lvm_osds(lvm_devs, args))

        num_osds = len(plan)
        if num_osds == 0:
            mlogger.info('All data devices are unavailable')
            return plan
        requested_osds = args.osds_per_device * len(phys_devs) + len(lvm_devs)

        # fast devices hold db volumes for bluestore, journals for filestore
        fast_type = 'block_db' if args.bluestore else 'journal'
        fast_allocations = self.fast_allocations(fast_devices,
                                                 requested_osds,
                                                 num_osds,
                                                 fast_type)
        if fast_devices and not fast_allocations:
            mlogger.info('{} fast devices were passed, but none are available'.format(len(fast_devices)))
            return []
        if fast_devices and not len(fast_allocations) == num_osds:
            mlogger.error('{} fast allocations != {} num_osds'.format(
                len(fast_allocations), num_osds))
            exit(1)

        very_fast_allocations = self.fast_allocations(very_fast_devices,
                                                      requested_osds,
                                                      num_osds,
                                                      'block_wal')
        if very_fast_devices and not very_fast_allocations:
            mlogger.info('{} very fast devices were passed, but none are available'.format(len(very_fast_devices)))
            return []
        if very_fast_devices and not len(very_fast_allocations) == num_osds:
            mlogger.error('{} very fast allocations != {} num_osds'.format(
                len(very_fast_allocations), num_osds))
            exit(1)

        for osd in plan:
            if fast_devices:
                osd.add_fast_device(*fast_allocations.pop(),
                                    type_=fast_type)
            if very_fast_devices and args.bluestore:
                osd.add_very_fast_device(*very_fast_allocations.pop())
        return plan

    def fast_allocations(self, devices, requested_osds, new_osds, type_):
        """Plan fast (db/wal/journal) volume allocations on ``devices``.

        :return: list of (path, relative size, absolute size, slots) tuples
        """
        ret = []
        if not devices:
            return ret
        phys_devs, lvm_devs = separate_devices_from_lvs(devices)
        mlogger.debug(('passed {} devices: {} physical,'
                       ' {} LVM').format(type_, len(phys_devs), len(lvm_devs)))

        ret.extend(get_lvm_fast_allocs(lvm_devs))

        # fill up uneven distributions across fast devices: 5 osds and 2 fast
        # devices? create 3 slots on each device rather then deploying
        # heterogeneous osds
        slot_divider = max(1, len(phys_devs))
        if (requested_osds - len(lvm_devs)) % slot_divider:
            fast_slots_per_device = int((requested_osds - len(lvm_devs)) / slot_divider) + 1
        else:
            fast_slots_per_device = int((requested_osds - len(lvm_devs)) / slot_divider)


        ret.extend(get_physical_fast_allocs(phys_devs,
                                            type_,
                                            fast_slots_per_device,
                                            new_osds,
                                            self.args))
        return ret

    class OSD(object):
        '''
        This class simply stores info about to-be-deployed OSDs and provides an
        easy way to retrieve the necessary create arguments.
        '''
        # volume specification shared by data, fast (db/journal) and
        # very fast (wal) devices
        VolSpec = namedtuple('VolSpec',
                             ['path',
                              'rel_size',
                              'abs_size',
                              'slots',
                              'type_'])

        def __init__(self,
                     data_path,
                     rel_size,
                     abs_size,
                     slots,
                     id_,
                     encryption,
                     symlink=None):
            self.id_ = id_
            self.data = self.VolSpec(path=data_path,
                                     rel_size=rel_size,
                                     abs_size=abs_size,
                                     slots=slots,
                                     type_='data')
            self.fast = None
            self.very_fast = None
            self.encryption = encryption
            self.symlink = symlink

        def add_fast_device(self, path, rel_size, abs_size, slots, type_):
            """Attach the db (bluestore) or journal (filestore) volume."""
            self.fast = self.VolSpec(path=path,
                                     rel_size=rel_size,
                                     abs_size=abs_size,
                                     slots=slots,
                                     type_=type_)

        def add_very_fast_device(self, path, rel_size, abs_size, slots):
            """Attach the wal volume (bluestore only)."""
            self.very_fast = self.VolSpec(path=path,
                                          rel_size=rel_size,
                                          abs_size=abs_size,
                                          slots=slots,
                                          type_='block_wal')

        def _get_osd_plan(self):
            """Return the dict of create/prepare arguments for this OSD."""
            plan = {
                'data': self.data.path,
                'data_size': self.data.abs_size,
                'encryption': self.encryption,
            }
            if self.fast:
                type_ = self.fast.type_.replace('.', '_')
                plan.update(
                    {
                        type_: self.fast.path,
                        '{}_size'.format(type_): self.fast.abs_size,
                    })
            if self.very_fast:
                plan.update(
                    {
                        'block_wal': self.very_fast.path,
                        'block_wal_size': self.very_fast.abs_size,
                    })
            if self.id_:
                plan.update({'osd_id': self.id_})
            return plan

        def get_args(self, defaults):
            """Merge this OSD's plan over the passed default arguments."""
            my_defaults = defaults.copy()
            my_defaults.update(self._get_osd_plan())
            return my_defaults

        def report(self):
            """Return a human readable, multi-line description of this OSD."""
            report = ''
            if self.id_:
                report += templates.osd_reused_id.format(
                    id_=self.id_)
            if self.encryption:
                report += templates.osd_encryption.format(
                    enc=self.encryption)
            path = self.data.path
            if self.symlink:
                path = f'{self.symlink} -> {self.data.path}'
            report += templates.osd_component.format(
                _type=self.data.type_,
                path=path,
                size=self.data.abs_size,
                percent=self.data.rel_size)
            if self.fast:
                report += templates.osd_component.format(
                    _type=self.fast.type_,
                    path=self.fast.path,
                    size=self.fast.abs_size,
                    percent=self.fast.rel_size)
            if self.very_fast:
                report += templates.osd_component.format(
                    _type=self.very_fast.type_,
                    path=self.very_fast.path,
                    size=self.very_fast.abs_size,
                    percent=self.very_fast.rel_size)
            return report

        def report_json(self):
            # cast all values to string so that the report can be dumped in to
            # json.dumps
            return {k: str(v) for k, v in self._get_osd_plan().items()}
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/common.py b/src/ceph-volume/ceph_volume/devices/lvm/common.py
new file mode 100644
index 000000000..1134b1754
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/common.py
@@ -0,0 +1,190 @@
+from ceph_volume.util import arg_validators, disk
+from ceph_volume import process, conf
+from ceph_volume import terminal
+from ceph_volume.devices.lvm.zap import Zap
+import argparse
+
+
def rollback_osd(args, osd_id=None):
    """
    When the process of creating or preparing fails, the OSD needs to be
    destroyed so that the ID can be reused. This prevents from leaving the ID
    around as "used" on the monitor, which can cause confusion if expecting
    sequential OSD IDs.

    The usage of `destroy-new` allows this to be done without requiring the
    admin keyring (otherwise needed for destroy and purge commands)
    """
    if not osd_id:
        # no ID was ever generated, so there is nothing to roll back
        return

    # once here, this is an error condition that needs to be rolled back
    terminal.error('Was unable to complete a new OSD, will rollback changes')
    bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
    process.run([
        'ceph',
        '--cluster', conf.cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', bootstrap_keyring,
        'osd', 'purge-new', 'osd.%s' % osd_id,
        '--yes-i-really-mean-it',
    ])
    # wipe the device(s) so they can be reused for a fresh OSD
    Zap(['--destroy', '--osd-id', osd_id]).main()
+
+
# CLI flags shared by `lvm prepare` and `lvm create`: maps flag name to the
# kwargs handed to argparse's add_argument().
common_args = {
    '--data': {
        'help': 'OSD data path. A physical device or logical volume',
        'required': True,
        'type': arg_validators.ValidDataDevice(as_string=True),
    },
    '--data-size': {
        'help': 'Size of data LV in case a device was passed in --data',
        'default': '0',
        'type': disk.Size.parse
    },
    '--data-slots': {
        # NOTE: the space before the closing quote is required; without it the
        # implicitly-concatenated help string rendered as "gets oneof those"
        'help': ('Intended number of slots on data device. The new OSD gets one '
                 'of those slots or 1/nth of the available capacity'),
        'type': int,
        'default': 1,
    },
    '--osd-id': {
        'help': 'Reuse an existing OSD id',
        'default': None,
        'type': arg_validators.valid_osd_id,
    },
    '--osd-fsid': {
        'help': 'Reuse an existing OSD fsid',
        'default': None,
    },
    '--cluster-fsid': {
        'help': 'Specify the cluster fsid, useful when no ceph.conf is available',
        'default': None,
    },
    '--crush-device-class': {
        'dest': 'crush_device_class',
        'help': 'Crush device class to assign this OSD to',
        'default': "",
    },
    '--dmcrypt': {
        'action': 'store_true',
        'help': 'Enable device encryption via dm-crypt',
    },
    '--no-systemd': {
        'dest': 'no_systemd',
        'action': 'store_true',
        'help': 'Skip creating and enabling systemd units and starting OSD services when activating',
    },
}
+
# Flags specific to the bluestore objectstore (block.db / block.wal devices).
bluestore_args = {
    '--bluestore': {
        'action': 'store_true',
        'help': 'Use the bluestore objectstore',
    },
    '--block.db': {
        'dest': 'block_db',
        'help': 'Path to bluestore block.db logical volume or device',
        'type': arg_validators.ValidDevice(as_string=True),
    },
    '--block.db-size': {
        'dest': 'block_db_size',
        'help': 'Size of block.db LV in case device was passed in --block.db',
        'default': '0',
        'type': disk.Size.parse
    },
    '--block.db-slots': {
        'dest': 'block_db_slots',
        # trailing space fixes the former "gets oneof those" help rendering
        'help': ('Intended number of slots on db device. The new OSD gets one '
                 'of those slots or 1/nth of the available capacity'),
        'type': int,
        'default': 1,
    },
    '--block.wal': {
        'dest': 'block_wal',
        'help': 'Path to bluestore block.wal logical volume or device',
        'type': arg_validators.ValidDevice(as_string=True),
    },
    '--block.wal-size': {
        'dest': 'block_wal_size',
        'help': 'Size of block.wal LV in case device was passed in --block.wal',
        'default': '0',
        'type': disk.Size.parse
    },
    '--block.wal-slots': {
        'dest': 'block_wal_slots',
        # trailing space fixes the former "gets oneof those" help rendering
        'help': ('Intended number of slots on wal device. The new OSD gets one '
                 'of those slots or 1/nth of the available capacity'),
        'type': int,
        'default': 1,
    },
}
+
# Flags specific to the filestore objectstore (journal device).
filestore_args = {
    '--filestore': {
        'action': 'store_true',
        'help': 'Use the filestore objectstore',
    },
    '--journal': {
        'help': 'A logical volume (vg_name/lv_name), or path to a device',
        'type': arg_validators.ValidDevice(as_string=True),
    },
    '--journal-size': {
        'help': 'Size of journal LV in case a raw block device was passed in --journal',
        'default': '0',
        'type': disk.Size.parse
    },
    '--journal-slots': {
        # trailing space fixes the former "gets oneof those" help rendering
        'help': ('Intended number of slots on journal device. The new OSD gets one '
                 'of those slots or 1/nth of the available capacity'),
        'type': int,
        'default': 1,
    },
}
+
def get_default_args():
    """Map every known CLI flag (normalized to its dest-style name) to its
    declared default, or None when the flag declares no default."""
    def normalize(flag):
        # '--block.db-size' -> 'block_db_size'
        return flag.strip('-').replace('-', '_').replace('.', '_')

    defaults = {}
    for argset in (common_args, filestore_args, bluestore_args):
        for flag, options in argset.items():
            defaults[normalize(flag)] = options.get('default', None)
    return defaults
+
+
def common_parser(prog, description):
    """
    Both prepare and create share the same parser, those are defined here to
    avoid duplication
    """
    parser = argparse.ArgumentParser(
        prog=prog,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
    )

    filestore_group = parser.add_argument_group('filestore')
    bluestore_group = parser.add_argument_group('bluestore')

    # shared flags go on the parser itself; objectstore-specific ones go in
    # their named argument groups
    for flag, options in common_args.items():
        parser.add_argument(flag, **options)
    for flag, options in bluestore_args.items():
        bluestore_group.add_argument(flag, **options)
    for flag, options in filestore_args.items():
        filestore_group.add_argument(flag, **options)

    # Do not parse args, so that consumers can do something before the args get
    # parsed triggering argparse behavior
    return parser
+
+
# prepare and create accept identical CLI options, so both aliases point at
# the same parser factory
create_parser = common_parser  # noqa
prepare_parser = common_parser  # noqa
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/create.py b/src/ceph-volume/ceph_volume/devices/lvm/create.py
new file mode 100644
index 000000000..af2cd96c0
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/create.py
@@ -0,0 +1,77 @@
+from __future__ import print_function
+from textwrap import dedent
+import logging
+from ceph_volume.util import system
+from ceph_volume.util.arg_validators import exclude_group_options
+from ceph_volume import decorators, terminal
+from .common import create_parser, rollback_osd
+from .prepare import Prepare
+from .activate import Activate
+
+logger = logging.getLogger(__name__)
+
+
class Create(object):
    """Convenience subcommand: prepare + activate a new OSD in one step."""

    help = 'Create a new OSD from an LVM device'

    def __init__(self, argv):
        self.argv = argv

    @decorators.needs_root
    def create(self, args):
        """Prepare then activate an OSD; on activation failure the freshly
        allocated OSD id is rolled back so it can be reused."""
        if not args.osd_fsid:
            args.osd_fsid = system.generate_uuid()
        prepare_step = Prepare([])
        prepare_step.safe_prepare(args)
        osd_id = prepare_step.osd_id
        try:
            # we try this for activate only when 'creating' an OSD, because a rollback should not
            # happen when doing normal activation. For example when starting an OSD, systemd will call
            # activate, which would never need to be rolled back.
            Activate([]).activate(args)
        except Exception:
            logger.exception('lvm activate was unable to complete, while creating the OSD')
            logger.info('will rollback OSD ID creation')
            rollback_osd(args, osd_id)
            raise
        terminal.success("ceph-volume lvm create successful for: %s" % args.data)

    def main(self):
        """Parse CLI args and run create(); with no args, print usage help."""
        sub_command_help = dedent("""
        Create an OSD by assigning an ID and FSID, registering them with the
        cluster with an ID and FSID, formatting and mounting the volume, adding
        all the metadata to the logical volumes using LVM tags, and starting
        the OSD daemon. This is a convenience command that combines the prepare
        and activate steps.

        Encryption is supported via dmcrypt and the --dmcrypt flag.

        Existing logical volume (lv):

            ceph-volume lvm create --data {vg/lv}

        Existing block device (a logical volume will be created):

            ceph-volume lvm create --data /path/to/device

        Optionally, can consume db and wal block devices, partitions or logical
        volumes. A device will get a logical volume, partitions and existing
        logical volumes will be used as is:

            ceph-volume lvm create --data {vg/lv} --block.wal {partition} --block.db {/path/to/device}
        """)
        parser = create_parser(
            prog='ceph-volume lvm create',
            description=sub_command_help,
        )
        if len(self.argv) == 0:
            print(sub_command_help)
            return
        # --filestore and --bluestore are mutually exclusive across groups
        exclude_group_options(parser, groups=['filestore', 'bluestore'], argv=self.argv)
        args = parser.parse_args(self.argv)
        # Default to bluestore here since defaulting it in add_argument may
        # cause both to be True
        if not args.bluestore and not args.filestore:
            args.bluestore = True
        self.create(args)
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/deactivate.py b/src/ceph-volume/ceph_volume/devices/lvm/deactivate.py
new file mode 100644
index 000000000..46846a1dc
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/deactivate.py
@@ -0,0 +1,88 @@
+import argparse
+import logging
+import sys
+from textwrap import dedent
+from ceph_volume import conf
+from ceph_volume.util import encryption, system
+from ceph_volume.api.lvm import get_lvs_by_tag
+
+logger = logging.getLogger(__name__)
+
+
def deactivate_osd(osd_id=None, osd_uuid=None):
    """Unmount an OSD's tmpfs directory and close any dm-crypt mappings.

    The OSD is located by ``osd_uuid`` when given (its id is then recovered
    from the LV tags), otherwise by ``osd_id``. Raises StopIteration when no
    matching data/block LV exists.
    """
    if osd_uuid is not None:
        lvs = get_lvs_by_tag('ceph.osd_fsid={}'.format(osd_uuid))
        # the fsid uniquely identifies the OSD; pick the id off any LV's tags
        osd_id = next(lv.tags['ceph.osd_id'] for lv in lvs)
    else:
        lvs = get_lvs_by_tag('ceph.osd_id={}'.format(osd_id))

    data_lv = next(lv for lv in lvs if lv.tags['ceph.type'] in ['data', 'block'])

    conf.cluster = data_lv.tags['ceph.cluster_name']
    logger.debug('Found cluster name {}'.format(conf.cluster))

    system.unmount_tmpfs('/var/lib/ceph/osd/{}-{}'.format(conf.cluster, osd_id))

    # close encrypted mappings for every LV flagged as encrypted
    for lv in lvs:
        if lv.tags.get('ceph.encrypted', '0') == '1':
            encryption.dmcrypt_close(lv.lv_uuid)
+
+
class Deactivate(object):
    """CLI subcommand: unmount an OSD's tmpfs and close its crypt devices."""

    help = 'Deactivate OSDs'

    def __init__(self, argv):
        self.argv = argv

    def deactivate(self, args=None):
        """Run deactivation for the OSD identified in ``args``.

        Exits with status 1 when no data/block LV can be found.
        """
        if args:
            self.args = args
        try:
            deactivate_osd(self.args.osd_id, self.args.osd_uuid)
        except StopIteration:
            # fixed: the concatenated message used to render "OSD{}"
            logger.error(('No data or block LV found for OSD '
                          '{}').format(self.args.osd_id))
            sys.exit(1)

    def main(self):
        sub_command_help = dedent("""
        Deactivate unmounts an OSD's tmpfs and closes any crypt devices.

        ceph-volume lvm deactivate {ID} {FSID}

        """)
        parser = argparse.ArgumentParser(
            prog='ceph-volume lvm deactivate',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=sub_command_help,
        )

        parser.add_argument(
            'osd_id',
            nargs='?',
            help='The ID of the OSD'
        )
        parser.add_argument(
            'osd_uuid',
            nargs='?',
            help='The UUID of the OSD, similar to a SHA1, takes precedence over osd_id'
        )
        if len(self.argv) == 0:
            print(sub_command_help)
            return
        args = parser.parse_args(self.argv)
        # at least one identifier is needed to locate the OSD
        # (fixed: message referred to a never-implemented '--all' flag and
        # rendered as "either all orosd_id")
        if not args.osd_id and not args.osd_uuid:
            raise ValueError('Can not identify OSD, pass either '
                             'osd_id or osd_uuid')
        self.deactivate(args)
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/listing.py b/src/ceph-volume/ceph_volume/devices/lvm/listing.py
new file mode 100644
index 000000000..c16afdaa7
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/listing.py
@@ -0,0 +1,223 @@
+from __future__ import print_function
+import argparse
+import json
+import logging
+from textwrap import dedent
+from ceph_volume import decorators
+from ceph_volume.api import lvm as api
+
+logger = logging.getLogger(__name__)
+
+
# header printed once per OSD, centered in '=' padding, e.g. '==== osd.1 ===='
osd_list_header_template = """\n
{osd_id:=^20}"""


# header for each device belonging to an OSD: '  [block]       /dev/...'
osd_device_header_template = """

  {type: <13} {path}
"""

# one aligned 'tag-name  value' metadata line
device_metadata_item_template = """
  {tag_name: <25} {value}"""
+
+
def readable_tag(tag):
    """Turn a namespaced LVM tag name like 'ceph.osd_fsid' into 'osd fsid'."""
    trailing = tag.rsplit('.', 1)[-1]
    return trailing.replace('_', ' ')
+
+
def pretty_report(report):
    """Print a formatted view of ``report``, a {osd_id: [device dicts]} map."""
    chunks = []
    for osd_id, devices in sorted(report.items()):
        chunks.append(
            osd_list_header_template.format(osd_id=" osd.%s " % osd_id))
        for device in devices:
            chunks.append(
                osd_device_header_template.format(
                    type='[%s]' % device['type'],
                    path=device['path']))
            for tag_name, value in sorted(device.get('tags', {}).items()):
                chunks.append(
                    device_metadata_item_template.format(
                        tag_name=readable_tag(tag_name),
                        value=value))
            # physical device paths are only known for LVs; skip when absent
            if device.get('devices'):
                chunks.append(
                    device_metadata_item_template.format(
                        tag_name='devices',
                        value=','.join(device['devices'])))

    print(''.join(chunks))
+
+
def direct_report():
    """
    Other non-cli consumers of listing information will want to consume the
    report without the need to parse arguments or other flags. This helper
    bypasses the need to deal with the class interface which is meant for cli
    handling.

    Returns the same {osd_id: [device dicts]} mapping as ``List.full_report``.
    """
    return List([]).full_report()
+
+
+# TODO: Perhaps, get rid of this class and simplify this module further?
class List(object):
    """CLI subcommand: report LVs/devices that carry Ceph OSD metadata."""

    help = 'list logical volumes and devices associated with Ceph'

    def __init__(self, argv):
        self.argv = argv

    @decorators.needs_root
    def list(self, args):
        """Produce the report (single device or full) and print it in the
        requested format."""
        report = self.single_report(args.device) if args.device else \
                 self.full_report()
        if args.format == 'json':
            # If the report is empty, we don't return a non-zero exit status
            # because it is assumed this is going to be consumed by automated
            # systems like ceph-ansible which would be forced to ignore the
            # non-zero exit status if all they need is the information in the
            # JSON object
            print(json.dumps(report, indent=4, sort_keys=True))
        else:
            if not report:
                raise SystemExit('No valid Ceph lvm devices found')
            pretty_report(report)

    def create_report(self, lvs):
        """
        Create a report for LVM dev(s) passed. Returns '{}' to denote failure.
        """

        report = {}

        pvs = api.get_pvs()

        for lv in lvs:
            # skip LVs that carry no Ceph metadata at all
            if not api.is_ceph_device(lv):
                continue

            osd_id = lv.tags['ceph.osd_id']
            report.setdefault(osd_id, [])
            lv_report = lv.as_dict()

            # physical devices backing this LV, matched through the PV list
            lv_report['devices'] = [pv.name for pv in pvs if pv.lv_uuid == lv.lv_uuid] if pvs else []
            report[osd_id].append(lv_report)

            # journal/wal/db may live on a raw partition rather than an LV
            phys_devs = self.create_report_non_lv_device(lv)
            if phys_devs:
                report[osd_id].append(phys_devs)

        return report

    def create_report_non_lv_device(self, lv):
        """Return a report entry for a partition-backed journal/wal/db of the
        given data/block LV, or {} when all backing devices are LVs."""
        report = {}
        if lv.tags.get('ceph.type', '') in ['data', 'block']:
            for dev_type in ['journal', 'wal', 'db']:
                dev = lv.tags.get('ceph.{}_device'.format(dev_type), '')
                # counting / in the device name seems brittle but should work,
                # lvs will have 3
                if dev and dev.count('/') == 2:
                    device_uuid = lv.tags.get('ceph.{}_uuid'.format(dev_type))
                    report = {'tags': {'PARTUUID': device_uuid},
                              'type': dev_type,
                              'path': dev}
        return report

    def full_report(self):
        """
        Create a report of all Ceph LVs. Returns '{}' to denote failure.
        """
        return self.create_report(api.get_lvs())

    def single_report(self, arg):
        """
        Generate a report for a single device. This can be either a logical
        volume in the form of vg/lv, a device with an absolute path like
        /dev/sda1 or /dev/sda, or a list of devices under same OSD ID.

        Return value '{}' denotes failure.
        """
        # arg may be an OSD id, an absolute device path, or a vg/lv name
        if isinstance(arg, int) or arg.isdigit():
            lv = api.get_lvs_from_osd_id(arg)
        elif arg[0] == '/':
            lv = api.get_lvs_from_path(arg)
        else:
            lv = [api.get_single_lv(filters={'lv_name': arg.split('/')[1]})]

        report = self.create_report(lv)

        if not report:
            # check if device is a non-lvm journals or wal/db
            for dev_type in ['journal', 'wal', 'db']:
                lvs = api.get_lvs(tags={
                    'ceph.{}_device'.format(dev_type): arg})
                if lvs:
                    # just taking the first lv here should work
                    lv = lvs[0]
                    phys_dev = self.create_report_non_lv_device(lv)
                    osd_id = lv.tags.get('ceph.osd_id')
                    if osd_id:
                        report[osd_id] = [phys_dev]


        return report

    def main(self):
        """Parse CLI args and run ``list``."""
        sub_command_help = dedent("""
        List devices or logical volumes associated with Ceph. An association is
        determined if a device has information relating to an OSD. This is
        verified by querying LVM's metadata and correlating it with devices.

        The lvs associated with the OSD need to have been prepared previously,
        so that all needed tags and metadata exist.

        Full listing of all system devices associated with a cluster::

            ceph-volume lvm list

        List devices under same OSD ID::

            ceph-volume lvm list <OSD-ID>

        List a particular device, reporting all metadata about it::

            ceph-volume lvm list /dev/sda1

        List a logical volume, along with all its metadata (vg is a volume
        group, and lv the logical volume name)::

            ceph-volume lvm list {vg/lv}
        """)
        parser = argparse.ArgumentParser(
            prog='ceph-volume lvm list',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=sub_command_help,
        )

        parser.add_argument(
            'device',
            metavar='DEVICE',
            nargs='?',
            help='Path to an lv (as vg/lv) or to a device like /dev/sda1'
        )

        parser.add_argument(
            '--format',
            help='output format, defaults to "pretty"',
            default='pretty',
            choices=['json', 'pretty'],
        )

        args = parser.parse_args(self.argv)
        self.list(args)
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/main.py b/src/ceph-volume/ceph_volume/devices/lvm/main.py
new file mode 100644
index 000000000..39947454d
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/main.py
@@ -0,0 +1,54 @@
+import argparse
+from textwrap import dedent
+from ceph_volume import terminal
+from . import activate
+from . import deactivate
+from . import prepare
+from . import create
+from . import trigger
+from . import listing
+from . import zap
+from . import batch
+from . import migrate
+
+
class LVM(object):
    """Entry point for the `ceph-volume lvm` subcommand tree."""

    help = 'Use LVM and LVM-based technologies to deploy OSDs'

    _help = dedent("""
    Use LVM and LVM-based technologies to deploy OSDs

    {sub_help}
    """)

    # maps subcommand names to their implementing classes; terminal.dispatch
    # instantiates the matching class with the remaining argv
    mapper = {
        'activate': activate.Activate,
        'deactivate': deactivate.Deactivate,
        'batch': batch.Batch,
        'prepare': prepare.Prepare,
        'create': create.Create,
        'trigger': trigger.Trigger,
        'list': listing.List,
        'zap': zap.Zap,
        'migrate': migrate.Migrate,
        'new-wal': migrate.NewWAL,
        'new-db': migrate.NewDB,
    }

    def __init__(self, argv):
        self.argv = argv

    def print_help(self, sub_help):
        """Render the top-level help with the subcommand summary filled in."""
        return self._help.format(sub_help=sub_help)

    def main(self):
        """Dispatch to a subcommand, falling back to help output."""
        # dispatch runs first: presumably it only returns here when no known
        # subcommand matched, leaving the parser below to handle help/errors
        # -- TODO confirm against terminal.dispatch
        terminal.dispatch(self.mapper, self.argv)
        parser = argparse.ArgumentParser(
            prog='ceph-volume lvm',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=self.print_help(terminal.subhelp(self.mapper)),
        )
        parser.parse_args(self.argv)
        if len(self.argv) <= 1:
            return parser.print_help()
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/migrate.py b/src/ceph-volume/ceph_volume/devices/lvm/migrate.py
new file mode 100644
index 000000000..86159fd50
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/migrate.py
@@ -0,0 +1,693 @@
+from __future__ import print_function
+import argparse
+import logging
+import os
+from textwrap import dedent
+from ceph_volume.util import system, disk, merge_dict
+from ceph_volume.util.device import Device
+from ceph_volume.util.arg_validators import valid_osd_id
+from ceph_volume import decorators, terminal, process
+from ceph_volume.api import lvm as api
+from ceph_volume.systemd import systemctl
+
+
+logger = logging.getLogger(__name__)
+mlogger = terminal.MultiLogger(__name__)
+
def get_cluster_name(osd_id, osd_fsid):
    """
    From an ``osd_id`` and/or an ``osd_fsid``, filter out all the LVs in the
    system that match those tag values, then return cluster_name for the first
    one.

    Exits via SystemExit when no matching LV exists.
    """
    lv_tags = {'ceph.osd_id': osd_id, 'ceph.osd_fsid': osd_fsid}

    lvs = api.get_lvs(tags=lv_tags)
    if not lvs:
        mlogger.error(
            'Unable to find any LV for source OSD: id:{} fsid:{}'.format(
                osd_id, osd_fsid))
        raise SystemExit('Unexpected error, terminating')

    first_lv = next(iter(lvs))
    return first_lv.tags["ceph.cluster_name"]
+
def get_osd_path(osd_id, osd_fsid):
    """Return the OSD's data directory, e.g. /var/lib/ceph/osd/ceph-0."""
    cluster = get_cluster_name(osd_id, osd_fsid)
    return '/var/lib/ceph/osd/{}-{}'.format(cluster, osd_id)
+
def find_associated_devices(osd_id, osd_fsid):
    """
    From an ``osd_id`` and/or an ``osd_fsid``, filter out all the LVs in the
    system that match those tag values, further detect if any partitions are
    part of the OSD, and then return the set of LVs and partitions (if any).
    """
    lv_tags = {'ceph.osd_id': osd_id, 'ceph.osd_fsid': osd_fsid}

    lvs = api.get_lvs(tags=lv_tags)
    if not lvs:
        mlogger.error(
            'Unable to find any LV for source OSD: id:{} fsid:{}'.format(
                osd_id, osd_fsid))
        raise SystemExit('Unexpected error, terminating')

    # the set() de-duplicates (path, type) pairs before wrapping each path
    unique_devices = set(ensure_associated_lvs(lvs, lv_tags))
    return [(Device(path), dev_type)
            for path, dev_type in unique_devices if path]
+
def ensure_associated_lvs(lvs, lv_tags):
    """
    Go through each LV and ensure if backing devices (journal, wal, block)
    are LVs or partitions, so that they can be accurately reported.

    Returns a list of (device_path, type) tuples.
    """
    # look for many LVs for each backing type, because it is possible to
    # receive a filtering for osd.1, and have multiple failed deployments
    # leaving many journals with osd.1 - usually, only a single LV will be
    # returned
    backing_devices = [
        (api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'block'})), 'block'),
        (api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'db'})), 'db'),
        (api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'wal'})), 'wal'),
    ]

    verified_devices = []

    for lv in lvs:
        # go through each lv and append it, otherwise query `blkid` to find
        # a physical device. Do this for each type (journal,db,wal) regardless
        # if they have been processed in the previous LV, so that bad devices
        # with the same ID can be caught
        for ceph_lvs, dev_type in backing_devices:
            if ceph_lvs:
                verified_devices.extend(
                    (backing_lv.lv_path, dev_type) for backing_lv in ceph_lvs)
                continue

            # must be a disk partition, by querying blkid by the uuid we are
            # ensuring that the device path is always correct
            uuid_tag = 'ceph.{}_uuid'.format(dev_type)
            if uuid_tag not in lv.tags:
                # Bluestore will not have ceph.journal_uuid, and Filestore
                # will not have ceph.db_uuid
                continue

            osd_device = disk.get_device_from_partuuid(lv.tags[uuid_tag])
            if not osd_device:
                # if the osd_device is not found by the partuuid, then it is
                # not possible to ensure this device exists anymore, so skip it
                continue
            verified_devices.append((osd_device, dev_type))

    return verified_devices
+
class VolumeTagTracker(object):
    """Tracks and mutates the LVM tags of an OSD's volumes during migration.

    Snapshots every volume's tags at construction so that ``undo`` can restore
    the pre-migration state if the migration fails part-way.
    """

    def __init__(self, devices, target_lv):
        """``devices`` is a list of (Device, type) pairs; ``target_lv`` is the
        LV data is being migrated onto. Exits when no LVM data device exists."""
        self.target_lv = target_lv
        self.data_device = self.db_device = self.wal_device = None
        for device, type in devices:
            if type == 'block':
                self.data_device = device
            elif type == 'db':
                self.db_device = device
            elif type == 'wal':
                self.wal_device = device
        if not self.data_device:
            mlogger.error('Data device not found')
            raise SystemExit(
                "Unexpected error, terminating")
        if not self.data_device.is_lv:
            mlogger.error('Data device isn\'t LVM')
            raise SystemExit(
                "Unexpected error, terminating")

        # snapshot current tags of every volume so undo() can restore them
        self.old_target_tags = self.target_lv.tags.copy()
        self.old_data_tags = (
            self.data_device.lv_api.tags.copy()
            if self.data_device.is_lv else None)
        self.old_db_tags = (
            self.db_device.lv_api.tags.copy()
            if self.db_device and self.db_device.is_lv else None)
        self.old_wal_tags = (
            self.wal_device.lv_api.tags.copy()
            if self.wal_device and self.wal_device.is_lv else None)

    def update_tags_when_lv_create(self, create_type):
        """Tag the data LV, the new target LV and (if present) the sibling
        db/wal LV with the newly created ``create_type`` volume's identity."""
        tags = {}
        if not self.data_device.is_lv:
            mlogger.warning(
                'Data device is not LVM, wouldn\'t update LVM tags')
        else:
            tags["ceph.{}_uuid".format(create_type)] = self.target_lv.lv_uuid
            tags["ceph.{}_device".format(create_type)] = self.target_lv.lv_path
            self.data_device.lv_api.set_tags(tags)

            # the new LV inherits the data LV's tags, with its own type
            tags = self.data_device.lv_api.tags.copy()
            tags["ceph.type"] = create_type
            self.target_lv.set_tags(tags)

        # the sibling fast device (wal when creating db, db when creating wal)
        # must also learn about the new volume
        aux_dev = None
        if create_type == "db" and self.wal_device:
            aux_dev = self.wal_device
        elif create_type == "wal" and self.db_device:
            aux_dev = self.db_device
        else:
            return
        if not aux_dev.is_lv:
            mlogger.warning(
                '{} device is not LVM, wouldn\'t update LVM tags'.format(
                    create_type.upper()))
        else:
            tags = {}
            tags["ceph.{}_uuid".format(create_type)] = self.target_lv.lv_uuid
            tags["ceph.{}_device".format(create_type)] = self.target_lv.lv_path
            aux_dev.lv_api.set_tags(tags)

    def remove_lvs(self, source_devices, target_type):
        """Clear tags of source LVs being removed and drop references to them
        from the remaining volumes' tags."""
        remaining_devices = [self.data_device, self.db_device, self.wal_device]

        outdated_tags = []
        for device, type in source_devices:
            # the block device and the migration target stay in place
            if type == "block" or type == target_type:
                continue
            remaining_devices.remove(device)
            if device.is_lv:
                outdated_tags.append("ceph.{}_uuid".format(type))
                outdated_tags.append("ceph.{}_device".format(type))
                device.lv_api.clear_tags()
        if len(outdated_tags) > 0:
            for d in remaining_devices:
                if d and d.is_lv:
                    d.lv_api.clear_tags(outdated_tags)

    def replace_lvs(self, source_devices, target_type):
        """Clear tags of all removed source LVs and point every remaining
        volume (and the target itself) at the new ``target_type`` volume."""
        remaining_devices = [self.data_device]
        if self.db_device:
            remaining_devices.append(self.db_device)
        if self.wal_device:
            remaining_devices.append(self.wal_device)

        outdated_tags = []
        for device, type in source_devices:
            if type == "block":
                continue
            remaining_devices.remove(device)
            if device.is_lv:
                outdated_tags.append("ceph.{}_uuid".format(type))
                outdated_tags.append("ceph.{}_device".format(type))
                device.lv_api.clear_tags()

        new_tags = {}
        new_tags["ceph.{}_uuid".format(target_type)] = self.target_lv.lv_uuid
        new_tags["ceph.{}_device".format(target_type)] = self.target_lv.lv_path

        for d in remaining_devices:
            if d and d.is_lv:
                if len(outdated_tags) > 0:
                    d.lv_api.clear_tags(outdated_tags)
                d.lv_api.set_tags(new_tags)

        if not self.data_device.is_lv:
            mlogger.warning(
                'Data device is not LVM, wouldn\'t properly update target LVM tags')
        else:
            # target LV takes over the data LV's tags with its own identity
            tags = self.data_device.lv_api.tags.copy()

            tags["ceph.type"] = target_type
            tags["ceph.{}_uuid".format(target_type)] = self.target_lv.lv_uuid
            tags["ceph.{}_device".format(target_type)] = self.target_lv.lv_path
            self.target_lv.set_tags(tags)

    def undo(self):
        """Restore every volume's tags to the snapshot taken at __init__."""
        mlogger.info(
            'Undoing lv tag set')
        if self.data_device:
            if self.old_data_tags:
                self.data_device.lv_api.set_tags(self.old_data_tags)
            else:
                self.data_device.lv_api.clear_tags()
        if self.db_device:
            if self.old_db_tags:
                self.db_device.lv_api.set_tags(self.old_db_tags)
            else:
                self.db_device.lv_api.clear_tags()
        if self.wal_device:
            if self.old_wal_tags:
                self.wal_device.lv_api.set_tags(self.old_wal_tags)
            else:
                self.wal_device.lv_api.clear_tags()
        if self.old_target_tags:
            self.target_lv.set_tags(self.old_target_tags)
        else:
            self.target_lv.clear_tags()
+
class Migrate(object):

    # fixed typo: previously read "Migrate BlueFS data from to another LVM device"
    help = 'Migrate BlueFS data to another LVM device'

    def __init__(self, argv):
        self.argv = argv
        # populated after CLI args are parsed
        self.osd_id = None
+
+ def get_source_devices(self, devices, target_type=""):
+ ret = []
+ for device, type in devices:
+ if type == target_type:
+ continue
+ if type == 'block':
+ if 'data' not in self.args.from_:
+ continue;
+ elif type == 'db':
+ if 'db' not in self.args.from_:
+ continue;
+ elif type == 'wal':
+ if 'wal' not in self.args.from_:
+ continue;
+ ret.append([device, type])
+ if ret == []:
+ mlogger.error('Source device list is empty')
+ raise SystemExit(
+ 'Unable to migrate to : {}'.format(self.args.target))
+ return ret
+
    # ceph-bluestore-tool applies the following replacement rules
    # (in order of precedence, stopping on the first match):
    # - if the source list has a DB volume, the target device replaces it.
    # - if the source list has a WAL volume, the target device replaces it.
    # - if the source list has the slow volume only, the operation is not
    #   permitted and requires explicit allocation via the new-db/new-wal
    #   commands.
+ def get_target_type_by_source(self, devices):
+ ret = None
+ for device, type in devices:
+ if type == 'db':
+ return 'db'
+ elif type == 'wal':
+ ret = 'wal'
+ return ret
+
+ def get_filename_by_type(self, type):
+ filename = 'block'
+ if type == 'db' or type == 'wal':
+ filename += '.' + type
+ return filename
+
+ def get_source_args(self, osd_path, devices):
+ ret = []
+ for device, type in devices:
+ ret = ret + ["--devs-source", os.path.join(
+ osd_path, self.get_filename_by_type(type))]
+ return ret
+
+ @decorators.needs_root
+ def migrate_to_new(self, osd_id, osd_fsid, devices, target_lv):
+ source_devices = self.get_source_devices(devices)
+ target_type = self.get_target_type_by_source(source_devices)
+ if not target_type:
+ mlogger.error(
+ "Unable to determine new volume type,"
+ " please use new-db or new-wal command before.")
+ raise SystemExit(
+ "Unable to migrate to : {}".format(self.args.target))
+
+ target_path = target_lv.lv_path
+
+ try:
+ tag_tracker = VolumeTagTracker(devices, target_lv)
+ # we need to update lvm tags for all the remaining volumes
+ # and clear for ones which to be removed
+
+ # ceph-bluestore-tool removes source volume(s) other than block one
+ # and attaches target one after successful migration
+ tag_tracker.replace_lvs(source_devices, target_type)
+
+ osd_path = get_osd_path(osd_id, osd_fsid)
+ source_args = self.get_source_args(osd_path, source_devices)
+ mlogger.info("Migrate to new, Source: {} Target: {}".format(
+ source_args, target_path))
+ stdout, stderr, exit_code = process.call([
+ 'ceph-bluestore-tool',
+ '--path',
+ osd_path,
+ '--dev-target',
+ target_path,
+ '--command',
+ 'bluefs-bdev-migrate'] +
+ source_args)
+ if exit_code != 0:
+ mlogger.error(
+ 'Failed to migrate device, error code:{}'.format(exit_code))
+ raise SystemExit(
+ 'Failed to migrate to : {}'.format(self.args.target))
+ else:
+ system.chown(os.path.join(osd_path, "block.{}".format(
+ target_type)))
+ terminal.success('Migration successful.')
+ except:
+ tag_tracker.undo()
+ raise
+
+ return
+
    @decorators.needs_root
    def migrate_to_existing(self, osd_id, osd_fsid, devices, target_lv):
        """Migrate BlueFS data onto an LV already attached to this OSD.

        Migrating onto a WAL volume is rejected. LVM tags are rolled back if
        the ceph-bluestore-tool invocation fails.
        """
        target_type = target_lv.tags["ceph.type"]
        if target_type == "wal":
            mlogger.error("Migrate to WAL is not supported")
            raise SystemExit(
                "Unable to migrate to : {}".format(self.args.target))
        target_filename = self.get_filename_by_type(target_type)
        # NOTE(review): get_filename_by_type never returns '' (it falls back
        # to 'block'), so this branch looks unreachable — confirm before
        # removing
        if (target_filename == ""):
            mlogger.error(
                "Target Logical Volume doesn't have proper volume type "
                "(ceph.type LVM tag): {}".format(target_type))
            raise SystemExit(
                "Unable to migrate to : {}".format(self.args.target))

        osd_path = get_osd_path(osd_id, osd_fsid)
        source_devices = self.get_source_devices(devices, target_type)
        target_path = os.path.join(osd_path, target_filename)
        # created before the try so the except clause can always undo
        tag_tracker = VolumeTagTracker(devices, target_lv)

        try:
            # ceph-bluestore-tool removes source volume(s) other than
            # block and target ones after successful migration
            tag_tracker.remove_lvs(source_devices, target_type)
            source_args = self.get_source_args(osd_path, source_devices)
            mlogger.info("Migrate to existing, Source: {} Target: {}".format(
                source_args, target_path))
            stdout, stderr, exit_code = process.call([
                'ceph-bluestore-tool',
                '--path',
                osd_path,
                '--dev-target',
                target_path,
                '--command',
                'bluefs-bdev-migrate'] +
                source_args)
            if exit_code != 0:
                mlogger.error(
                    'Failed to migrate device, error code:{}'.format(exit_code))
                raise SystemExit(
                    'Failed to migrate to : {}'.format(self.args.target))
            else:
                terminal.success('Migration successful.')
        except:
            # restore pre-migration LVM tags, then re-raise
            tag_tracker.undo()
            raise

        return
+
    @decorators.needs_root
    def migrate_osd(self):
        """Validate preconditions and dispatch to migrate_to_new or
        migrate_to_existing depending on whether the target LV is already
        used by this OSD.

        Exits via SystemExit when the OSD is running (unless --no-systemd),
        the target is not an LV, or the target belongs to a different OSD.
        """
        # refuse to touch a live OSD unless the user opted out of the check
        if self.args.osd_id and not self.args.no_systemd:
            osd_is_running = systemctl.osd_is_active(self.args.osd_id)
            if osd_is_running:
                mlogger.error('OSD is running, stop it with: '
                    'systemctl stop ceph-osd@{}'.format(
                        self.args.osd_id))
                raise SystemExit(
                    'Unable to migrate devices associated with OSD ID: {}'
                        .format(self.args.osd_id))

        target_lv = api.get_lv_by_fullname(self.args.target)
        if not target_lv:
            mlogger.error(
                'Target path "{}" is not a Logical Volume'.format(
                    self.args.target))
            raise SystemExit(
                'Unable to migrate to : {}'.format(self.args.target))
        devices = find_associated_devices(self.args.osd_id, self.args.osd_fsid)
        # a fresh LV gets attached; an LV already used by ceph must belong to
        # this very OSD to be a valid migration target
        if (not target_lv.used_by_ceph):
            self.migrate_to_new(self.args.osd_id, self.args.osd_fsid,
                devices,
                target_lv)
        else:
            if (target_lv.tags['ceph.osd_id'] != self.args.osd_id or
                    target_lv.tags['ceph.osd_fsid'] != self.args.osd_fsid):
                mlogger.error(
                    'Target Logical Volume isn\'t used by the specified OSD: '
                        '{} FSID: {}'.format(self.args.osd_id,
                            self.args.osd_fsid))
                raise SystemExit(
                    'Unable to migrate to : {}'.format(self.args.target))

            self.migrate_to_existing(self.args.osd_id, self.args.osd_fsid,
                devices,
                target_lv)
+
+ def make_parser(self, prog, sub_command_help):
+ parser = argparse.ArgumentParser(
+ prog=prog,
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ description=sub_command_help,
+ )
+
+ parser.add_argument(
+ '--osd-id',
+ required=True,
+ help='Specify an OSD ID to detect associated devices for zapping',
+ type=valid_osd_id
+ )
+
+ parser.add_argument(
+ '--osd-fsid',
+ required=True,
+ help='Specify an OSD FSID to detect associated devices for zapping',
+ )
+ parser.add_argument(
+ '--target',
+ required=True,
+ help='Specify target Logical Volume (LV) to migrate data to',
+ )
+ parser.add_argument(
+ '--from',
+ nargs='*',
+ dest='from_',
+ required=True,
+ choices=['data', 'db', 'wal'],
+ help='Copy BlueFS data from DB device',
+ )
+ parser.add_argument(
+ '--no-systemd',
+ dest='no_systemd',
+ action='store_true',
+ help='Skip checking OSD systemd unit',
+ )
+ return parser
+
    def main(self):
        """
        Entry point for ``ceph-volume lvm migrate``: print help when called
        with no arguments, otherwise parse them and run migrate_osd().
        """
        sub_command_help = dedent("""
        Moves BlueFS data from source volume(s) to the target one, source
        volumes (except the main (i.e. data or block) one) are removed on
        success. LVM volumes are permitted for Target only, both already
        attached or new logical one. In the latter case it is attached to OSD
        replacing one of the source devices. Following replacement rules apply
        (in the order of precedence, stop on the first match):
        * if source list has DB volume - target device replaces it.
        * if source list has WAL volume - target device replace it.
        * if source list has slow volume only - operation is not permitted,
          requires explicit allocation via new-db/new-wal command.

        Example calls for supported scenarios:

          Moves BlueFS data from main device to LV already attached as DB:

            ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data --target vgname/db

          Moves BlueFS data from shared main device to LV which will be attached
          as a new DB:

            ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data --target vgname/new_db

          Moves BlueFS data from DB device to new LV, DB is replaced:

            ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from db --target vgname/new_db

          Moves BlueFS data from main and DB devices to new LV, DB is replaced:

            ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data db --target vgname/new_db

          Moves BlueFS data from main, DB and WAL devices to new LV, WAL is
          removed and DB is replaced:

            ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data db wal --target vgname/new_db

          Moves BlueFS data from main, DB and WAL devices to main device, WAL
          and DB are removed:

            ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from db wal --target vgname/data

        """)

        parser = self.make_parser('ceph-volume lvm migrate', sub_command_help)

        # bare invocation: show help instead of argparse's error output
        if len(self.argv) == 0:
            print(sub_command_help)
            return

        self.args = parser.parse_args(self.argv)

        self.migrate_osd()
+
class NewVolume(object):
    """
    Shared implementation for the ``new-db``/``new-wal`` sub-commands: parse
    arguments, validate the target LV, and attach it to an existing OSD via
    ceph-bluestore-tool.
    """

    def __init__(self, create_type, argv):
        # create_type is 'db' or 'wal', fixed by the subclasses
        self.create_type = create_type
        self.argv = argv

    def make_parser(self, prog, sub_command_help):
        """Build the argument parser shared by new-db/new-wal."""
        parser = argparse.ArgumentParser(
            prog=prog,
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=sub_command_help,
        )

        parser.add_argument(
            '--osd-id',
            required=True,
            help='Specify an OSD ID to attach new volume to',
            type=valid_osd_id,
        )

        parser.add_argument(
            '--osd-fsid',
            required=True,
            # fixed missing space: was 'Specify an OSD FSIDto attach...'
            help='Specify an OSD FSID to attach new volume to',
        )
        parser.add_argument(
            '--target',
            required=True,
            help='Specify target Logical Volume (LV) to attach',
        )
        parser.add_argument(
            '--no-systemd',
            dest='no_systemd',
            action='store_true',
            help='Skip checking OSD systemd unit',
        )
        return parser

    @decorators.needs_root
    def make_new_volume(self, osd_id, osd_fsid, devices, target_lv):
        """
        Attach ``target_lv`` to the OSD using ceph-bluestore-tool, updating
        the LVM tags first and rolling them back when anything fails.

        :param osd_id: the OSD id the volume is attached to
        :param osd_fsid: the OSD fsid (uuid)
        :param devices: LVs currently associated with the OSD
        :param target_lv: the LV to attach as block.db/block.wal
        :raises SystemExit: when ceph-bluestore-tool reports a failure
        """
        osd_path = get_osd_path(osd_id, osd_fsid)
        mlogger.info(
            'Making new volume at {} for OSD: {} ({})'.format(
                target_lv.lv_path, osd_id, osd_path))
        tag_tracker = VolumeTagTracker(devices, target_lv)

        try:
            tag_tracker.update_tags_when_lv_create(self.create_type)

            stdout, stderr, exit_code = process.call([
                'ceph-bluestore-tool',
                '--path',
                osd_path,
                '--dev-target',
                target_lv.lv_path,
                '--command',
                'bluefs-bdev-new-{}'.format(self.create_type)
            ])
            if exit_code != 0:
                mlogger.error(
                    'failed to attach new volume, error code:{}'.format(
                        exit_code))
                raise SystemExit(
                    "Failed to attach new volume: {}".format(
                        self.args.target))
            else:
                system.chown(os.path.join(osd_path, "block.{}".format(
                    self.create_type)))
                terminal.success('New volume attached.')
        except BaseException:
            # was a bare `except:` — BaseException keeps identical semantics
            # (tags are rolled back even on KeyboardInterrupt/SystemExit)
            # while staying lint-clean; the exception is always re-raised
            tag_tracker.undo()
            raise
        return

    @decorators.needs_root
    def new_volume(self):
        """
        Validate CLI arguments (OSD not running, target is an unused LV) and
        delegate to make_new_volume().

        :raises SystemExit: when validation fails
        """
        if self.args.osd_id and not self.args.no_systemd:
            osd_is_running = systemctl.osd_is_active(self.args.osd_id)
            if osd_is_running:
                mlogger.error('OSD ID is running, stop it with:'
                    ' systemctl stop ceph-osd@{}'.format(self.args.osd_id))
                raise SystemExit(
                    'Unable to attach new volume for OSD: {}'.format(
                        self.args.osd_id))

        target_lv = api.get_lv_by_fullname(self.args.target)
        if not target_lv:
            mlogger.error(
                'Target path {} is not a Logical Volume'.format(
                    self.args.target))
            raise SystemExit(
                'Unable to attach new volume : {}'.format(self.args.target))
        if target_lv.used_by_ceph:
            mlogger.error(
                'Target Logical Volume is already used by ceph: {}'.format(
                    self.args.target))
            raise SystemExit(
                'Unable to attach new volume : {}'.format(self.args.target))
        else:
            devices = find_associated_devices(self.args.osd_id,
                                              self.args.osd_fsid)
            self.make_new_volume(
                self.args.osd_id,
                self.args.osd_fsid,
                devices,
                target_lv)
+
class NewWAL(NewVolume):
    """``ceph-volume lvm new-wal``: attach an LV to an OSD as its WAL."""

    help = 'Allocate new WAL volume for OSD at specified Logical Volume'

    def __init__(self, argv):
        super(NewWAL, self).__init__("wal", argv)

    def main(self):
        # help text fixed: the failure condition for new-wal is an already
        # attached WAL (the old text said DB, copy-pasted from new-db)
        sub_command_help = dedent("""
        Attaches the given logical volume to the given OSD as a WAL volume.
        Logical volume format is vg/lv. Fails if OSD has already got attached WAL.

        Example:

          Attach vgname/lvname as a WAL volume to OSD 1

              ceph-volume lvm new-wal --osd-id 1 --osd-fsid 55BD4219-16A7-4037-BC20-0F158EFCC83D --target vgname/new_wal
        """)
        parser = self.make_parser('ceph-volume lvm new-wal', sub_command_help)

        if len(self.argv) == 0:
            print(sub_command_help)
            return

        self.args = parser.parse_args(self.argv)

        self.new_volume()
+
class NewDB(NewVolume):
    """``ceph-volume lvm new-db``: attach an LV to an OSD as its DB volume."""

    help = 'Allocate new DB volume for OSD at specified Logical Volume'

    def __init__(self, argv):
        # delegate everything to NewVolume with the fixed 'db' create type
        super(NewDB, self).__init__("db", argv)

    def main(self):
        sub_command_help = dedent("""
        Attaches the given logical volume to the given OSD as a DB volume.
        Logical volume format is vg/lv. Fails if OSD has already got attached DB.

        Example:

          Attach vgname/lvname as a DB volume to OSD 1

              ceph-volume lvm new-db --osd-id 1 --osd-fsid 55BD4219-16A7-4037-BC20-0F158EFCC83D --target vgname/new_db
        """)

        parser = self.make_parser('ceph-volume lvm new-db', sub_command_help)
        # invoked without arguments: just show the help text and bail
        if not self.argv:
            print(sub_command_help)
            return
        self.args = parser.parse_args(self.argv)
        self.new_volume()
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/prepare.py b/src/ceph-volume/ceph_volume/devices/lvm/prepare.py
new file mode 100644
index 000000000..2f715fdba
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/prepare.py
@@ -0,0 +1,441 @@
+from __future__ import print_function
+import json
+import logging
+from textwrap import dedent
+from ceph_volume.util import prepare as prepare_utils
+from ceph_volume.util import encryption as encryption_utils
+from ceph_volume.util import system, disk
+from ceph_volume.util.arg_validators import exclude_group_options
+from ceph_volume import conf, decorators, terminal
+from ceph_volume.api import lvm as api
+from .common import prepare_parser, rollback_osd
+
+
+logger = logging.getLogger(__name__)
+
+
def prepare_dmcrypt(key, device, device_type, tags):
    """
    Helper for devices that are encrypted. The operations needed for
    block, db, wal, or data/journal devices are all the same

    :param key: the dmcrypt secret used to luksFormat/luksOpen the device
    :param device: the device path to encrypt; may be empty/None, in which
        case an empty string is returned
    :param device_type: one of 'data', 'journal', 'block', 'wal', 'db'; used
        to look up the ``ceph.<type>_uuid`` tag
    :param tags: dict of ceph.* LVM tag values providing the per-device uuid
    :returns: the /dev/mapper path of the opened device, or '' for no device
    """
    if not device:
        return ''
    # the dm mapper name is the uuid stored in the ceph.<type>_uuid tag
    tag_name = 'ceph.%s_uuid' % device_type
    uuid = tags[tag_name]
    # format data device
    encryption_utils.luks_format(
        key,
        device
    )
    encryption_utils.luks_open(
        key,
        device,
        uuid
    )

    return '/dev/mapper/%s' % uuid
+
+
def prepare_filestore(device, journal, secrets, tags, osd_id, fsid):
    """
    Format, mount and mkfs a filestore OSD on the given data device.

    :param device: The name of the logical volume to work with
    :param journal: similar to device but can also be a regular/plain disk
    :param secrets: A dict with the secrets needed to create the osd (e.g. cephx)
    :param tags: dict of ceph.* LVM tag values; supplies the dmcrypt uuids
        and the lockbox secret when encryption is enabled
    :param osd_id: The OSD id
    :param fsid: The OSD fsid, also known as the OSD UUID
    """
    cephx_secret = secrets.get('cephx_secret', prepare_utils.create_key())

    # encryption-only operations
    if secrets.get('dmcrypt_key'):
        # format and open ('decrypt' devices) and re-assign the device and journal
        # variables so that the rest of the process can use the mapper paths
        key = secrets['dmcrypt_key']
        device = prepare_dmcrypt(key, device, 'data', tags)
        journal = prepare_dmcrypt(key, journal, 'journal', tags)

    # vdo detection
    is_vdo = api.is_vdo(device)
    # create the directory
    prepare_utils.create_osd_path(osd_id)
    # format the device
    prepare_utils.format_device(device)
    # mount the data device
    prepare_utils.mount_osd(device, osd_id, is_vdo=is_vdo)
    # symlink the journal
    prepare_utils.link_journal(journal, osd_id)
    # get the latest monmap
    prepare_utils.get_monmap(osd_id)
    # prepare the osd filesystem
    prepare_utils.osd_mkfs_filestore(osd_id, fsid, cephx_secret)
    # write the OSD keyring if it doesn't exist already
    prepare_utils.write_keyring(osd_id, cephx_secret)
    if secrets.get('dmcrypt_key'):
        # if the device is going to get activated right away, this can be done
        # here, otherwise it will be recreated
        encryption_utils.write_lockbox_keyring(
            osd_id,
            fsid,
            tags['ceph.cephx_lockbox_secret']
        )
+
+
def prepare_bluestore(block, wal, db, secrets, tags, osd_id, fsid):
    """
    Create the tmpfs OSD directory and mkfs a bluestore OSD on ``block``.

    :param block: The name of the logical volume for the bluestore data
    :param wal: a regular/plain disk or logical volume, to be used for block.wal
    :param db: a regular/plain disk or logical volume, to be used for block.db
    :param secrets: A dict with the secrets needed to create the osd (e.g. cephx)
    :param tags: dict of ceph.* LVM tag values; supplies the dmcrypt uuids
        when encryption is enabled
    :param osd_id: The OSD id
    :param fsid: The OSD fsid, also known as the OSD UUID
    """
    cephx_secret = secrets.get('cephx_secret', prepare_utils.create_key())
    # encryption-only operations
    if secrets.get('dmcrypt_key'):
        # If encrypted, there is no need to create the lockbox keyring file because
        # bluestore re-creates the files and does not have support for other files
        # like the custom lockbox one. This will need to be done on activation.
        # format and open ('decrypt' devices) and re-assign the device and journal
        # variables so that the rest of the process can use the mapper paths
        key = secrets['dmcrypt_key']
        block = prepare_dmcrypt(key, block, 'block', tags)
        wal = prepare_dmcrypt(key, wal, 'wal', tags)
        db = prepare_dmcrypt(key, db, 'db', tags)

    # create the directory
    prepare_utils.create_osd_path(osd_id, tmpfs=True)
    # symlink the block
    prepare_utils.link_block(block, osd_id)
    # get the latest monmap
    prepare_utils.get_monmap(osd_id)
    # write the OSD keyring if it doesn't exist already
    prepare_utils.write_keyring(osd_id, cephx_secret)
    # prepare the osd filesystem
    prepare_utils.osd_mkfs_bluestore(
        osd_id, fsid,
        keyring=cephx_secret,
        wal=wal,
        db=db
    )
+
+
class Prepare(object):
    """
    Implements ``ceph-volume lvm prepare``: reserve an OSD id, create/tag the
    LVs, and hand off to prepare_filestore()/prepare_bluestore().
    """

    help = 'Format an LVM device and associate it with an OSD'

    def __init__(self, argv):
        # raw CLI arguments (parsed in main); osd_id is captured during
        # prepare() so a failed run can be rolled back from safe_prepare()
        self.argv = argv
        self.osd_id = None

    def get_ptuuid(self, argument):
        """
        Return the partition UUID reported by blkid for ``argument``,
        raising RuntimeError when none can be detected.
        """
        uuid = disk.get_partuuid(argument)
        if not uuid:
            terminal.error('blkid could not detect a PARTUUID for device: %s' % argument)
            raise RuntimeError('unable to use device')
        return uuid

    def setup_device(self, device_type, device_name, tags, size, slots):
        """
        Check if ``device_name`` is an lv, if so, set the tags, making sure to
        update the tags with the lv_uuid and lv_path which the incoming tags
        will not have.

        If the device is not a logical volume, then retrieve the partition UUID
        by querying ``blkid``

        :param device_type: e.g. 'journal', 'wal', 'db'; used in the tag names
        :param device_name: vg/lv spec, raw device, partition, or None
        :param tags: ceph.* tag dict accumulated so far; mutated and returned
        :param size: requested LV size; 0 means use all available space
        :param slots: number of LV slots to divide a raw device into
        :returns: tuple of (path, uuid, tags); empty strings when
            ``device_name`` is None
        """
        if device_name is None:
            return '', '', tags
        tags['ceph.type'] = device_type
        tags['ceph.vdo'] = api.is_vdo(device_name)

        try:
            vg_name, lv_name = device_name.split('/')
            lv = api.get_single_lv(filters={'lv_name': lv_name,
                                            'vg_name': vg_name})
        except ValueError:
            # not a vg/lv spec; fall through to device/partition handling
            lv = None

        if lv:
            lv_uuid = lv.lv_uuid
            path = lv.lv_path
            tags['ceph.%s_uuid' % device_type] = lv_uuid
            tags['ceph.%s_device' % device_type] = path
            lv.set_tags(tags)
        elif disk.is_device(device_name):
            # We got a disk, create an lv
            lv_type = "osd-{}".format(device_type)
            name_uuid = system.generate_uuid()
            kwargs = {
                'device': device_name,
                'tags': tags,
                'slots': slots
            }
            #TODO use get_block_db_size and co here to get configured size in
            #conf file
            if size != 0:
                kwargs['size'] = size
            lv = api.create_lv(
                lv_type,
                name_uuid,
                **kwargs)
            path = lv.lv_path
            tags['ceph.{}_device'.format(device_type)] = path
            tags['ceph.{}_uuid'.format(device_type)] = lv.lv_uuid
            lv_uuid = lv.lv_uuid
            lv.set_tags(tags)
        else:
            # otherwise assume this is a regular disk partition
            name_uuid = self.get_ptuuid(device_name)
            path = device_name
            tags['ceph.%s_uuid' % device_type] = name_uuid
            tags['ceph.%s_device' % device_type] = path
            lv_uuid = name_uuid
        return path, lv_uuid, tags

    def prepare_data_device(self, device_type, osd_uuid):
        """
        Check if ``self.args.data`` is a device or partition to create an LV
        out of it with a distinct volume group name, assigning LV tags on it
        and ultimately, returning the logical volume object. Failing to detect
        a device or partition will result in error.

        :param device_type: Usually, either ``data`` or ``block`` (filestore vs. bluestore)
        :param osd_uuid: The OSD uuid
        """
        device = self.args.data
        if disk.is_partition(device) or disk.is_device(device):
            # we must create a vg, and then a single lv
            lv_name_prefix = "osd-{}".format(device_type)
            kwargs = {'device': device,
                      'tags': {'ceph.type': device_type},
                      'slots': self.args.data_slots,
                     }
            logger.debug('data device size: {}'.format(self.args.data_size))
            if self.args.data_size != 0:
                kwargs['size'] = self.args.data_size
            return api.create_lv(
                lv_name_prefix,
                osd_uuid,
                **kwargs)
        else:
            error = [
                'Cannot use device ({}).'.format(device),
                'A vg/lv path or an existing device is needed']
            raise RuntimeError(' '.join(error))

        # NOTE(review): unreachable — both branches above return or raise
        raise RuntimeError('no data logical volume found with: {}'.format(device))

    def safe_prepare(self, args=None):
        """
        An intermediate step between `main()` and `prepare()` so that we can
        capture the `self.osd_id` in case we need to rollback

        :param args: Injected args, usually from `lvm create` which compounds
                     both `prepare` and `create`
        """
        if args is not None:
            self.args = args

        try:
            vgname, lvname = self.args.data.split('/')
            lv = api.get_single_lv(filters={'lv_name': lvname,
                                            'vg_name': vgname})
        except ValueError:
            # --data was not a vg/lv spec
            lv = None

        if api.is_ceph_device(lv):
            logger.info("device {} is already used".format(self.args.data))
            raise RuntimeError("skipping {}, it is already prepared".format(self.args.data))
        try:
            self.prepare()
        except Exception:
            logger.exception('lvm prepare was unable to complete')
            logger.info('will rollback OSD ID creation')
            # release the OSD id reserved in the cluster so it can be reused
            rollback_osd(self.args, self.osd_id)
            raise
        terminal.success("ceph-volume lvm prepare successful for: %s" % self.args.data)

    def get_cluster_fsid(self):
        """
        Allows using --cluster-fsid as an argument, but can fallback to reading
        from ceph.conf if that is unset (the default behavior).
        """
        if self.args.cluster_fsid:
            return self.args.cluster_fsid
        else:
            return conf.ceph.get('global', 'fsid')

    @decorators.needs_root
    def prepare(self):
        """
        Perform the actual prepare: generate secrets, reserve an OSD id,
        build the ceph.* tag dict, then set up the devices and mkfs for
        either filestore or bluestore.
        """
        # FIXME we don't allow re-using a keyring, we always generate one for the
        # OSD, this needs to be fixed. This could either be a file (!) or a string
        # (!!) or some flags that we would need to compound into a dict so that we
        # can convert to JSON (!!!)
        secrets = {'cephx_secret': prepare_utils.create_key()}
        cephx_lockbox_secret = ''
        encrypted = 1 if self.args.dmcrypt else 0
        # NOTE(review): the assignment two lines above is redundant — this
        # line always overwrites cephx_lockbox_secret
        cephx_lockbox_secret = '' if not encrypted else prepare_utils.create_key()

        if encrypted:
            secrets['dmcrypt_key'] = encryption_utils.create_dmcrypt_key()
            secrets['cephx_lockbox_secret'] = cephx_lockbox_secret

        cluster_fsid = self.get_cluster_fsid()

        osd_fsid = self.args.osd_fsid or system.generate_uuid()
        crush_device_class = self.args.crush_device_class
        if crush_device_class:
            secrets['crush_device_class'] = crush_device_class
        # reuse a given ID if it exists, otherwise create a new ID
        self.osd_id = prepare_utils.create_id(osd_fsid, json.dumps(secrets), osd_id=self.args.osd_id)
        tags = {
            'ceph.osd_fsid': osd_fsid,
            'ceph.osd_id': self.osd_id,
            'ceph.cluster_fsid': cluster_fsid,
            'ceph.cluster_name': conf.cluster,
            'ceph.crush_device_class': crush_device_class,
            'ceph.osdspec_affinity': prepare_utils.get_osdspec_affinity()
        }
        if self.args.filestore:
            if not self.args.journal:
                # NOTE(review): typo 'specifed' in this log message (runtime
                # string, left untouched here)
                logger.info(('no journal was specifed, creating journal lv '
                             'on {}').format(self.args.data))
                self.args.journal = self.args.data
                self.args.journal_size = disk.Size(g=5)
                # need to adjust data size/slots for colocated journal
                if self.args.data_size:
                    self.args.data_size -= self.args.journal_size
                if self.args.data_slots == 1:
                    self.args.data_slots = 0
                else:
                    raise RuntimeError('Can\'t handle multiple filestore OSDs '
                                       'with colocated journals yet. Please '
                                       'create journal LVs manually')
            tags['ceph.cephx_lockbox_secret'] = cephx_lockbox_secret
            tags['ceph.encrypted'] = encrypted

            journal_device, journal_uuid, tags = self.setup_device(
                'journal',
                self.args.journal,
                tags,
                self.args.journal_size,
                self.args.journal_slots)

            try:
                vg_name, lv_name = self.args.data.split('/')
                data_lv = api.get_single_lv(filters={'lv_name': lv_name,
                                                     'vg_name': vg_name})
            except ValueError:
                data_lv = None

            if not data_lv:
                data_lv = self.prepare_data_device('data', osd_fsid)

            tags['ceph.data_device'] = data_lv.lv_path
            tags['ceph.data_uuid'] = data_lv.lv_uuid
            tags['ceph.vdo'] = api.is_vdo(data_lv.lv_path)
            tags['ceph.type'] = 'data'
            data_lv.set_tags(tags)
            if not journal_device.startswith('/'):
                # we got a journal lv, set rest of the tags
                # NOTE(review): this looks up lv_name/vg_name from the *data*
                # split above, not the journal's names; if --data was not a
                # vg/lv spec these locals are unbound — confirm intent
                api.get_single_lv(filters={'lv_name': lv_name,
                                           'vg_name': vg_name}).set_tags(tags)

            prepare_filestore(
                data_lv.lv_path,
                journal_device,
                secrets,
                tags,
                self.osd_id,
                osd_fsid,
            )
        elif self.args.bluestore:
            try:
                vg_name, lv_name = self.args.data.split('/')
                block_lv = api.get_single_lv(filters={'lv_name': lv_name,
                                                      'vg_name': vg_name})
            except ValueError:
                block_lv = None

            if not block_lv:
                block_lv = self.prepare_data_device('block', osd_fsid)

            tags['ceph.block_device'] = block_lv.lv_path
            tags['ceph.block_uuid'] = block_lv.lv_uuid
            tags['ceph.cephx_lockbox_secret'] = cephx_lockbox_secret
            tags['ceph.encrypted'] = encrypted
            tags['ceph.vdo'] = api.is_vdo(block_lv.lv_path)

            wal_device, wal_uuid, tags = self.setup_device(
                'wal',
                self.args.block_wal,
                tags,
                self.args.block_wal_size,
                self.args.block_wal_slots)
            db_device, db_uuid, tags = self.setup_device(
                'db',
                self.args.block_db,
                tags,
                self.args.block_db_size,
                self.args.block_db_slots)

            tags['ceph.type'] = 'block'
            block_lv.set_tags(tags)

            prepare_bluestore(
                block_lv.lv_path,
                wal_device,
                db_device,
                secrets,
                tags,
                self.osd_id,
                osd_fsid,
            )

    def main(self):
        """
        Entry point for ``ceph-volume lvm prepare``: print help when called
        with no arguments, otherwise validate the args and run safe_prepare().
        """
        sub_command_help = dedent("""
        Prepare an OSD by assigning an ID and FSID, registering them with the
        cluster with an ID and FSID, formatting and mounting the volume, and
        finally by adding all the metadata to the logical volumes using LVM
        tags, so that it can later be discovered.

        Once the OSD is ready, an ad-hoc systemd unit will be enabled so that
        it can later get activated and the OSD daemon can get started.

        Encryption is supported via dmcrypt and the --dmcrypt flag.

        Existing logical volume (lv):

            ceph-volume lvm prepare --data {vg/lv}

        Existing block device (a logical volume will be created):

            ceph-volume lvm prepare --data /path/to/device

        Optionally, can consume db and wal devices, partitions or logical
        volumes. A device will get a logical volume, partitions and existing
        logical volumes will be used as is:

            ceph-volume lvm prepare --data {vg/lv} --block.wal {partition} --block.db {/path/to/device}
        """)
        parser = prepare_parser(
            prog='ceph-volume lvm prepare',
            description=sub_command_help,
        )
        if len(self.argv) == 0:
            print(sub_command_help)
            return
        exclude_group_options(parser, argv=self.argv, groups=['filestore', 'bluestore'])
        self.args = parser.parse_args(self.argv)
        # the unfortunate mix of one superset for both filestore and bluestore
        # makes this validation cumbersome
        if self.args.filestore:
            if not self.args.journal:
                raise SystemExit('--journal is required when using --filestore')
        # Default to bluestore here since defaulting it in add_argument may
        # cause both to be True
        if not self.args.bluestore and not self.args.filestore:
            self.args.bluestore = True
        self.safe_prepare()
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/trigger.py b/src/ceph-volume/ceph_volume/devices/lvm/trigger.py
new file mode 100644
index 000000000..dc57011df
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/trigger.py
@@ -0,0 +1,70 @@
+from __future__ import print_function
+import argparse
+from textwrap import dedent
+from ceph_volume.exceptions import SuffixParsingError
+from ceph_volume import decorators
+from .activate import Activate
+
+
def parse_osd_id(string):
    """
    Return the numeric OSD id prefix of a systemd instance string such as
    '0-<uuid>', raising SuffixParsingError when none can be extracted.
    """
    # everything before the first dash is the candidate id
    candidate = string.partition('-')[0]
    if candidate and candidate.isdigit():
        return candidate
    raise SuffixParsingError('OSD id', string)
+
+
def parse_osd_uuid(string):
    """
    Return the OSD uuid suffix of a systemd instance string such as
    '0-<uuid>', raising SuffixParsingError when the suffix is empty.
    """
    # strip the '<id>-' prefix; whatever remains is the uuid
    prefix = '%s-' % parse_osd_id(string)
    osd_uuid = string.split(prefix, 1)[-1]
    if not osd_uuid:
        raise SuffixParsingError('OSD uuid', string)
    return osd_uuid
+
+
class Trigger(object):

    help = 'systemd helper to activate an OSD'

    def __init__(self, argv):
        # systemd passes a single '<osd id>-<osd uuid>' token in argv
        self.argv = argv

    @decorators.needs_root
    def main(self):
        """
        Parse the systemd-provided '<id>-<uuid>' token and proxy to
        ``ceph-volume lvm activate``.
        """
        sub_command_help = dedent("""
        ** DO NOT USE DIRECTLY **
        This tool is meant to help the systemd unit that knows about OSDs.

        Proxy OSD activation to ``ceph-volume lvm activate`` by parsing the
        input from systemd, detecting the UUID and ID associated with an OSD::

            ceph-volume lvm trigger {SYSTEMD-DATA}

        The systemd "data" is expected to be in the format of::

            {OSD ID}-{OSD UUID}

        The lvs associated with the OSD need to have been prepared previously,
        so that all needed tags and metadata exist.
        """)
        parser = argparse.ArgumentParser(
            prog='ceph-volume lvm trigger',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=sub_command_help,
        )

        parser.add_argument(
            'systemd_data',
            metavar='SYSTEMD_DATA',
            nargs='?',
            help='Data from a systemd unit containing ID and UUID of the OSD, like asdf-lkjh-0'
        )
        # bare invocation: show help instead of argparse's error output
        if len(self.argv) == 0:
            print(sub_command_help)
            return
        args = parser.parse_args(self.argv)
        # NOTE(review): nargs='?' permits systemd_data to be None, which
        # would crash parse_osd_id; argv is non-empty here, so in practice a
        # positional value is expected — confirm with the systemd unit
        osd_id = parse_osd_id(args.systemd_data)
        osd_uuid = parse_osd_uuid(args.systemd_data)
        # delegate the real work to `ceph-volume lvm activate`
        Activate(['--auto-detect-objectstore', osd_id, osd_uuid]).main()
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/zap.py b/src/ceph-volume/ceph_volume/devices/lvm/zap.py
new file mode 100644
index 000000000..d6d778d16
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/zap.py
@@ -0,0 +1,406 @@
+import argparse
+import os
+import logging
+import time
+
+from textwrap import dedent
+
+from ceph_volume import decorators, terminal, process
+from ceph_volume.api import lvm as api
+from ceph_volume.util import system, encryption, disk, arg_validators, str_to_int, merge_dict
+from ceph_volume.util.device import Device
+from ceph_volume.systemd import systemctl
+
+logger = logging.getLogger(__name__)
+mlogger = terminal.MultiLogger(__name__)
+
+
def wipefs(path):
    """
    Removes the filesystem from an lv or partition.

    Environment variables supported::

    * ``CEPH_VOLUME_WIPEFS_TRIES``: Defaults to 8
    * ``CEPH_VOLUME_WIPEFS_INTERVAL``: Defaults to 5

    """
    max_attempts = str_to_int(
        os.environ.get('CEPH_VOLUME_WIPEFS_TRIES', 8)
    )
    retry_interval = str_to_int(
        os.environ.get('CEPH_VOLUME_WIPEFS_INTERVAL', 5)
    )

    for _attempt in range(max_attempts):
        _, _, exit_code = process.call(['wipefs', '--all', path])
        if exit_code == 0:
            return
        # retry broadly rather than inspecting stderr for a specific
        # 'probing initialization failed' message, so other transient
        # failures that need a retry are not missed
        terminal.warning(
            'failed to wipefs device, will try again to workaround probable race condition'
        )
        time.sleep(retry_interval)
    raise RuntimeError("could not complete wipefs on device: %s" % path)
+
+
def zap_data(path):
    """
    Clears all data from the given path. Path should be
    an absolute path to an lv or partition.

    10M of data is written to the path to make sure that
    there is no trace left of any previous Filesystem.
    """
    command = [
        'dd',
        'if=/dev/zero',
        'of={path}'.format(path=path),
        'bs=1M',
        'count=10',
        'conv=fsync',
    ]
    process.run(command)
+
+
def find_associated_devices(osd_id=None, osd_fsid=None):
    """
    From an ``osd_id`` and/or an ``osd_fsid``, filter out all the LVs in the
    system that match those tag values, further detect if any partitions are
    part of the OSD, and then return the set of LVs and partitions (if any).

    :param osd_id: OSD id to match against the ceph.osd_id LVM tag
    :param osd_fsid: OSD fsid to match against the ceph.osd_fsid LVM tag
    :returns: list of Device objects, one per unique non-empty path
    :raises RuntimeError: when no matching LVs are found
    """
    lv_tags = {}
    if osd_id:
        lv_tags['ceph.osd_id'] = osd_id
    if osd_fsid:
        lv_tags['ceph.osd_fsid'] = osd_fsid

    lvs = api.get_lvs(tags=lv_tags)
    if not lvs:
        # parenthesize (osd_id or osd_fsid): the original expression
        # `'%s' % osd_id or osd_fsid` formatted first, so a None osd_id
        # rendered as the truthy string 'None' and the fsid fallback
        # could never fire
        raise RuntimeError('Unable to find any LV for zapping OSD: '
                           '%s' % (osd_id or osd_fsid))

    devices_to_zap = ensure_associated_lvs(lvs, lv_tags)
    return [Device(path) for path in set(devices_to_zap) if path]
+
+
def ensure_associated_lvs(lvs, lv_tags=None):
    """
    Go through each LV and ensure if backing devices (journal, wal, block)
    are LVs or partitions, so that they can be accurately reported.

    :param lvs: LVs known to belong to the OSD(s) being processed
    :param lv_tags: optional tag filter (e.g. osd id/fsid) applied when
        looking up the backing LVs; defaults to no filtering
    :returns: deduplicated list of device paths associated with the LVs
    """
    # avoid a mutable default argument: a shared module-level dict would
    # leak state across calls if any caller ever mutated it
    if lv_tags is None:
        lv_tags = {}

    # look for many LVs for each backing type, because it is possible to
    # receive a filtering for osd.1, and have multiple failed deployments
    # leaving many journals with osd.1 - usually, only a single LV will be
    # returned

    journal_lvs = api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'journal'}))
    db_lvs = api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'db'}))
    wal_lvs = api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'wal'}))
    backing_devices = [(journal_lvs, 'journal'), (db_lvs, 'db'),
                       (wal_lvs, 'wal')]

    verified_devices = []

    for lv in lvs:
        # go through each lv and append it, otherwise query `blkid` to find
        # a physical device. Do this for each type (journal,db,wal) regardless
        # if they have been processed in the previous LV, so that bad devices
        # with the same ID can be caught
        for ceph_lvs, _type in backing_devices:
            if ceph_lvs:
                verified_devices.extend([backing_lv.lv_path for backing_lv in ceph_lvs])
                continue

            # must be a disk partition, by querying blkid by the uuid we are
            # ensuring that the device path is always correct
            try:
                device_uuid = lv.tags['ceph.%s_uuid' % _type]
            except KeyError:
                # Bluestore will not have ceph.journal_uuid, and Filestore
                # will not not have ceph.db_uuid
                continue

            osd_device = disk.get_device_from_partuuid(device_uuid)
            if not osd_device:
                # if the osd_device is not found by the partuuid, then it is
                # not possible to ensure this device exists anymore, so skip it
                continue
            verified_devices.append(osd_device)

        verified_devices.append(lv.lv_path)

    # reduce the list from all the duplicates that were added
    return list(set(verified_devices))
+
+
+class Zap(object):
+
+ help = 'Removes all data and filesystems from a logical volume or partition.'
+
    def __init__(self, argv):
        # raw CLI arguments; the zap helpers read self.args, which is
        # presumably set by this sub-command's main() — not visible here
        self.argv = argv
+
    def unmount_lv(self, lv):
        """
        Unmount the given LV's mountpoint if mounted and, when the LV is
        encrypted, close its dm-crypt mapping.
        """
        # prefer the canonical /var/lib/ceph/osd/<cluster>-<id> mountpoint
        # when the tags identify one; fall back to the raw LV path
        if lv.tags.get('ceph.cluster_name') and lv.tags.get('ceph.osd_id'):
            lv_path = "/var/lib/ceph/osd/{}-{}".format(lv.tags['ceph.cluster_name'], lv.tags['ceph.osd_id'])
        else:
            lv_path = lv.lv_path
        # presumably the dm-crypt mapping is named after the LV uuid —
        # confirm against dmcrypt_close/encryption helpers
        dmcrypt_uuid = lv.lv_uuid
        dmcrypt = lv.encrypted
        if system.path_is_mounted(lv_path):
            mlogger.info("Unmounting %s", lv_path)
            system.unmount(lv_path)
        if dmcrypt and dmcrypt_uuid:
            self.dmcrypt_close(dmcrypt_uuid)
+
+ def zap_lv(self, device):
+ """
+ Device examples: vg-name/lv-name, /dev/vg-name/lv-name
+ Requirements: Must be a logical volume (LV)
+ """
+ lv = api.get_single_lv(filters={'lv_name': device.lv_name, 'vg_name':
+ device.vg_name})
+ pv = api.get_single_pv(filters={'lv_uuid': lv.lv_uuid})
+ self.unmount_lv(lv)
+
+ wipefs(device.path)
+ zap_data(device.path)
+
+ if self.args.destroy:
+ lvs = api.get_lvs(filters={'vg_name': device.vg_name})
+ if lvs == []:
+ mlogger.info('No LVs left, exiting', device.vg_name)
+ return
+ elif len(lvs) <= 1:
+ mlogger.info('Only 1 LV left in VG, will proceed to destroy '
+ 'volume group %s', device.vg_name)
+ api.remove_vg(device.vg_name)
+ api.remove_pv(pv.pv_name)
+ else:
+ mlogger.info('More than 1 LV left in VG, will proceed to '
+ 'destroy LV only')
+ mlogger.info('Removing LV because --destroy was given: %s',
+ device.path)
+ api.remove_lv(device.path)
+ elif lv:
+ # just remove all lvm metadata, leaving the LV around
+ lv.clear_tags()
+
    def zap_partition(self, device):
        """
        Device example: /dev/sda1
        Requirements: Must be a partition

        Closes any dm-crypt mapping backed by the partition, unmounts it,
        wipes it, and with --destroy removes the partition itself.
        """
        if device.is_encrypted:
            # find the holder
            holders = [
                '/dev/%s' % holder for holder in device.sys_api.get('holders', [])
            ]
            for mapper_uuid in os.listdir('/dev/mapper'):
                mapper_path = os.path.join('/dev/mapper', mapper_uuid)
                # close the mapping whose backing device is this partition
                if os.path.realpath(mapper_path) in holders:
                    self.dmcrypt_close(mapper_uuid)

        if system.device_is_mounted(device.path):
            mlogger.info("Unmounting %s", device.path)
            system.unmount(device.path)

        wipefs(device.path)
        zap_data(device.path)

        if self.args.destroy:
            mlogger.info("Destroying partition since --destroy was used: %s" % device.path)
            disk.remove_partition(device)
+
    def zap_lvm_member(self, device):
        """
        An LVM member may have more than one LV and or VG, for example if it is
        a raw device with multiple partitions each belonging to a different LV

        Device example: /dev/sda
        Requirements: An LV or VG present in the device, making it an LVM member
        """
        for lv in device.lvs:
            if lv.lv_name:
                mlogger.info('Zapping lvm member {}. lv_path is {}'.format(device.path, lv.lv_path))
                self.zap_lv(Device(lv.lv_path))
            else:
                # no LV name: presumably an empty VG remains on the device
                # and only that VG needs removing — confirm with Device.lvs
                vg = api.get_single_vg(filters={'vg_name': lv.vg_name})
                if vg:
                    mlogger.info('Found empty VG {}, removing'.format(vg.vg_name))
                    api.remove_vg(vg.vg_name)
+
+
+
+ def zap_raw_device(self, device):
+ """
+ Any whole (raw) device passed in as input will be processed here,
+ checking for LVM membership and partitions (if any).
+
+ Device example: /dev/sda
+ Requirements: None
+ """
+ if not self.args.destroy:
+ # the use of dd on a raw device causes the partition table to be
+ # destroyed
+ mlogger.warning(
+ '--destroy was not specified, but zapping a whole device will remove the partition table'
+ )
+
+ # look for partitions and zap those
+ for part_name in device.sys_api.get('partitions', {}).keys():
+ self.zap_partition(Device('/dev/%s' % part_name))
+
+ wipefs(device.path)
+ zap_data(device.path)
+
+ @decorators.needs_root
+ def zap(self, devices=None):
+ devices = devices or self.args.devices
+
+ for device in devices:
+ mlogger.info("Zapping: %s", device.path)
+ if device.is_mapper and not device.is_mpath:
+ terminal.error("Refusing to zap the mapper device: {}".format(device))
+ raise SystemExit(1)
+ if device.is_lvm_member:
+ self.zap_lvm_member(device)
+ if device.is_lv:
+ self.zap_lv(device)
+ if device.is_partition:
+ self.zap_partition(device)
+ if device.is_device:
+ self.zap_raw_device(device)
+
+ if self.args.devices:
+ terminal.success(
+ "Zapping successful for: %s" % ", ".join([str(d) for d in self.args.devices])
+ )
+ else:
+ identifier = self.args.osd_id or self.args.osd_fsid
+ terminal.success(
+ "Zapping successful for OSD: %s" % identifier
+ )
+
+ @decorators.needs_root
+ def zap_osd(self):
+ if self.args.osd_id and not self.args.no_systemd:
+ osd_is_running = systemctl.osd_is_active(self.args.osd_id)
+ if osd_is_running:
+ mlogger.error("OSD ID %s is running, stop it with:" % self.args.osd_id)
+ mlogger.error("systemctl stop ceph-osd@%s" % self.args.osd_id)
+ raise SystemExit("Unable to zap devices associated with OSD ID: %s" % self.args.osd_id)
+ devices = find_associated_devices(self.args.osd_id, self.args.osd_fsid)
+ self.zap(devices)
+
+ def dmcrypt_close(self, dmcrypt_uuid):
+ dmcrypt_path = "/dev/mapper/{}".format(dmcrypt_uuid)
+ mlogger.info("Closing encrypted path %s", dmcrypt_path)
+ encryption.dmcrypt_close(dmcrypt_path)
+
+ def main(self):
+ sub_command_help = dedent("""
+ Zaps the given logical volume(s), raw device(s) or partition(s) for reuse by ceph-volume.
+ If given a path to a logical volume it must be in the format of vg/lv. Any
+ filesystems present on the given device, vg/lv, or partition will be removed and
+ all data will be purged.
+
+ If the logical volume, raw device or partition is being used for any ceph related
+ mount points they will be unmounted.
+
+ However, the lv or partition will be kept intact.
+
+ Example calls for supported scenarios:
+
+ Zapping a logical volume:
+
+ ceph-volume lvm zap {vg name/lv name}
+
+ Zapping a partition:
+
+ ceph-volume lvm zap /dev/sdc1
+
+ Zapping many raw devices:
+
+ ceph-volume lvm zap /dev/sda /dev/sdb /db/sdc
+
+ Zapping devices associated with an OSD ID:
+
+ ceph-volume lvm zap --osd-id 1
+
+ Optionally include the OSD FSID
+
+ ceph-volume lvm zap --osd-id 1 --osd-fsid 55BD4219-16A7-4037-BC20-0F158EFCC83D
+
+ If the --destroy flag is given and you are zapping a raw device or partition
+ then all vgs and lvs that exist on that raw device or partition will be destroyed.
+
+ This is especially useful if a raw device or partition was used by ceph-volume lvm create
+ or ceph-volume lvm prepare commands previously and now you want to reuse that device.
+
+ For example:
+
+ ceph-volume lvm zap /dev/sda --destroy
+
+ If the --destroy flag is given and you are zapping an lv then the lv is still
+ kept intact for reuse.
+
+ """)
+ parser = argparse.ArgumentParser(
+ prog='ceph-volume lvm zap',
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ description=sub_command_help,
+ )
+
+ parser.add_argument(
+ 'devices',
+ metavar='DEVICES',
+ nargs='*',
+ type=arg_validators.ValidZapDevice(gpt_ok=True),
+ default=[],
+ help='Path to one or many lv (as vg/lv), partition (as /dev/sda1) or device (as /dev/sda)'
+ )
+
+ parser.add_argument(
+ '--destroy',
+ action='store_true',
+ default=False,
+ help='Destroy all volume groups and logical volumes if you are zapping a raw device or partition',
+ )
+
+ parser.add_argument(
+ '--osd-id',
+ type=arg_validators.valid_osd_id,
+ help='Specify an OSD ID to detect associated devices for zapping',
+ )
+
+ parser.add_argument(
+ '--osd-fsid',
+ help='Specify an OSD FSID to detect associated devices for zapping',
+ )
+
+ parser.add_argument(
+ '--no-systemd',
+ dest='no_systemd',
+ action='store_true',
+ help='Skip systemd unit checks',
+ )
+
+ if len(self.argv) == 0:
+ print(sub_command_help)
+ return
+
+ self.args = parser.parse_args(self.argv)
+
+ if self.args.osd_id or self.args.osd_fsid:
+ self.zap_osd()
+ else:
+ self.zap()