summaryrefslogtreecommitdiffstats
path: root/src/ceph-volume/ceph_volume/devices
diff options
context:
space:
mode:
Diffstat (limited to 'src/ceph-volume/ceph_volume/devices')
-rw-r--r--src/ceph-volume/ceph_volume/devices/__init__.py1
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/__init__.py1
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/activate.py281
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/batch.py631
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/common.py164
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/create.py77
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/deactivate.py88
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/listing.py223
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/main.py54
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/migrate.py719
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/prepare.py327
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/trigger.py70
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/zap.py405
-rw-r--r--src/ceph-volume/ceph_volume/devices/raw/__init__.py1
-rw-r--r--src/ceph-volume/ceph_volume/devices/raw/activate.py166
-rw-r--r--src/ceph-volume/ceph_volume/devices/raw/common.py58
-rw-r--r--src/ceph-volume/ceph_volume/devices/raw/list.py174
-rw-r--r--src/ceph-volume/ceph_volume/devices/raw/main.py40
-rw-r--r--src/ceph-volume/ceph_volume/devices/raw/prepare.py160
-rw-r--r--src/ceph-volume/ceph_volume/devices/simple/__init__.py1
-rw-r--r--src/ceph-volume/ceph_volume/devices/simple/activate.py282
-rw-r--r--src/ceph-volume/ceph_volume/devices/simple/main.py41
-rw-r--r--src/ceph-volume/ceph_volume/devices/simple/scan.py385
-rw-r--r--src/ceph-volume/ceph_volume/devices/simple/trigger.py70
24 files changed, 4419 insertions, 0 deletions
diff --git a/src/ceph-volume/ceph_volume/devices/__init__.py b/src/ceph-volume/ceph_volume/devices/__init__.py
new file mode 100644
index 000000000..2b017d671
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/__init__.py
@@ -0,0 +1 @@
+from . import lvm, simple, raw # noqa
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/__init__.py b/src/ceph-volume/ceph_volume/devices/lvm/__init__.py
new file mode 100644
index 000000000..3c147123e
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/__init__.py
@@ -0,0 +1 @@
+from .main import LVM # noqa
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/activate.py b/src/ceph-volume/ceph_volume/devices/lvm/activate.py
new file mode 100644
index 000000000..feb91053b
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/activate.py
@@ -0,0 +1,281 @@
+from __future__ import print_function
+import argparse
+import logging
+import os
+from textwrap import dedent
+from ceph_volume import process, conf, decorators, terminal, configuration
+from ceph_volume.util import system, disk
+from ceph_volume.util import prepare as prepare_utils
+from ceph_volume.util import encryption as encryption_utils
+from ceph_volume.systemd import systemctl
+from ceph_volume.api import lvm as api
+from .listing import direct_report
+
+
+logger = logging.getLogger(__name__)
+
+
+
def get_osd_device_path(osd_lvs, device_type, dmcrypt_secret=None):
    """
    Resolve the path of one of the devices that make up an OSD.

    ``device_type`` can be one of ``db``, ``wal`` or ``block``. The block LV
    is located first because its tags carry both the encryption flag and the
    ``ceph.<device_type>_uuid`` of the requested device. The device is then
    looked up among ``osd_lvs`` and, failing that, by partition uuid.

    :param osd_lvs: LVs of a single OSD; each one exposes ``tags``, ``name``
                    and ``lv_path``
    :param device_type: the ``ceph.type`` of the device to resolve
    :param dmcrypt_secret: key passed to ``luks_open`` for encrypted OSDs
    :returns: a device path, or ``None`` when the block LV carries no uuid
              tag for ``device_type`` (these devices are optional)
    :raises RuntimeError: when no block LV exists, or when a uuid is tagged
                          but no device matching it can be found
    """
    osd_block_lv = next(
        (lv for lv in osd_lvs if lv.tags.get('ceph.type') == 'block'), None)
    if not osd_block_lv:
        # Without the block LV neither the encryption flag nor the device uuid
        # is known; fail explicitly instead of hitting an unbound local below.
        raise RuntimeError(
            'could not find a block device to look up the %s device' % device_type)

    is_encrypted = osd_block_lv.tags.get('ceph.encrypted', '0') == '1'
    logger.debug('Found block device (%s) with encryption: %s', osd_block_lv.name, is_encrypted)
    device_uuid = osd_block_lv.tags.get('ceph.%s_uuid' % device_type)
    if not device_uuid:
        return None

    device_lv = next(
        (lv for lv in osd_lvs if lv.tags.get('ceph.type') == device_type), None)
    if device_lv:
        if is_encrypted:
            encryption_utils.luks_open(dmcrypt_secret, device_lv.lv_path, device_uuid)
            return '/dev/mapper/%s' % device_uuid
        return device_lv.lv_path

    # this could be a regular device, so query it with blkid
    physical_device = disk.get_device_from_partuuid(device_uuid)
    if physical_device:
        if is_encrypted:
            encryption_utils.luks_open(dmcrypt_secret, physical_device, device_uuid)
            return '/dev/mapper/%s' % device_uuid
        return physical_device

    raise RuntimeError('could not find %s with uuid %s' % (device_type, device_uuid))
+
+
def activate_bluestore(osd_lvs, no_systemd=False, no_tmpfs=False):
    """
    Prime the OSD directory of a bluestore OSD and optionally start it.

    :param osd_lvs: LVs belonging to the OSD; one of them must be tagged
                    ``ceph.type=block``
    :param no_systemd: when exactly ``False`` the systemd units are enabled
                       and the OSD is started
    :param no_tmpfs: forwarded (negated) as ``tmpfs`` to ``create_osd_path``
    :raises RuntimeError: if none of ``osd_lvs`` is a block LV
    """
    block_lv = next(
        (lv for lv in osd_lvs if lv.tags.get('ceph.type') == 'block'), None)
    if block_lv is None:
        raise RuntimeError('could not find a bluestore OSD to activate')

    is_encrypted = block_lv.tags.get('ceph.encrypted', '0') == '1'
    dmcrypt_secret = None
    osd_id = block_lv.tags['ceph.osd_id']
    conf.cluster = block_lv.tags['ceph.cluster_name']
    osd_fsid = block_lv.tags['ceph.osd_fsid']
    configuration.load_ceph_conf_path(block_lv.tags['ceph.cluster_name'])
    configuration.load()

    # mount on tmpfs the osd directory
    osd_path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    if not system.path_is_mounted(osd_path):
        # mkdir -p and mount as tmpfs
        prepare_utils.create_osd_path(osd_id, tmpfs=not no_tmpfs)

    # XXX This needs to be removed once ceph-bluestore-tool can deal with
    # symlinks that exist in the osd dir
    for link_name in ('block', 'block.db', 'block.wal'):
        stale_link = os.path.join(osd_path, link_name)
        if os.path.exists(stale_link):
            os.unlink(stale_link)

    # encryption is handled here, before priming the OSD dir
    if is_encrypted:
        osd_lv_path = '/dev/mapper/%s' % block_lv.lv_uuid
        lockbox_secret = block_lv.tags['ceph.cephx_lockbox_secret']
        encryption_utils.write_lockbox_keyring(osd_id, osd_fsid, lockbox_secret)
        dmcrypt_secret = encryption_utils.get_dmcrypt_key(osd_id, osd_fsid)
        encryption_utils.luks_open(dmcrypt_secret, block_lv.lv_path, block_lv.lv_uuid)
    else:
        osd_lv_path = block_lv.lv_path

    db_device_path = get_osd_device_path(osd_lvs, 'db', dmcrypt_secret=dmcrypt_secret)
    wal_device_path = get_osd_device_path(osd_lvs, 'wal', dmcrypt_secret=dmcrypt_secret)

    # Once symlinks are removed, the osd dir can be primed again. chown first,
    # regardless of what currently exists so that ``prime-osd-dir`` can succeed
    # even if permissions are somehow messed up
    system.chown(osd_path)
    process.run([
        'ceph-bluestore-tool', '--cluster=%s' % conf.cluster,
        'prime-osd-dir', '--dev', osd_lv_path,
        '--path', osd_path, '--no-mon-config'])

    # always re-do the symlink regardless if it exists, so that the block,
    # block.wal, and block.db devices that may have changed can be mapped
    # correctly every time
    block_link = os.path.join(osd_path, 'block')
    process.run(['ln', '-snf', osd_lv_path, block_link])
    system.chown(block_link)
    system.chown(osd_path)

    # db first, then wal; each is linked only when a device path was resolved
    for link_name, device_path in (('block.db', db_device_path),
                                   ('block.wal', wal_device_path)):
        if not device_path:
            continue
        destination = os.path.join(osd_path, link_name)
        process.run(['ln', '-snf', device_path, destination])
        system.chown(device_path)
        system.chown(destination)

    if no_systemd is False:
        # enable the ceph-volume unit for this OSD
        systemctl.enable_volume(osd_id, osd_fsid, 'lvm')

        # enable the OSD
        systemctl.enable_osd(osd_id)

        # start the OSD
        systemctl.start_osd(osd_id)
    terminal.success("ceph-volume lvm activate successful for osd ID: %s" % osd_id)
+
+
class Activate(object):
    """
    Implements ``ceph-volume lvm activate``: finds the LVs of one OSD (or of
    every OSD reported by ``direct_report``) and hands them to
    ``activate_bluestore``.
    """

    help = 'Discover and mount the LVM device associated with an OSD ID and start the Ceph OSD'

    def __init__(self, argv):
        # raw command line arguments, parsed later in ``main``
        self.argv = argv

    @decorators.needs_root
    def activate_all(self, args):
        """
        Activate every OSD found by ``direct_report``.

        OSDs whose process is already active are skipped, unless
        ``args.no_systemd`` is set, in which case no such check is made.

        :param args: The parsed arguments coming from the CLI
        """
        listed_osds = direct_report()
        osds = {}
        for osd_id, devices in listed_osds.items():
            # the metadata for all devices in each OSD will contain
            # the FSID which is required for activation
            for device in devices:
                fsid = device.get('tags', {}).get('ceph.osd_fsid')
                if fsid:
                    osds[fsid] = osd_id
                    break
        if not osds:
            terminal.warning('Was unable to find any OSDs to activate')
            terminal.warning('Verify OSDs are present with "ceph-volume lvm list"')
            return
        for osd_fsid, osd_id in osds.items():
            if not args.no_systemd and systemctl.osd_is_active(osd_id):
                terminal.warning(
                    'OSD ID %s FSID %s process is active. Skipping activation' % (osd_id, osd_fsid)
                )
            else:
                terminal.info('Activating OSD ID %s FSID %s' % (osd_id, osd_fsid))
                self.activate(args, osd_id=osd_id, osd_fsid=osd_fsid)

    @decorators.needs_root
    def activate(self, args, osd_id=None, osd_fsid=None):
        """
        Activate a single OSD.

        :param args: The parsed arguments coming from the CLI
        :param osd_id: When activating all, this gets populated with an
                       existing OSD ID
        :param osd_fsid: When activating all, this gets populated with an
                         existing OSD FSID
        :raises RuntimeError: if the FSID is missing, or no LV carries the
                              requested tags
        """
        osd_id = osd_id or args.osd_id
        osd_fsid = osd_fsid or args.osd_fsid

        if osd_id and osd_fsid:
            tags = {'ceph.osd_id': osd_id, 'ceph.osd_fsid': osd_fsid}
        elif osd_fsid:
            tags = {'ceph.osd_fsid': osd_fsid}
        elif osd_id:
            raise RuntimeError('could not activate osd.{}, please provide the '
                               'osd_fsid too'.format(osd_id))
        else:
            raise RuntimeError('Please provide both osd_id and osd_fsid')
        lvs = api.get_lvs(tags=tags)
        if not lvs:
            raise RuntimeError('could not find osd.%s with osd_fsid %s' %
                               (osd_id, osd_fsid))

        # ``no_tmpfs`` is looked up defensively because ``args`` may be a
        # namespace that was not produced by this command's own parser.
        no_tmpfs = getattr(args, 'no_tmpfs', False)

        # This argument is only available when passed in directly or via
        # systemd, not when ``create`` is being used
        # placeholder when a new objectstore support will be added
        if getattr(args, 'auto_detect_objectstore', False):
            logger.info('auto detecting objectstore')
            # honour --no-tmpfs here as well, like the explicit paths below
            return activate_bluestore(lvs, args.no_systemd, no_tmpfs)

        # explicit 'objectstore' flags take precedence; otherwise bluestore is
        # detected from the ``ceph.block_device`` tag
        if (getattr(args, 'bluestore', False)
                or any('ceph.block_device' in lv.tags for lv in lvs)):
            activate_bluestore(lvs, args.no_systemd, no_tmpfs)

    def main(self):
        """
        Parse ``self.argv`` and dispatch to ``activate_all`` or ``activate``.

        With an empty argument list only the help text is printed.
        """
        sub_command_help = dedent("""
        Activate OSDs by discovering them with LVM and mounting them in their
        appropriate destination:

            ceph-volume lvm activate {ID} {FSID}

        The lvs associated with the OSD need to have been prepared previously,
        so that all needed tags and metadata exist.

        When migrating OSDs, or a multiple-osd activation is needed, the
        ``--all`` flag can be used instead of the individual ID and FSID:

            ceph-volume lvm activate --all

        """)
        parser = argparse.ArgumentParser(
            prog='ceph-volume lvm activate',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=sub_command_help,
        )

        parser.add_argument(
            'osd_id',
            metavar='ID',
            nargs='?',
            help='The ID of the OSD, usually an integer, like 0'
        )
        parser.add_argument(
            'osd_fsid',
            metavar='FSID',
            nargs='?',
            help='The FSID of the OSD, similar to a SHA1'
        )
        parser.add_argument(
            '--auto-detect-objectstore',
            action='store_true',
            help='Autodetect the objectstore by inspecting the OSD',
        )
        parser.add_argument(
            '--bluestore',
            action='store_true',
            help='force bluestore objectstore activation',
        )
        parser.add_argument(
            '--all',
            dest='activate_all',
            action='store_true',
            help='Activate all OSDs found in the system',
        )
        parser.add_argument(
            '--no-systemd',
            dest='no_systemd',
            action='store_true',
            help='Skip creating and enabling systemd units and starting OSD services',
        )
        parser.add_argument(
            '--no-tmpfs',
            action='store_true',
            help='Do not use a tmpfs mount for OSD data dir'
        )
        if len(self.argv) == 0:
            print(sub_command_help)
            return
        args = parser.parse_args(self.argv)
        if args.activate_all:
            self.activate_all(args)
        else:
            self.activate(args)
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/batch.py b/src/ceph-volume/ceph_volume/devices/lvm/batch.py
new file mode 100644
index 000000000..69a3f672b
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/batch.py
@@ -0,0 +1,631 @@
+import argparse
+from collections import namedtuple
+import json
+import logging
+from textwrap import dedent
+from ceph_volume import terminal, decorators
+from ceph_volume.util import disk, prompt_bool, arg_validators, templates
+from ceph_volume.util import prepare
+from . import common
+from .create import Create
+from .prepare import Prepare
+
+mlogger = terminal.MultiLogger(__name__)
+logger = logging.getLogger(__name__)
+
+
+device_list_template = """
+ * {path: <25} {size: <10} {state}"""
+
+
def device_formatter(devices):
    """
    Render one ``device_list_template`` entry per device.

    :param devices: iterable of ``(path, details)`` pairs, where ``details``
                    provides ``human_readable_size`` and ``rotational``
    :returns: all rendered entries concatenated into one string
    """
    return ''.join(
        device_list_template.format(
            path=path,
            size=details['human_readable_size'],
            state='solid' if details['rotational'] == '0' else 'rotational',
        )
        for path, details in devices
    )
+
+
def ensure_disjoint_device_lists(data, db=(), wal=()):
    """
    Check that the data, db and wal device lists share no device.

    :param data: data devices (hashable items)
    :param db: db devices, empty by default
    :param wal: wal devices, empty by default
    :raises Exception: if any device appears in more than one list
    """
    # build each set once instead of once per comparison
    data_set, db_set, wal_set = set(data), set(db), set(wal)
    if not (data_set.isdisjoint(db_set)
            and data_set.isdisjoint(wal_set)
            and db_set.isdisjoint(wal_set)):
        raise Exception('Device lists are not disjoint')
+
+
def separate_devices_from_lvs(devices):
    """
    Split ``devices`` by their ``is_device`` attribute.

    :param devices: iterable of objects exposing ``is_device``
    :returns: a ``(phys, lvm)`` tuple of lists, each in input order; items
              with a truthy ``is_device`` go to ``phys``, the rest to ``lvm``
    """
    phys = []
    lvm = []
    for d in devices:
        if d.is_device:
            phys.append(d)
        else:
            lvm.append(d)
    return phys, lvm
+
+
def get_physical_osds(devices, args):
    """
    Goes through passed physical devices and assigns OSDs

    Devices whose ``available_lvm`` is falsy are ignored. For the others, up
    to ``args.osds_per_device`` OSDs are planned for as long as the computed
    size still fits into the remaining free space. OSD ids are popped from
    ``args.osd_ids`` while any are left.

    :returns: list of ``Batch.OSD`` objects
    """
    slots = args.osds_per_device
    if args.data_slots:
        slots = max(args.data_slots, args.osds_per_device)
    rel_data_size = args.data_allocate_fraction / slots
    mlogger.debug('relative data size: {}'.format(rel_data_size))

    planned = []
    for dev in devices:
        if not dev.available_lvm:
            continue
        abs_size = disk.Size(b=int(dev.vg_size[0] * rel_data_size))
        remaining = dev.vg_free[0]
        for _ in range(args.osds_per_device):
            if abs_size > remaining:
                break
            remaining -= abs_size.b
            osd_id = args.osd_ids.pop() if args.osd_ids else None
            encryption = 'dmcrypt' if args.dmcrypt else None
            planned.append(Batch.OSD(dev.path,
                                     rel_data_size,
                                     abs_size,
                                     args.osds_per_device,
                                     osd_id,
                                     encryption,
                                     dev.symlink))
    return planned
+
+
def get_lvm_osds(lvs, args):
    """
    Goes through passed LVs and assigns planned osds

    LVs with a truthy ``used_by_ceph`` are skipped. Every remaining LV yields
    one ``Batch.OSD``; ids are popped from ``args.osd_ids`` while any remain.

    :returns: list of ``Batch.OSD`` objects
    """
    planned = []
    for lv in lvs:
        if lv.used_by_ceph:
            continue
        osd_id = args.osd_ids.pop() if args.osd_ids else None
        data_path = "{}/{}".format(lv.vg_name, lv.lv_name)
        data_size = disk.Size(b=int(lv.lvs[0].lv_size))
        encryption = 'dmcrypt' if args.dmcrypt else None
        planned.append(Batch.OSD(data_path, 100.0, data_size, 1, osd_id, encryption))
    return planned
+
+
def get_physical_fast_allocs(devices, type_, fast_slots_per_device, new_osds, args):
    """
    Plan db/wal allocations on physical fast devices.

    :param devices: physical fast devices
    :param type_: prefix of the ``<type_>_slots`` / ``<type_>_size`` attributes
                  read from ``args`` and of ``prepare.get_<type_>_size``
    :param fast_slots_per_device: slots to fill per device
    :param new_osds: upper bound on the number of allocations returned
    :param args: the parsed batch arguments
    :returns: list of ``(path, relative_size, abs_size, requested_slots)``
    :raises SystemExit: with status 1 when the requested size exceeds what a
                        slot can provide
    """
    requested_slots = getattr(args, '{}_slots'.format(type_))
    if not requested_slots or requested_slots < fast_slots_per_device:
        if requested_slots:
            mlogger.info('{}_slots argument is too small, ignoring'.format(type_))
        requested_slots = fast_slots_per_device

    requested_size = getattr(args, '{}_size'.format(type_), 0)
    if not requested_size or requested_size == 0:
        # no size argument was specified, check ceph.conf
        get_size_fct = getattr(prepare, 'get_{}_size'.format(type_))
        requested_size = get_size_fct(lv_format=False)

    ret = []
    vg_device_map = group_devices_by_vg(devices)
    for vg_name, vg_devices in vg_device_map.items():
        for dev in vg_devices:
            if not dev.available_lvm:
                continue
            # any LV present is considered a taken slot
            occupied_slots = len(dev.lvs)
            # prior to v15.2.8, db/wal deployments were grouping multiple fast devices into single VGs - we need to
            # multiply requested_slots (per device) by the number of devices in the VG in order to ensure that
            # abs_size is calculated correctly from vg_size
            if vg_name == 'unused_devices':
                slots_for_vg = requested_slots
            elif len(vg_devices) > 1:
                slots_for_vg = len(args.devices)
            else:
                slots_for_vg = len(vg_devices) * requested_slots
            dev_size = dev.vg_size[0]
            # this only looks at the first vg on device, unsure if there is a better
            # way
            abs_size = disk.Size(b=int(dev_size / slots_for_vg))
            free_size = dev.vg_free[0]
            relative_size = int(abs_size) / dev_size
            if requested_size:
                if requested_size <= abs_size:
                    abs_size = requested_size
                    relative_size = int(abs_size) / dev_size
                else:
                    mlogger.error(
                        '{} was requested for {}, but only {} can be fulfilled'.format(
                            requested_size,
                            '{}_size'.format(type_),
                            abs_size,
                        ))
                    # raise directly: the ``exit`` builtin is only injected by
                    # the ``site`` module and is meant for interactive use
                    raise SystemExit(1)
            while abs_size <= free_size and len(ret) < new_osds and occupied_slots < fast_slots_per_device:
                free_size -= abs_size.b
                occupied_slots += 1
                ret.append((dev.path, relative_size, abs_size, requested_slots))
    return ret
+
def group_devices_by_vg(devices):
    """
    Group devices by the name of their first volume group.

    :param devices: iterable of objects exposing ``vgs``, a sequence whose
                    items have a ``name``
    :returns: dict mapping a VG name to the list of devices on it; devices
              without any VG are collected under ``'unused_devices'``, a key
              that is always present
    """
    result = {'unused_devices': []}
    for dev in devices:
        key = dev.vgs[0].name if len(dev.vgs) > 0 else 'unused_devices'
        result.setdefault(key, []).append(dev)
    return result
+
def get_lvm_fast_allocs(lvs):
    """
    Build one fast allocation per LV whose ``journal_used_by_ceph`` is falsy.

    :returns: list of ``("<vg_name>/<lv_name>", 100.0, size, 1)`` tuples, with
              ``size`` built from the ``lv_size`` of the first entry in
              ``lvs`` of each item
    """
    allocations = []
    for lv in lvs:
        if lv.journal_used_by_ceph:
            continue
        allocations.append((
            "{}/{}".format(lv.vg_name, lv.lv_name),
            100.0,
            disk.Size(b=int(lv.lvs[0].lv_size)),
            1,
        ))
    return allocations
+
+
class Batch(object):
    """
    Implements ``ceph-volume lvm batch``: builds a plan of OSDs from the
    devices given on the command line, reports it and/or executes it through
    ``Prepare`` or ``Create``.
    """

    help = 'Automatically size devices for multi-OSD provisioning with minimal interaction'

    _help = dedent("""
    Automatically size devices ready for OSD provisioning based on default strategies.

    Usage:

        ceph-volume lvm batch [DEVICE...]

    Devices can be physical block devices or LVs.
    Optional reporting on possible outcomes is enabled with --report

        ceph-volume lvm batch --report [DEVICE...]
    """)

    def __init__(self, argv):
        """
        Build the argument parser and parse ``argv`` right away.

        The result is kept in ``self.args`` and the parser in ``self.parser``.
        """
        parser = argparse.ArgumentParser(
            prog='ceph-volume lvm batch',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=self._help,
        )

        parser.add_argument(
            'devices',
            metavar='DEVICES',
            nargs='*',
            type=arg_validators.ValidBatchDataDevice(),
            default=[],
            help='Devices to provision OSDs',
        )
        parser.add_argument(
            '--db-devices',
            nargs='*',
            type=arg_validators.ValidBatchDevice(),
            default=[],
            help='Devices to provision OSDs db volumes',
        )
        parser.add_argument(
            '--wal-devices',
            nargs='*',
            type=arg_validators.ValidBatchDevice(),
            default=[],
            help='Devices to provision OSDs wal volumes',
        )
        parser.add_argument(
            '--auto',
            action='store_true',
            help=('deploy multi-device OSDs if rotational and non-rotational drives '
                  'are passed in DEVICES'),
            default=True
        )
        parser.add_argument(
            '--no-auto',
            action='store_false',
            dest='auto',
            help=('deploy standalone OSDs if rotational and non-rotational drives '
                  'are passed in DEVICES'),
        )
        parser.add_argument(
            '--bluestore',
            action='store_true',
            help='bluestore objectstore (default)',
        )
        parser.add_argument(
            '--report',
            action='store_true',
            help='Only report on OSD that would be created and exit',
        )
        parser.add_argument(
            '--yes',
            action='store_true',
            help='Avoid prompting for confirmation when provisioning',
        )
        parser.add_argument(
            '--format',
            help='output format, defaults to "pretty"',
            default='pretty',
            choices=['json', 'json-pretty', 'pretty'],
        )
        parser.add_argument(
            '--dmcrypt',
            action='store_true',
            help='Enable device encryption via dm-crypt',
        )
        parser.add_argument(
            '--crush-device-class',
            dest='crush_device_class',
            help='Crush device class to assign this OSD to',
            default=""
        )
        parser.add_argument(
            '--no-systemd',
            dest='no_systemd',
            action='store_true',
            help='Skip creating and enabling systemd units and starting OSD services',
        )
        parser.add_argument(
            '--osds-per-device',
            type=int,
            default=1,
            help='Provision more than 1 (the default) OSD per device',
        )
        parser.add_argument(
            '--data-slots',
            type=int,
            # the separating spaces were missing, gluing words together
            help=('Provision more than 1 (the default) OSD slot per device;'
                  ' if more slots than osds-per-device are specified, slots'
                  ' will stay unoccupied'),
        )
        parser.add_argument(
            '--data-allocate-fraction',
            type=arg_validators.ValidFraction(),
            help='Fraction to allocate from data device (0,1.0]',
            default=1.0
        )
        parser.add_argument(
            '--block-db-size',
            type=disk.Size.parse,
            help='Set (or override) the "bluestore_block_db_size" value, in bytes'
        )
        parser.add_argument(
            '--block-db-slots',
            type=int,
            help='Provision slots on DB device, can remain unoccupied'
        )
        parser.add_argument(
            '--block-wal-size',
            type=disk.Size.parse,
            help='Set (or override) the "bluestore_block_wal_size" value, in bytes'
        )
        parser.add_argument(
            '--block-wal-slots',
            type=int,
            help='Provision slots on WAL device, can remain unoccupied'
        )
        parser.add_argument(
            '--prepare',
            action='store_true',
            help='Only prepare all OSDs, do not activate',
        )
        parser.add_argument(
            '--osd-ids',
            nargs='*',
            default=[],
            help='Reuse existing OSD ids',
            type=arg_validators.valid_osd_id
        )
        self.args = parser.parse_args(argv)
        self.parser = parser
        # creates self.usable, self.db_usable and self.wal_usable
        for dev_list in ['', 'db_', 'wal_']:
            setattr(self, '{}usable'.format(dev_list), [])

    def report(self, plan):
        """Print the report for ``plan`` in the format chosen by ``--format``."""
        report = self._create_report(plan)
        print(report)

    def _create_report(self, plan):
        """
        Render ``plan`` as text (``pretty``) or as a JSON string (``json`` /
        ``json-pretty``), depending on ``self.args.format``.
        """
        if self.args.format == 'pretty':
            report = ''
            report += templates.total_osds.format(total_osds=len(plan))

            report += templates.osd_component_titles
            for osd in plan:
                report += templates.osd_header
                report += osd.report()
            return report
        else:
            json_report = []
            for osd in plan:
                json_report.append(osd.report_json())
            if self.args.format == 'json':
                return json.dumps(json_report)
            elif self.args.format == 'json-pretty':
                return json.dumps(json_report, indent=4,
                                  sort_keys=True)

    def _check_slot_args(self):
        '''
        checking if -slots args are consistent with other arguments
        '''
        if self.args.data_slots and self.args.osds_per_device:
            if self.args.data_slots < self.args.osds_per_device:
                raise ValueError('data_slots is smaller then osds_per_device')

    def _sort_rotational_disks(self):
        '''
        Helper for legacy auto behaviour.
        Sorts drives into rotating and non-rotating, the latter being used for
        db.
        '''
        mlogger.warning('DEPRECATION NOTICE')
        mlogger.warning('You are using the legacy automatic disk sorting behavior')
        mlogger.warning('The Pacific release will change the default to --no-auto')
        rotating = []
        ssd = []
        for d in self.args.devices:
            if d.rotational:
                rotating.append(d)
            else:
                ssd.append(d)
        if ssd and not rotating:
            # no need for additional sorting, we'll only deploy standalone on ssds
            return
        self.args.devices = rotating
        self.args.db_devices = ssd

    @decorators.needs_root
    def main(self):
        """
        Entry point: validate the arguments, build the plan, then report it
        and/or execute it.

        Without any data device only the parser help is printed.
        """
        if not self.args.devices:
            return self.parser.print_help()

        # Default to bluestore here since defaulting it in add_argument may
        # cause both to be True
        if not self.args.bluestore:
            self.args.bluestore = True

        if (self.args.auto and not self.args.db_devices and not
                self.args.wal_devices):
            self._sort_rotational_disks()

        self._check_slot_args()

        ensure_disjoint_device_lists(self.args.devices,
                                     self.args.db_devices,
                                     self.args.wal_devices)

        plan = self.get_plan(self.args)

        if self.args.report:
            self.report(plan)
            return 0

        if not self.args.yes:
            self.report(plan)
            terminal.info('The above OSDs would be created if the operation continues')
            if not prompt_bool('do you want to proceed? (yes/no)'):
                terminal.error('aborting OSD provisioning')
                raise SystemExit(0)

        self._execute(plan)

    def _execute(self, plan):
        """
        Run ``Prepare.safe_prepare`` (with ``--prepare``) or ``Create.create``
        for every OSD in ``plan``, using the common defaults overlaid with the
        global batch arguments and the OSD's own plan.
        """
        defaults = common.get_default_args()
        global_args = [
            'bluestore',
            'dmcrypt',
            'crush_device_class',
            'no_systemd',
        ]
        defaults.update({arg: getattr(self.args, arg) for arg in global_args})
        for osd in plan:
            args = osd.get_args(defaults)
            if self.args.prepare:
                p = Prepare([])
                p.safe_prepare(argparse.Namespace(**args))
            else:
                c = Create([])
                c.create(argparse.Namespace(**args))

    def get_plan(self, args):
        """Return the deployment layout for ``args`` (bluestore only)."""
        if args.bluestore:
            plan = self.get_deployment_layout(args, args.devices, args.db_devices,
                                              args.wal_devices)
        return plan

    def get_deployment_layout(self, args, devices, fast_devices=(),
                              very_fast_devices=()):
        '''
        The methods here are mostly just organization, error reporting and
        setting up of (default) args. The heavy lifting code for the deployment
        layout can be found in the static get_*_osds and get_*_fast_allocs
        functions.

        Returns the list of planned OSDs, or an empty list when fast devices
        were passed but none of them is available. Raises ``SystemExit(1)``
        when the number of fast allocations does not match the number of OSDs.
        '''
        plan = []
        phys_devs, lvm_devs = separate_devices_from_lvs(devices)
        mlogger.debug(('passed data devices: {} physical,'
                       ' {} LVM').format(len(phys_devs), len(lvm_devs)))

        plan.extend(get_physical_osds(phys_devs, args))

        plan.extend(get_lvm_osds(lvm_devs, args))

        num_osds = len(plan)
        if num_osds == 0:
            mlogger.info('All data devices are unavailable')
            return plan
        requested_osds = args.osds_per_device * len(phys_devs) + len(lvm_devs)

        if args.bluestore:
            fast_type = 'block_db'
        fast_allocations = self.fast_allocations(fast_devices,
                                                 requested_osds,
                                                 num_osds,
                                                 fast_type)
        if fast_devices and not fast_allocations:
            mlogger.info('{} fast devices were passed, but none are available'.format(len(fast_devices)))
            return []
        if fast_devices and not len(fast_allocations) == num_osds:
            mlogger.error('{} fast allocations != {} num_osds'.format(
                len(fast_allocations), num_osds))
            # the ``exit`` builtin depends on the ``site`` module; raise instead
            raise SystemExit(1)

        very_fast_allocations = self.fast_allocations(very_fast_devices,
                                                      requested_osds,
                                                      num_osds,
                                                      'block_wal')
        if very_fast_devices and not very_fast_allocations:
            mlogger.info('{} very fast devices were passed, but none are available'.format(len(very_fast_devices)))
            return []
        if very_fast_devices and not len(very_fast_allocations) == num_osds:
            mlogger.error('{} very fast allocations != {} num_osds'.format(
                len(very_fast_allocations), num_osds))
            raise SystemExit(1)

        for osd in plan:
            if fast_devices:
                osd.add_fast_device(*fast_allocations.pop(),
                                    type_=fast_type)
            if very_fast_devices and args.bluestore:
                osd.add_very_fast_device(*very_fast_allocations.pop())
        return plan

    def fast_allocations(self, devices, requested_osds, new_osds, type_):
        """
        Collect the db/wal allocations offered by ``devices``.

        LV allocations come first, followed by those on physical devices.
        Returns an empty list when ``devices`` is empty.
        """
        ret = []
        if not devices:
            return ret
        phys_devs, lvm_devs = separate_devices_from_lvs(devices)
        mlogger.debug(('passed {} devices: {} physical,'
                       ' {} LVM').format(type_, len(phys_devs), len(lvm_devs)))

        ret.extend(get_lvm_fast_allocs(lvm_devs))

        # fill up uneven distributions across fast devices: 5 osds and 2 fast
        # devices? create 3 slots on each device rather then deploying
        # heterogeneous osds
        slot_divider = max(1, len(phys_devs))
        if (requested_osds - len(lvm_devs)) % slot_divider:
            fast_slots_per_device = int((requested_osds - len(lvm_devs)) / slot_divider) + 1
        else:
            fast_slots_per_device = int((requested_osds - len(lvm_devs)) / slot_divider)

        ret.extend(get_physical_fast_allocs(phys_devs,
                                            type_,
                                            fast_slots_per_device,
                                            new_osds,
                                            self.args))
        return ret

    class OSD(object):
        '''
        This class simply stores info about to-be-deployed OSDs and provides an
        easy way to retrieve the necessary create arguments.
        '''
        VolSpec = namedtuple('VolSpec',
                             ['path',
                              'rel_size',
                              'abs_size',
                              'slots',
                              'type_'])

        def __init__(self,
                     data_path,
                     rel_size,
                     abs_size,
                     slots,
                     id_,
                     encryption,
                     symlink=None):
            self.id_ = id_
            self.data = self.VolSpec(path=data_path,
                                     rel_size=rel_size,
                                     abs_size=abs_size,
                                     slots=slots,
                                     type_='data')
            # filled in later by add_fast_device / add_very_fast_device
            self.fast = None
            self.very_fast = None
            self.encryption = encryption
            self.symlink = symlink

        def add_fast_device(self, path, rel_size, abs_size, slots, type_):
            """Attach the fast volume spec (``type_`` is set by the caller)."""
            self.fast = self.VolSpec(path=path,
                                     rel_size=rel_size,
                                     abs_size=abs_size,
                                     slots=slots,
                                     type_=type_)

        def add_very_fast_device(self, path, rel_size, abs_size, slots):
            """Attach the very fast volume spec, always typed ``block_wal``."""
            self.very_fast = self.VolSpec(path=path,
                                          rel_size=rel_size,
                                          abs_size=abs_size,
                                          slots=slots,
                                          type_='block_wal')

        def _get_osd_plan(self):
            """
            Return this OSD as a dict of create arguments; the fast,
            very fast and ``osd_id`` entries are only present when set.
            """
            plan = {
                'data': self.data.path,
                'data_size': self.data.abs_size,
                'encryption': self.encryption,
            }
            if self.fast:
                type_ = self.fast.type_.replace('.', '_')
                plan.update(
                    {
                        type_: self.fast.path,
                        '{}_size'.format(type_): self.fast.abs_size,
                    })
            if self.very_fast:
                plan.update(
                    {
                        'block_wal': self.very_fast.path,
                        'block_wal_size': self.very_fast.abs_size,
                    })
            if self.id_:
                plan.update({'osd_id': self.id_})
            return plan

        def get_args(self, defaults):
            """Return a copy of ``defaults`` overlaid with this OSD's plan."""
            my_defaults = defaults.copy()
            my_defaults.update(self._get_osd_plan())
            return my_defaults

        def report(self):
            """Render this OSD for the ``pretty`` report format."""
            report = ''
            if self.id_:
                report += templates.osd_reused_id.format(
                    id_=self.id_)
            if self.encryption:
                report += templates.osd_encryption.format(
                    enc=self.encryption)
            path = self.data.path
            if self.symlink:
                path = f'{self.symlink} -> {self.data.path}'
            report += templates.osd_component.format(
                _type=self.data.type_,
                path=path,
                size=self.data.abs_size,
                percent=self.data.rel_size)
            if self.fast:
                report += templates.osd_component.format(
                    _type=self.fast.type_,
                    path=self.fast.path,
                    size=self.fast.abs_size,
                    percent=self.fast.rel_size)
            if self.very_fast:
                report += templates.osd_component.format(
                    _type=self.very_fast.type_,
                    path=self.very_fast.path,
                    size=self.very_fast.abs_size,
                    percent=self.very_fast.rel_size)
            return report

        def report_json(self):
            # cast all values to string so that the report can be dumped in to
            # json.dumps
            return {k: str(v) for k, v in self._get_osd_plan().items()}
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/common.py b/src/ceph-volume/ceph_volume/devices/lvm/common.py
new file mode 100644
index 000000000..35e53181a
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/common.py
@@ -0,0 +1,164 @@
+from ceph_volume.util import arg_validators, disk
+from ceph_volume import process, conf
+from ceph_volume import terminal
+from ceph_volume.devices.lvm.zap import Zap
+import argparse
+
+ def rollback_osd(args, osd_id=None):
+     """
+     When the process of creating or preparing fails, the OSD needs to be
+     destroyed so that the ID can be reused. This prevents from leaving the ID
+     around as "used" on the monitor, which can cause confusion if expecting
+     sequential OSD IDs.
+
+     The usage of `purge-new` allows this to be done without requiring the
+     admin keyring (otherwise needed for destroy and purge commands)
+
+     :param args: accepted for interface compatibility, not used here
+     :param osd_id: ID of the OSD to roll back; ``None`` or an empty string
+                    means no ID was generated and nothing is done
+     """
+     # compare explicitly so that an integer 0 is still treated as a valid ID
+     if osd_id is None or osd_id == '':
+         # it means that it wasn't generated, so there is nothing to rollback here
+         return
+
+     # once here, this is an error condition that needs to be rolled back
+     terminal.error('Was unable to complete a new OSD, will rollback changes')
+     # the ID ends up in argv lists below, which must only contain strings
+     osd_id = str(osd_id)
+     bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
+     cmd = [
+         'ceph',
+         '--cluster', conf.cluster,
+         '--name', 'client.bootstrap-osd',
+         '--keyring', bootstrap_keyring,
+         'osd', 'purge-new', 'osd.%s' % osd_id,
+         '--yes-i-really-mean-it',
+     ]
+
+     process.run(cmd)
+     Zap(['--destroy', '--osd-id', osd_id]).main()
+
+
+ # Options shared by every objectstore; each key is the flag name and each
+ # value holds the keyword arguments passed to ``add_argument`` for it.
+ common_args = {
+     '--data': {
+         'help': 'OSD data path. A physical device or logical volume',
+         'required': True,
+         'type': arg_validators.ValidDataDevice(as_string=True),
+     },
+     '--data-size': {
+         'help': 'Size of data LV in case a device was passed in --data',
+         'default': '0',
+         'type': disk.Size.parse
+     },
+     '--data-slots': {
+         # the trailing space keeps the two literals from fusing into "oneof"
+         'help': ('Intended number of slots on data device. The new OSD gets one '
+                  'of those slots or 1/nth of the available capacity'),
+         'type': int,
+         'default': 1,
+     },
+     '--osd-id': {
+         'help': 'Reuse an existing OSD id',
+         'default': None,
+         'type': arg_validators.valid_osd_id,
+     },
+     '--osd-fsid': {
+         'help': 'Reuse an existing OSD fsid',
+         'default': None,
+     },
+     '--cluster-fsid': {
+         'help': 'Specify the cluster fsid, useful when no ceph.conf is available',
+         'default': None,
+     },
+     '--crush-device-class': {
+         'dest': 'crush_device_class',
+         'help': 'Crush device class to assign this OSD to',
+         'default': "",
+     },
+     '--dmcrypt': {
+         'action': 'store_true',
+         'help': 'Enable device encryption via dm-crypt',
+     },
+     '--no-systemd': {
+         'dest': 'no_systemd',
+         'action': 'store_true',
+         'help': 'Skip creating and enabling systemd units and starting OSD services when activating',
+     },
+ }
+
+ # Bluestore-only options, in the same flag -> ``add_argument`` kwargs layout
+ # as ``common_args``. The dotted flags carry an explicit ``dest``.
+ bluestore_args = {
+     '--bluestore': {
+         'action': 'store_true',
+         'help': 'Use the bluestore objectstore',
+     },
+     '--block.db': {
+         'dest': 'block_db',
+         'help': 'Path to bluestore block.db logical volume or device',
+         'type': arg_validators.ValidDevice(as_string=True),
+     },
+     '--block.db-size': {
+         'dest': 'block_db_size',
+         'help': 'Size of block.db LV in case device was passed in --block.db',
+         'default': '0',
+         'type': disk.Size.parse
+     },
+     '--block.db-slots': {
+         'dest': 'block_db_slots',
+         # the trailing space keeps the two literals from fusing into "oneof"
+         'help': ('Intended number of slots on db device. The new OSD gets one '
+                  'of those slots or 1/nth of the available capacity'),
+         'type': int,
+         'default': 1,
+     },
+     '--block.wal': {
+         'dest': 'block_wal',
+         'help': 'Path to bluestore block.wal logical volume or device',
+         'type': arg_validators.ValidDevice(as_string=True),
+     },
+     '--block.wal-size': {
+         'dest': 'block_wal_size',
+         'help': 'Size of block.wal LV in case device was passed in --block.wal',
+         'default': '0',
+         'type': disk.Size.parse
+     },
+     '--block.wal-slots': {
+         'dest': 'block_wal_slots',
+         'help': ('Intended number of slots on wal device. The new OSD gets one '
+                  'of those slots or 1/nth of the available capacity'),
+         'type': int,
+         'default': 1,
+     },
+ }
+
+
+ def get_default_args():
+     """
+     Return a dict mapping each known option (from ``common_args`` and
+     ``bluestore_args``) to its declared default, or ``None`` when the option
+     declares none. Flag names are converted to attribute style, e.g.
+     ``--block.db-size`` becomes ``block_db_size``.
+     """
+     def format_name(name):
+         return name.strip('-').replace('-', '_').replace('.', '_')
+
+     defaults = {}
+     for argset in (common_args, bluestore_args):
+         for flag, spec in argset.items():
+             defaults[format_name(flag)] = spec.get('default', None)
+     return defaults
+
+
+ def common_parser(prog, description):
+     """
+     Build the argument parser shared by prepare and create so the option
+     definitions live in one place. The parser is returned unparsed.
+     """
+     parser = argparse.ArgumentParser(
+         prog=prog,
+         formatter_class=argparse.RawDescriptionHelpFormatter,
+         description=description,
+     )
+     bluestore_group = parser.add_argument_group('bluestore')
+
+     # common options go on the parser itself, bluestore ones in their group
+     for target, argset in ((parser, common_args), (bluestore_group, bluestore_args)):
+         for flag, options in argset.items():
+             target.add_argument(flag, **options)
+
+     # Parsing is left to the consumer, which may need to act before argparse
+     # behavior gets triggered
+     return parser
+
+
+create_parser = common_parser # noqa
+prepare_parser = common_parser # noqa
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/create.py b/src/ceph-volume/ceph_volume/devices/lvm/create.py
new file mode 100644
index 000000000..631a21b23
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/create.py
@@ -0,0 +1,77 @@
+from __future__ import print_function
+from textwrap import dedent
+import logging
+from ceph_volume.util import system
+from ceph_volume.util.arg_validators import exclude_group_options
+from ceph_volume import decorators, terminal
+from .common import create_parser, rollback_osd
+from .prepare import Prepare
+from .activate import Activate
+
+logger = logging.getLogger(__name__)
+
+
+ class Create(object):
+ # CLI handler for `ceph-volume lvm create`: runs the prepare step and then
+ # the activate step with the same parsed arguments.
+
+ help = 'Create a new OSD from an LVM device'
+
+ def __init__(self, argv):
+ # argv: list of command line arguments for this sub-command
+ self.argv = argv
+
+ @decorators.needs_root
+ def create(self, args):
+ # Prepare and activate an OSD from the parsed `args`. If activation
+ # raises, the OSD id taken from the prepare step is rolled back and the
+ # exception is re-raised.
+ if not args.osd_fsid:
+ # no fsid supplied by the caller, generate one
+ args.osd_fsid = system.generate_uuid()
+ prepare_step = Prepare([])
+ prepare_step.safe_prepare(args)
+ osd_id = prepare_step.osd_id
+ try:
+ # we try this for activate only when 'creating' an OSD, because a rollback should not
+ # happen when doing normal activation. For example when starting an OSD, systemd will call
+ # activate, which would never need to be rolled back.
+ Activate([]).activate(args)
+ except Exception:
+ logger.exception('lvm activate was unable to complete, while creating the OSD')
+ logger.info('will rollback OSD ID creation')
+ rollback_osd(args, osd_id)
+ raise
+ terminal.success("ceph-volume lvm create successful for: %s" % args.data)
+
+ def main(self):
+ # Entry point: print the help text when called without arguments,
+ # otherwise parse self.argv and run create().
+ sub_command_help = dedent("""
+ Create an OSD by assigning an ID and FSID, registering them with the
+ cluster with an ID and FSID, formatting and mounting the volume, adding
+ all the metadata to the logical volumes using LVM tags, and starting
+ the OSD daemon. This is a convenience command that combines the prepare
+ and activate steps.
+
+ Encryption is supported via dmcrypt and the --dmcrypt flag.
+
+ Existing logical volume (lv):
+
+ ceph-volume lvm create --data {vg/lv}
+
+ Existing block device (a logical volume will be created):
+
+ ceph-volume lvm create --data /path/to/device
+
+ Optionally, can consume db and wal block devices, partitions or logical
+ volumes. A device will get a logical volume, partitions and existing
+ logical volumes will be used as is:
+
+ ceph-volume lvm create --data {vg/lv} --block.wal {partition} --block.db {/path/to/device}
+ """)
+ parser = create_parser(
+ prog='ceph-volume lvm create',
+ description=sub_command_help,
+ )
+ if len(self.argv) == 0:
+ print(sub_command_help)
+ return
+ exclude_group_options(parser, groups=['bluestore'], argv=self.argv)
+ args = parser.parse_args(self.argv)
+ # Default to bluestore here since defaulting it in add_argument may
+ # cause both to be True
+ if not args.bluestore:
+ args.bluestore = True
+ self.create(args)
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/deactivate.py b/src/ceph-volume/ceph_volume/devices/lvm/deactivate.py
new file mode 100644
index 000000000..0cc8d71ae
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/deactivate.py
@@ -0,0 +1,88 @@
+import argparse
+import logging
+import sys
+from textwrap import dedent
+from ceph_volume import conf
+from ceph_volume.util import encryption, system
+from ceph_volume.api.lvm import get_lvs_by_tag
+
+logger = logging.getLogger(__name__)
+
+
+ def deactivate_osd(osd_id=None, osd_uuid=None):
+     """
+     Unmount the tmpfs of an OSD and close the dmcrypt mapping of each of its
+     LVs tagged as encrypted. The OSD is looked up by ``osd_uuid`` when given
+     (the id is then read from the LV tags), otherwise by ``osd_id``.
+
+     Raises StopIteration when no matching LV, or no data/block LV, is found.
+     """
+     if osd_uuid is None:
+         lvs = get_lvs_by_tag('ceph.osd_id={}'.format(osd_id))
+     else:
+         lvs = get_lvs_by_tag('ceph.osd_fsid={}'.format(osd_uuid))
+         osd_id = next(lv.tags['ceph.osd_id'] for lv in lvs)
+
+     data_lv = next(lv for lv in lvs if lv.tags['ceph.type'] in ['data', 'block'])
+
+     # the cluster name recorded on the data LV determines the mount path
+     conf.cluster = data_lv.tags['ceph.cluster_name']
+     logger.debug('Found cluster name {}'.format(conf.cluster))
+
+     system.unmount_tmpfs('/var/lib/ceph/osd/{}-{}'.format(conf.cluster, osd_id))
+
+     encrypted_lvs = [lv for lv in lvs if lv.tags.get('ceph.encrypted', '0') == '1']
+     for lv in encrypted_lvs:
+         encryption.dmcrypt_close(mapping=lv.lv_uuid, skip_path_check=True)
+
+
+ class Deactivate(object):
+     """CLI handler for ``ceph-volume lvm deactivate``."""
+
+     help = 'Deactivate OSDs'
+
+     def deactivate(self, args=None):
+         """
+         Deactivate the OSD described by ``args`` (or by ``self.args`` when
+         ``args`` is not given). Exits with status 1 when the lookup of the
+         OSD's LVs raises StopIteration.
+         """
+         if args:
+             self.args = args
+         try:
+             deactivate_osd(self.args.osd_id, self.args.osd_uuid)
+         except StopIteration:
+             logger.error('No data or block LV found for OSD {}'.format(
+                 self.args.osd_id))
+             sys.exit(1)
+
+     def __init__(self, argv):
+         self.argv = argv
+
+     def main(self):
+         """
+         Parse ``self.argv`` and deactivate the requested OSD. With no
+         arguments the help text is printed and nothing else is done.
+
+         Raises ValueError when neither an OSD id nor an OSD uuid is given.
+         """
+         sub_command_help = dedent("""
+         Deactivate unmounts an OSD's tmpfs and closes any crypt devices.
+
+             ceph-volume lvm deactivate {ID} {FSID}
+
+         """)
+         parser = argparse.ArgumentParser(
+             prog='ceph-volume lvm deactivate',
+             formatter_class=argparse.RawDescriptionHelpFormatter,
+             description=sub_command_help,
+         )
+
+         parser.add_argument(
+             'osd_id',
+             nargs='?',
+             help='The ID of the OSD'
+         )
+         parser.add_argument(
+             'osd_uuid',
+             nargs='?',
+             help='The UUID of the OSD, similar to a SHA1, takes precedence over osd_id'
+         )
+         if len(self.argv) == 0:
+             print(sub_command_help)
+             return
+         args = parser.parse_args(self.argv)
+         # at least one identifier is needed to locate the OSD's LVs
+         if not args.osd_id and not args.osd_uuid:
+             raise ValueError('Can not identify OSD, pass either '
+                              'osd_id or osd_uuid')
+         self.deactivate(args)
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/listing.py b/src/ceph-volume/ceph_volume/devices/lvm/listing.py
new file mode 100644
index 000000000..c16afdaa7
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/listing.py
@@ -0,0 +1,223 @@
+from __future__ import print_function
+import argparse
+import json
+import logging
+from textwrap import dedent
+from ceph_volume import decorators
+from ceph_volume.api import lvm as api
+
+logger = logging.getLogger(__name__)
+
+
+ # Header emitted once per OSD by pretty_report(); the format spec centers
+ # the label in a 20 character field padded with '='.
+ osd_list_header_template = """\n
+ {osd_id:=^20}"""
+
+
+ # Header emitted once per device of an OSD: bracketed type, then the path.
+ osd_device_header_template = """
+
+ {type: <13} {path}
+ """
+
+ # One metadata line per tag: left-aligned tag name followed by its value.
+ device_metadata_item_template = """
+ {tag_name: <25} {value}"""
+
+
+ def readable_tag(tag):
+     """
+     Turn a dotted tag name into a display label: keep only the part after
+     the last dot and replace underscores with spaces.
+     """
+     _, _, actual_name = tag.rpartition('.')
+     return ' '.join(actual_name.split('_'))
+
+
+ def pretty_report(report):
+     """
+     Print ``report`` (OSD id -> list of device dicts) in human readable
+     form: one header per OSD, one header per device, the device tags sorted
+     by name, and a final ``devices`` line when the device lists any.
+     """
+     chunks = []
+     for osd_id, devices in sorted(report.items()):
+         chunks.append(
+             osd_list_header_template.format(osd_id=" osd.%s " % osd_id))
+         for device in devices:
+             chunks.append(osd_device_header_template.format(
+                 type='[%s]' % device['type'],
+                 path=device['path']))
+             tags = device.get('tags', {})
+             chunks.extend(
+                 device_metadata_item_template.format(
+                     tag_name=readable_tag(tag_name), value=value)
+                 for tag_name, value in sorted(tags.items()))
+             backing = device.get('devices')
+             if backing:
+                 chunks.append(device_metadata_item_template.format(
+                     tag_name='devices',
+                     value=','.join(backing)))
+
+     print(''.join(chunks))
+
+
+ def direct_report():
+     """
+     Return the full listing report for non-cli consumers, which get the data
+     without having to parse arguments or drive the ``List`` class themselves.
+     """
+     lister = List([])
+     return lister.full_report()
+
+
+# TODO: Perhaps, get rid of this class and simplify this module further?
+ # TODO: Perhaps, get rid of this class and simplify this module further?
+ class List(object):
+     """CLI handler for ``ceph-volume lvm list``."""
+
+     help = 'list logical volumes and devices associated with Ceph'
+
+     def __init__(self, argv):
+         self.argv = argv
+
+     @decorators.needs_root
+     def list(self, args):
+         """
+         Print the report for ``args.device`` (or for everything when no
+         device is given) as JSON or in the pretty format.
+
+         Raises SystemExit when the pretty format is requested and the report
+         is empty.
+         """
+         report = self.single_report(args.device) if args.device else \
+             self.full_report()
+         if args.format == 'json':
+             # If the report is empty, we don't return a non-zero exit status
+             # because it is assumed this is going to be consumed by automated
+             # systems like ceph-ansible which would be forced to ignore the
+             # non-zero exit status if all they need is the information in the
+             # JSON object
+             print(json.dumps(report, indent=4, sort_keys=True))
+         else:
+             if not report:
+                 raise SystemExit('No valid Ceph lvm devices found')
+             pretty_report(report)
+
+     def create_report(self, lvs):
+         """
+         Create a report for LVM dev(s) passed. Returns '{}' to denote failure.
+         """
+
+         report = {}
+
+         pvs = api.get_pvs()
+
+         for lv in lvs:
+             if not api.is_ceph_device(lv):
+                 continue
+
+             osd_id = lv.tags['ceph.osd_id']
+             report.setdefault(osd_id, [])
+             lv_report = lv.as_dict()
+
+             lv_report['devices'] = [pv.name for pv in pvs if pv.lv_uuid == lv.lv_uuid] if pvs else []
+             report[osd_id].append(lv_report)
+
+             phys_devs = self.create_report_non_lv_device(lv)
+             if phys_devs:
+                 report[osd_id].append(phys_devs)
+
+         return report
+
+     def create_report_non_lv_device(self, lv):
+         """
+         For a data/block LV, describe a journal/wal/db device referenced by
+         its tags that is not itself an LV. Returns '{}' when there is none.
+
+         NOTE(review): only one dict is returned, so when several of the
+         journal/wal/db tags point at non-LV devices the last one wins.
+         """
+         report = {}
+         if lv.tags.get('ceph.type', '') in ['data', 'block']:
+             for dev_type in ['journal', 'wal', 'db']:
+                 dev = lv.tags.get('ceph.{}_device'.format(dev_type), '')
+                 # counting / in the device name seems brittle but should work,
+                 # lvs will have 3
+                 if dev and dev.count('/') == 2:
+                     device_uuid = lv.tags.get('ceph.{}_uuid'.format(dev_type))
+                     report = {'tags': {'PARTUUID': device_uuid},
+                               'type': dev_type,
+                               'path': dev}
+         return report
+
+     def full_report(self):
+         """
+         Create a report of all Ceph LVs. Returns '{}' to denote failure.
+         """
+         return self.create_report(api.get_lvs())
+
+     def single_report(self, arg):
+         """
+         Generate a report for a single device. This can be either a logical
+         volume in the form of vg/lv, a device with an absolute path like
+         /dev/sda1 or /dev/sda, or a list of devices under same OSD ID.
+
+         Return value '{}' denotes failure.
+         """
+         if isinstance(arg, int) or arg.isdigit():
+             lvs = api.get_lvs_from_osd_id(arg)
+         elif arg.startswith('/'):
+             lvs = api.get_lvs_from_path(arg)
+         else:
+             # vg/lv form: filter on both names so that an LV with the same
+             # name in another volume group cannot be picked up by mistake
+             vg_name, sep, lv_name = arg.partition('/')
+             lvs = []
+             if sep and vg_name and lv_name:
+                 found = api.get_single_lv(filters={'lv_name': lv_name,
+                                                    'vg_name': vg_name})
+                 if found:
+                     lvs = [found]
+
+         report = self.create_report(lvs)
+
+         if not report:
+             # check if device is a non-lvm journals or wal/db
+             for dev_type in ['journal', 'wal', 'db']:
+                 tagged_lvs = api.get_lvs(tags={
+                     'ceph.{}_device'.format(dev_type): arg})
+                 if tagged_lvs:
+                     # just taking the first lv here should work
+                     lv = tagged_lvs[0]
+                     phys_dev = self.create_report_non_lv_device(lv)
+                     osd_id = lv.tags.get('ceph.osd_id')
+                     if osd_id:
+                         report[osd_id] = [phys_dev]
+
+         return report
+
+     def main(self):
+         """Parse ``self.argv`` and print the requested listing."""
+         sub_command_help = dedent("""
+         List devices or logical volumes associated with Ceph. An association is
+         determined if a device has information relating to an OSD. This is
+         verified by querying LVM's metadata and correlating it with devices.
+
+         The lvs associated with the OSD need to have been prepared previously,
+         so that all needed tags and metadata exist.
+
+         Full listing of all system devices associated with a cluster::
+
+             ceph-volume lvm list
+
+         List devices under same OSD ID::
+
+             ceph-volume lvm list <OSD-ID>
+
+         List a particular device, reporting all metadata about it::
+
+             ceph-volume lvm list /dev/sda1
+
+         List a logical volume, along with all its metadata (vg is a volume
+         group, and lv the logical volume name)::
+
+             ceph-volume lvm list {vg/lv}
+         """)
+         parser = argparse.ArgumentParser(
+             prog='ceph-volume lvm list',
+             formatter_class=argparse.RawDescriptionHelpFormatter,
+             description=sub_command_help,
+         )
+
+         parser.add_argument(
+             'device',
+             metavar='DEVICE',
+             nargs='?',
+             help='Path to an lv (as vg/lv) or to a device like /dev/sda1'
+         )
+
+         parser.add_argument(
+             '--format',
+             help='output format, defaults to "pretty"',
+             default='pretty',
+             choices=['json', 'pretty'],
+         )
+
+         args = parser.parse_args(self.argv)
+         self.list(args)
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/main.py b/src/ceph-volume/ceph_volume/devices/lvm/main.py
new file mode 100644
index 000000000..39947454d
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/main.py
@@ -0,0 +1,54 @@
+import argparse
+from textwrap import dedent
+from ceph_volume import terminal
+from . import activate
+from . import deactivate
+from . import prepare
+from . import create
+from . import trigger
+from . import listing
+from . import zap
+from . import batch
+from . import migrate
+
+
+ class LVM(object):
+ # Top-level handler for `ceph-volume lvm`; maps each sub-command name to
+ # the class implementing it.
+
+ help = 'Use LVM and LVM-based technologies to deploy OSDs'
+
+ _help = dedent("""
+ Use LVM and LVM-based technologies to deploy OSDs
+
+ {sub_help}
+ """)
+
+ # sub-command name -> handler class, handed to terminal.dispatch() in main()
+ mapper = {
+ 'activate': activate.Activate,
+ 'deactivate': deactivate.Deactivate,
+ 'batch': batch.Batch,
+ 'prepare': prepare.Prepare,
+ 'create': create.Create,
+ 'trigger': trigger.Trigger,
+ 'list': listing.List,
+ 'zap': zap.Zap,
+ 'migrate': migrate.Migrate,
+ 'new-wal': migrate.NewWAL,
+ 'new-db': migrate.NewDB,
+ }
+
+ def __init__(self, argv):
+ self.argv = argv
+
+ def print_help(self, sub_help):
+ # Despite the name this returns the formatted help text, it does not
+ # print it.
+ return self._help.format(sub_help=sub_help)
+
+ def main(self):
+ # Hand the arguments to terminal.dispatch() first; the parser below is
+ # then built with the sub-command help as its description.
+ terminal.dispatch(self.mapper, self.argv)
+ parser = argparse.ArgumentParser(
+ prog='ceph-volume lvm',
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ description=self.print_help(terminal.subhelp(self.mapper)),
+ )
+ parser.parse_args(self.argv)
+ if len(self.argv) <= 1:
+ return parser.print_help()
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/migrate.py b/src/ceph-volume/ceph_volume/devices/lvm/migrate.py
new file mode 100644
index 000000000..64589a2d6
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/migrate.py
@@ -0,0 +1,719 @@
+from __future__ import print_function
+import argparse
+import logging
+import os
+from textwrap import dedent
+from ceph_volume.util import system, disk, merge_dict
+from ceph_volume.util.device import Device
+from ceph_volume.util.arg_validators import valid_osd_id
+from ceph_volume.util import encryption as encryption_utils
+from ceph_volume import decorators, terminal, process
+from ceph_volume.api import lvm as api
+from ceph_volume.systemd import systemctl
+
+
+logger = logging.getLogger(__name__)
+mlogger = terminal.MultiLogger(__name__)
+
+ def get_cluster_name(osd_id, osd_fsid):
+     """
+     Look up the LVs tagged with the given ``osd_id`` and ``osd_fsid`` and
+     return the ``ceph.cluster_name`` tag of the first one.
+
+     Raises SystemExit when no LV matches.
+     """
+     lv_tags = {'ceph.osd_id': osd_id, 'ceph.osd_fsid': osd_fsid}
+     lvs = api.get_lvs(tags=lv_tags)
+     if lvs:
+         first_lv = next(iter(lvs))
+         return first_lv.tags["ceph.cluster_name"]
+
+     mlogger.error(
+         'Unable to find any LV for source OSD: id:{} fsid:{}'.format(
+             osd_id, osd_fsid))
+     raise SystemExit('Unexpected error, terminating')
+
+ def get_osd_path(osd_id, osd_fsid):
+     """
+     Return the OSD directory, ``/var/lib/ceph/osd/<cluster>-<osd_id>``, with
+     the cluster name resolved through ``get_cluster_name``.
+     """
+     cluster_name = get_cluster_name(osd_id, osd_fsid)
+     return '/var/lib/ceph/osd/{}-{}'.format(cluster_name, osd_id)
+
+ def find_associated_devices(osd_id, osd_fsid):
+     """
+     Find the LVs tagged with the given ``osd_id`` and ``osd_fsid``, resolve
+     their backing devices (LVs or partitions) and return them as a list of
+     ``(Device, type)`` tuples with duplicates removed.
+
+     Raises SystemExit when no LV matches.
+     """
+     lv_tags = {'ceph.osd_id': osd_id, 'ceph.osd_fsid': osd_fsid}
+     lvs = api.get_lvs(tags=lv_tags)
+     if not lvs:
+         mlogger.error(
+             'Unable to find any LV for source OSD: id:{} fsid:{}'.format(
+                 osd_id, osd_fsid))
+         raise SystemExit('Unexpected error, terminating')
+
+     # the same (path, type) pair can be reported several times, keep one each
+     unique_devices = set(ensure_associated_lvs(lvs, lv_tags))
+     found = []
+     for path, type in unique_devices:
+         if path:
+             found.append((Device(path), type))
+     return found
+
+ def ensure_associated_lvs(lvs, lv_tags):
+     """
+     Resolve the block, db and wal backing devices for the given LVs and
+     return a list of ``(path, type)`` tuples. A type that has LVs matching
+     ``lv_tags`` contributes their paths; otherwise the ``ceph.<type>_uuid``
+     tag of each LV is resolved to a partition. The list may hold duplicates.
+     """
+     # More than one LV can match a backing type, e.g. leftovers of failed
+     # deployments that reused the same OSD id; usually there is just one.
+     backing_devices = [
+         (api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': kind})), kind)
+         for kind in ('block', 'db', 'wal')
+     ]
+
+     verified_devices = []
+
+     # Every type is checked again for every LV, regardless of what earlier
+     # LVs produced, so that bad devices sharing the same ID can be caught
+     for lv in lvs:
+         for ceph_lvs, kind in backing_devices:
+             if ceph_lvs:
+                 for backing_lv in ceph_lvs:
+                     verified_devices.append((backing_lv.lv_path, kind))
+                 continue
+
+             # must be a disk partition: resolving it through its partuuid
+             # ensures the device path is always correct
+             uuid_tag = 'ceph.{}_uuid'.format(kind)
+             if uuid_tag not in lv.tags:
+                 # not every LV carries a uuid tag for every type
+                 continue
+
+             osd_device = disk.get_device_from_partuuid(lv.tags[uuid_tag])
+             if osd_device:
+                 verified_devices.append((osd_device, kind))
+             # otherwise the device cannot be confirmed to exist, skip it
+
+     return verified_devices
+
+ class VolumeTagTracker(object):
+ # Keeps the LVM tags of the target LV and of an OSD's data/db/wal devices
+ # as they were at construction time, offers helpers that rewrite those tags
+ # for a migration, and undo() to write the saved tags back.
+ def __init__(self, devices, target_lv):
+ # devices: iterable of (device, type) pairs; the 'block' entry becomes the
+ # data device. Exits when there is no data device or it is not an LV.
+ self.target_lv = target_lv
+ self.data_device = self.db_device = self.wal_device = None
+ for device, type in devices:
+ if type == 'block':
+ self.data_device = device
+ elif type == 'db':
+ self.db_device = device
+ elif type == 'wal':
+ self.wal_device = device
+ if not self.data_device:
+ mlogger.error('Data device not found')
+ raise SystemExit(
+ "Unexpected error, terminating")
+ if not self.data_device.is_lv:
+ mlogger.error('Data device isn\'t LVM')
+ raise SystemExit(
+ "Unexpected error, terminating")
+
+ # snapshot the current tags so undo() can restore them; devices that are
+ # absent or not LVs are recorded as None
+ self.old_target_tags = self.target_lv.tags.copy()
+ self.old_data_tags = (
+ self.data_device.lv_api.tags.copy()
+ if self.data_device.is_lv else None)
+ self.old_db_tags = (
+ self.db_device.lv_api.tags.copy()
+ if self.db_device and self.db_device.is_lv else None)
+ self.old_wal_tags = (
+ self.wal_device.lv_api.tags.copy()
+ if self.wal_device and self.wal_device.is_lv else None)
+
+ def update_tags_when_lv_create(self, create_type):
+ # Point the data device (and the other auxiliary device, if any) at the
+ # target LV as the new `create_type` volume, and give the target LV a copy
+ # of the data device tags with ceph.type set to `create_type`.
+ tags = {}
+ if not self.data_device.is_lv:
+ mlogger.warning(
+ 'Data device is not LVM, wouldn\'t update LVM tags')
+ else:
+ tags["ceph.{}_uuid".format(create_type)] = self.target_lv.lv_uuid
+ tags["ceph.{}_device".format(create_type)] = self.target_lv.lv_path
+ self.data_device.lv_api.set_tags(tags)
+
+ tags = self.data_device.lv_api.tags.copy()
+ tags["ceph.type"] = create_type
+ self.target_lv.set_tags(tags)
+
+ # the auxiliary device is the one of the other type: wal when creating a
+ # db, db when creating a wal
+ aux_dev = None
+ if create_type == "db" and self.wal_device:
+ aux_dev = self.wal_device
+ elif create_type == "wal" and self.db_device:
+ aux_dev = self.db_device
+ else:
+ return
+ if not aux_dev.is_lv:
+ # NOTE(review): the message names create_type, while aux_dev is the device
+ # of the other type - confirm the intended wording
+ mlogger.warning(
+ '{} device is not LVM, wouldn\'t update LVM tags'.format(
+ create_type.upper()))
+ else:
+ tags = {}
+ tags["ceph.{}_uuid".format(create_type)] = self.target_lv.lv_uuid
+ tags["ceph.{}_device".format(create_type)] = self.target_lv.lv_path
+ aux_dev.lv_api.set_tags(tags)
+
+ def remove_lvs(self, source_devices, target_type):
+ # Clear the tags of the source devices other than block and target_type,
+ # and drop the uuid/device tags that referenced them from the remaining
+ # devices.
+ remaining_devices = [self.data_device, self.db_device, self.wal_device]
+
+ outdated_tags = []
+ for device, type in source_devices:
+ if type == "block" or type == target_type:
+ continue
+ remaining_devices.remove(device)
+ if device.is_lv:
+ outdated_tags.append("ceph.{}_uuid".format(type))
+ outdated_tags.append("ceph.{}_device".format(type))
+ device.lv_api.clear_tags()
+ if len(outdated_tags) > 0:
+ for d in remaining_devices:
+ if d and d.is_lv:
+ d.lv_api.clear_tags(outdated_tags)
+
+ def replace_lvs(self, source_devices, target_type):
+ # Clear the tags of every non-block source device, then make the
+ # remaining devices and the target LV reference the target LV as the new
+ # `target_type` volume.
+ remaining_devices = [self.data_device]
+ if self.db_device:
+ remaining_devices.append(self.db_device)
+ if self.wal_device:
+ remaining_devices.append(self.wal_device)
+
+ outdated_tags = []
+ for device, type in source_devices:
+ if type == "block":
+ continue
+ remaining_devices.remove(device)
+ if device.is_lv:
+ outdated_tags.append("ceph.{}_uuid".format(type))
+ outdated_tags.append("ceph.{}_device".format(type))
+ device.lv_api.clear_tags()
+
+ new_tags = {}
+ new_tags["ceph.{}_uuid".format(target_type)] = self.target_lv.lv_uuid
+ new_tags["ceph.{}_device".format(target_type)] = self.target_lv.lv_path
+
+ for d in remaining_devices:
+ if d and d.is_lv:
+ if len(outdated_tags) > 0:
+ d.lv_api.clear_tags(outdated_tags)
+ d.lv_api.set_tags(new_tags)
+
+ if not self.data_device.is_lv:
+ mlogger.warning(
+ 'Data device is not LVM, wouldn\'t properly update target LVM tags')
+ else:
+ tags = self.data_device.lv_api.tags.copy()
+
+ tags["ceph.type"] = target_type
+ tags["ceph.{}_uuid".format(target_type)] = self.target_lv.lv_uuid
+ tags["ceph.{}_device".format(target_type)] = self.target_lv.lv_path
+ self.target_lv.set_tags(tags)
+
+ def undo(self):
+ # Write the tags saved in __init__ back to each device and to the target
+ # LV; where nothing was saved the tags are cleared instead.
+ mlogger.info(
+ 'Undoing lv tag set')
+ if self.data_device:
+ if self.old_data_tags:
+ self.data_device.lv_api.set_tags(self.old_data_tags)
+ else:
+ self.data_device.lv_api.clear_tags()
+ if self.db_device:
+ if self.old_db_tags:
+ self.db_device.lv_api.set_tags(self.old_db_tags)
+ else:
+ self.db_device.lv_api.clear_tags()
+ if self.wal_device:
+ if self.old_wal_tags:
+ self.wal_device.lv_api.set_tags(self.old_wal_tags)
+ else:
+ self.wal_device.lv_api.clear_tags()
+ if self.old_target_tags:
+ self.target_lv.set_tags(self.old_target_tags)
+ else:
+ self.target_lv.clear_tags()
+
+class Migrate(object):
+
+ help = 'Migrate BlueFS data from to another LVM device'
+
+ def __init__(self, argv):
+ # argv: command line arguments for this sub-command; osd_id starts unset
+ self.argv = argv
+ self.osd_id = None
+
+ def get_source_devices(self, devices, target_type=""):
+     """
+     Select the ``[device, type]`` pairs to migrate from: entries of
+     ``target_type`` are dropped, and block/db/wal entries are kept only when
+     the matching name (data/db/wal) was requested through ``--from``.
+
+     Raises SystemExit when nothing is selected.
+     """
+     # device type -> name it goes by in the --from option
+     from_name = {'block': 'data', 'db': 'db', 'wal': 'wal'}
+     selected = []
+     for device, kind in devices:
+         if kind == target_type:
+             continue
+         wanted = from_name.get(kind)
+         if wanted is not None and wanted not in self.args.from_:
+             continue
+         selected.append([device, kind])
+     if not selected:
+         mlogger.error('Source device list is empty')
+         raise SystemExit(
+             'Unable to migrate to : {}'.format(self.args.target))
+     return selected
+
+ # ceph-bluestore-tool uses the following replacement rules
+ # (in the order of precedence, stop on the first match):
+ # if source list has DB volume - target device replaces it.
+ # if source list has WAL volume - target device replaces it.
+ # if source list has slow volume only - operation isn't permitted,
+ # requires explicit allocation via new-db/new-wal command.
+ def get_target_type_by_source(self, devices):
+     """
+     Return the volume type the target takes over: 'db' when the sources
+     include a db volume, else 'wal' when they include a wal volume, else
+     ``None``.
+     """
+     found_wal = False
+     for _, kind in devices:
+         if kind == 'db':
+             # db has precedence, no need to look any further
+             return 'db'
+         if kind == 'wal':
+             found_wal = True
+     return 'wal' if found_wal else None
+
+ def get_filename_by_type(self, type):
+     """
+     Return the file name used for a volume of the given type: 'block.db' or
+     'block.wal' for db and wal, plain 'block' for anything else.
+     """
+     if type in ('db', 'wal'):
+         return 'block.' + type
+     return 'block'
+
+ def get_source_args(self, osd_path, devices):
+     """
+     Build the ``--devs-source <path>`` argument pairs, one per entry of
+     ``devices``, where the path is the volume's file under ``osd_path``.
+     """
+     source_args = []
+     for _, kind in devices:
+         source_file = os.path.join(osd_path, self.get_filename_by_type(kind))
+         source_args.extend(["--devs-source", source_file])
+     return source_args
+
+ def close_encrypted(self, source_devices):
+     """
+     Close the dmcrypt mapping of every db and wal device in
+     ``source_devices``; entries of other types are left alone.
+     """
+     for device, kind in source_devices:
+         if kind not in ('db', 'wal'):
+             continue
+         logger.info("closing dmcrypt volume {}"
+                     .format(device.lv_api.lv_uuid))
+         encryption_utils.dmcrypt_close(
+             mapping=device.lv_api.lv_uuid, skip_path_check=True)
+
+ @decorators.needs_root
+ def migrate_to_new(self, osd_id, osd_fsid, devices, target_lv):
+ # Migrate BlueFS data from the selected source devices to `target_lv`,
+ # taken here as an LV that does not belong to the OSD yet. The target's
+ # volume type is derived from the sources. LVM tags are rewritten before
+ # ceph-bluestore-tool runs and restored if anything in that sequence
+ # raises. Exits when no target type can be determined or the tool fails.
+ source_devices = self.get_source_devices(devices)
+ target_type = self.get_target_type_by_source(source_devices)
+ if not target_type:
+ mlogger.error(
+ "Unable to determine new volume type,"
+ " please use new-db or new-wal command before.")
+ raise SystemExit(
+ "Unable to migrate to : {}".format(self.args.target))
+
+ target_path = target_lv.lv_path
+ tag_tracker = VolumeTagTracker(devices, target_lv)
+ # prepare and encrypt target if data volume is encrypted
+ if tag_tracker.data_device.lv_api.encrypted:
+ secret = encryption_utils.get_dmcrypt_key(osd_id, osd_fsid)
+ mlogger.info(' preparing dmcrypt for {}, uuid {}'.format(target_lv.lv_path, target_lv.lv_uuid))
+ # from here on target_path is whatever prepare_dmcrypt() returns
+ target_path = encryption_utils.prepare_dmcrypt(
+ key=secret, device=target_path, mapping=target_lv.lv_uuid)
+ try:
+ # we need to update lvm tags for all the remaining volumes
+ # and clear for ones which to be removed
+
+ # ceph-bluestore-tool removes source volume(s) other than block one
+ # and attaches target one after successful migration
+ tag_tracker.replace_lvs(source_devices, target_type)
+
+ osd_path = get_osd_path(osd_id, osd_fsid)
+ source_args = self.get_source_args(osd_path, source_devices)
+ mlogger.info("Migrate to new, Source: {} Target: {}".format(
+ source_args, target_path))
+ stdout, stderr, exit_code = process.call([
+ 'ceph-bluestore-tool',
+ '--path',
+ osd_path,
+ '--dev-target',
+ target_path,
+ '--command',
+ 'bluefs-bdev-migrate'] +
+ source_args)
+ if exit_code != 0:
+ mlogger.error(
+ 'Failed to migrate device, error code:{}'.format(exit_code))
+ raise SystemExit(
+ 'Failed to migrate to : {}'.format(self.args.target))
+
+ system.chown(os.path.join(osd_path, "block.{}".format(
+ target_type)))
+ if tag_tracker.data_device.lv_api.encrypted:
+ self.close_encrypted(source_devices)
+ terminal.success('Migration successful.')
+
+ # the bare except also catches the SystemExit raised above, so the tags are
+ # restored when the tool reports a failure as well
+ except:
+ tag_tracker.undo()
+ raise
+
+ return
+
+ @decorators.needs_root
+ def migrate_to_existing(self, osd_id, osd_fsid, devices, target_lv):
+ # Migrate BlueFS data from the selected source devices to `target_lv`,
+ # taken here as an LV the OSD already uses; its type is read from the
+ # ceph.type tag. LVM tags are rewritten before ceph-bluestore-tool runs
+ # and restored if anything in that sequence raises. Exits when the target
+ # is a WAL volume or the tool fails.
+ target_type = target_lv.tags["ceph.type"]
+ if target_type == "wal":
+ mlogger.error("Migrate to WAL is not supported")
+ raise SystemExit(
+ "Unable to migrate to : {}".format(self.args.target))
+ target_filename = self.get_filename_by_type(target_type)
+ # NOTE(review): get_filename_by_type() always returns a name starting with
+ # 'block', so this branch looks unreachable - confirm
+ if (target_filename == ""):
+ mlogger.error(
+ "Target Logical Volume doesn't have proper volume type "
+ "(ceph.type LVM tag): {}".format(target_type))
+ raise SystemExit(
+ "Unable to migrate to : {}".format(self.args.target))
+
+ osd_path = get_osd_path(osd_id, osd_fsid)
+ source_devices = self.get_source_devices(devices, target_type)
+ # the target is addressed through its file inside the OSD directory
+ target_path = os.path.join(osd_path, target_filename)
+ tag_tracker = VolumeTagTracker(devices, target_lv)
+
+ try:
+ # ceph-bluestore-tool removes source volume(s) other than
+ # block and target ones after successful migration
+ tag_tracker.remove_lvs(source_devices, target_type)
+ source_args = self.get_source_args(osd_path, source_devices)
+ mlogger.info("Migrate to existing, Source: {} Target: {}".format(
+ source_args, target_path))
+ stdout, stderr, exit_code = process.call([
+ 'ceph-bluestore-tool',
+ '--path',
+ osd_path,
+ '--dev-target',
+ target_path,
+ '--command',
+ 'bluefs-bdev-migrate'] +
+ source_args)
+ if exit_code != 0:
+ mlogger.error(
+ 'Failed to migrate device, error code:{}'.format(exit_code))
+ raise SystemExit(
+ 'Failed to migrate to : {}'.format(self.args.target))
+ if tag_tracker.data_device.lv_api.encrypted:
+ self.close_encrypted(source_devices)
+ terminal.success('Migration successful.')
+ # the bare except also catches the SystemExit raised above, so the tags are
+ # restored when the tool reports a failure as well
+ except:
+ tag_tracker.undo()
+ raise
+
+ return
+
+ @decorators.needs_root
+ def migrate_osd(self):
+     """
+     Run the migration described by ``self.args``: refuse to act on a running
+     OSD (unless systemd checks are disabled), resolve the target LV and the
+     OSD's devices, then migrate to a new or to an already used target.
+
+     Raises SystemExit when the OSD is active, the target is not an LV, or
+     the target belongs to a different OSD.
+     """
+     osd_id = self.args.osd_id
+     osd_fsid = self.args.osd_fsid
+
+     if osd_id and not self.args.no_systemd:
+         if systemctl.osd_is_active(osd_id):
+             mlogger.error(
+                 'OSD is running, stop it with: systemctl stop ceph-osd@{}'
+                 .format(osd_id))
+             raise SystemExit(
+                 'Unable to migrate devices associated with OSD ID: {}'
+                 .format(osd_id))
+
+     target_lv = api.get_lv_by_fullname(self.args.target)
+     if not target_lv:
+         mlogger.error(
+             'Target path "{}" is not a Logical Volume'.format(
+                 self.args.target))
+         raise SystemExit(
+             'Unable to migrate to : {}'.format(self.args.target))
+
+     devices = find_associated_devices(osd_id, osd_fsid)
+     if not target_lv.used_by_ceph:
+         self.migrate_to_new(osd_id, osd_fsid, devices, target_lv)
+         return
+
+     # a target already used by ceph has to belong to this very OSD
+     if (target_lv.tags['ceph.osd_id'] != osd_id or
+             target_lv.tags['ceph.osd_fsid'] != osd_fsid):
+         mlogger.error(
+             'Target Logical Volume isn\'t used by the specified OSD: {} FSID: {}'
+             .format(osd_id, osd_fsid))
+         raise SystemExit(
+             'Unable to migrate to : {}'.format(self.args.target))
+
+     self.migrate_to_existing(osd_id, osd_fsid, devices, target_lv)
+
+ def make_parser(self, prog, sub_command_help):
+ parser = argparse.ArgumentParser(
+ prog=prog,
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ description=sub_command_help,
+ )
+
+ parser.add_argument(
+ '--osd-id',
+ required=True,
+ help='Specify an OSD ID to detect associated devices for zapping',
+ type=valid_osd_id
+ )
+
+ parser.add_argument(
+ '--osd-fsid',
+ required=True,
+ help='Specify an OSD FSID to detect associated devices for zapping',
+ )
+ parser.add_argument(
+ '--target',
+ required=True,
+ help='Specify target Logical Volume (LV) to migrate data to',
+ )
+ parser.add_argument(
+ '--from',
+ nargs='*',
+ dest='from_',
+ required=True,
+ choices=['data', 'db', 'wal'],
+ help='Copy BlueFS data from DB device',
+ )
+ parser.add_argument(
+ '--no-systemd',
+ dest='no_systemd',
+ action='store_true',
+ help='Skip checking OSD systemd unit',
+ )
+ return parser
+
+ def main(self):
+ sub_command_help = dedent("""
+ Moves BlueFS data from source volume(s) to the target one, source
+ volumes (except the main (i.e. data or block) one) are removed on
+ success. LVM volumes are permitted for Target only, both already
+ attached or new logical one. In the latter case it is attached to OSD
+ replacing one of the source devices. Following replacement rules apply
+ (in the order of precedence, stop on the first match):
+ * if source list has DB volume - target device replaces it.
+ * if source list has WAL volume - target device replace it.
+ * if source list has slow volume only - operation is not permitted,
+ requires explicit allocation via new-db/new-wal command.
+
+ Example calls for supported scenarios:
+
+ Moves BlueFS data from main device to LV already attached as DB:
+
+ ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data --target vgname/db
+
+ Moves BlueFS data from shared main device to LV which will be attached
+ as a new DB:
+
+ ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data --target vgname/new_db
+
+ Moves BlueFS data from DB device to new LV, DB is replaced:
+
+ ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from db --target vgname/new_db
+
+ Moves BlueFS data from main and DB devices to new LV, DB is replaced:
+
+ ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data db --target vgname/new_db
+
+ Moves BlueFS data from main, DB and WAL devices to new LV, WAL is
+ removed and DB is replaced:
+
+ ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data db wal --target vgname/new_db
+
+ Moves BlueFS data from main, DB and WAL devices to main device, WAL
+ and DB are removed:
+
+ ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from db wal --target vgname/data
+
+ """)
+
+ parser = self.make_parser('ceph-volume lvm migrate', sub_command_help)
+
+ if len(self.argv) == 0:
+ print(sub_command_help)
+ return
+
+ self.args = parser.parse_args(self.argv)
+
+ self.migrate_osd()
+
class NewVolume(object):
    """
    Shared implementation for sub-commands that attach a new logical volume
    to an existing OSD. ``create_type`` is used to build both the
    ``ceph-bluestore-tool`` command name (``bluefs-bdev-new-<type>``) and the
    ``block.<type>`` file name inside the OSD directory.
    """

    def __init__(self, create_type, argv):
        self.create_type = create_type
        self.argv = argv

    def make_parser(self, prog, sub_command_help):
        """
        Build the argument parser shared by the new-volume sub-commands.

        :param prog: Program name shown in usage output
        :param sub_command_help: Description text shown by ``--help``
        :returns: The configured ``argparse.ArgumentParser``
        """
        parser = argparse.ArgumentParser(
            prog=prog,
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=sub_command_help,
        )

        parser.add_argument(
            '--osd-id',
            required=True,
            help='Specify an OSD ID to attach new volume to',
            type=valid_osd_id,
        )

        parser.add_argument(
            '--osd-fsid',
            required=True,
            help='Specify an OSD FSID to attach new volume to',
        )
        parser.add_argument(
            '--target',
            required=True,
            help='Specify target Logical Volume (LV) to attach',
        )
        parser.add_argument(
            '--no-systemd',
            dest='no_systemd',
            action='store_true',
            help='Skip checking OSD systemd unit',
        )
        return parser

    @decorators.needs_root
    def make_new_volume(self, osd_id, osd_fsid, devices, target_lv):
        """
        Attach ``target_lv`` to the OSD by running
        ``ceph-bluestore-tool bluefs-bdev-new-<create_type>``.

        :param osd_id: The OSD id
        :param osd_fsid: The OSD fsid
        :param devices: Devices associated with the OSD
        :param target_lv: The logical volume to attach
        :raises SystemExit: when ``ceph-bluestore-tool`` exits non-zero
        """
        osd_path = get_osd_path(osd_id, osd_fsid)
        mlogger.info(
            'Making new volume at {} for OSD: {} ({})'.format(
                target_lv.lv_path, osd_id, osd_path))
        target_path = target_lv.lv_path
        tag_tracker = VolumeTagTracker(devices, target_lv)
        # prepare and encrypt target if data volume is encrypted
        if tag_tracker.data_device.lv_api.encrypted:
            secret = encryption_utils.get_dmcrypt_key(osd_id, osd_fsid)
            mlogger.info(' preparing dmcrypt for {}, uuid {}'.format(target_lv.lv_path, target_lv.lv_uuid))
            target_path = encryption_utils.prepare_dmcrypt(
                key=secret, device=target_path, mapping=target_lv.lv_uuid)

        try:
            tag_tracker.update_tags_when_lv_create(self.create_type)

            _out, _err, exit_code = process.call([
                'ceph-bluestore-tool',
                '--path',
                osd_path,
                '--dev-target',
                target_path,
                '--command',
                'bluefs-bdev-new-{}'.format(self.create_type)
            ])
            if exit_code != 0:
                mlogger.error(
                    'failed to attach new volume, error code:{}'.format(
                        exit_code))
                raise SystemExit(
                    "Failed to attach new volume: {}".format(
                        self.args.target))
            system.chown(os.path.join(osd_path, "block.{}".format(
                self.create_type)))
            terminal.success('New volume attached.')
        except BaseException:
            # SystemExit is raised above on failure, so everything (not just
            # Exception subclasses) must trigger the tag rollback
            tag_tracker.undo()
            raise

    @decorators.needs_root
    def new_volume(self):
        """
        Validate the parsed arguments and attach the target logical volume.

        :raises SystemExit: when the OSD is still running, the target is not
                            a logical volume, or the target is already used
                            by ceph
        """
        if self.args.osd_id and not self.args.no_systemd:
            osd_is_running = systemctl.osd_is_active(self.args.osd_id)
            if osd_is_running:
                mlogger.error('OSD ID is running, stop it with:'
                              ' systemctl stop ceph-osd@{}'.format(self.args.osd_id))
                raise SystemExit(
                    'Unable to attach new volume for OSD: {}'.format(
                        self.args.osd_id))

        target_lv = api.get_lv_by_fullname(self.args.target)
        if not target_lv:
            mlogger.error(
                'Target path {} is not a Logical Volume'.format(
                    self.args.target))
            raise SystemExit(
                'Unable to attach new volume : {}'.format(self.args.target))
        if target_lv.used_by_ceph:
            mlogger.error(
                'Target Logical Volume is already used by ceph: {}'.format(
                    self.args.target))
            raise SystemExit(
                'Unable to attach new volume : {}'.format(self.args.target))

        devices = find_associated_devices(self.args.osd_id,
                                          self.args.osd_fsid)
        self.make_new_volume(
            self.args.osd_id,
            self.args.osd_fsid,
            devices,
            target_lv)
+
class NewWAL(NewVolume):
    """Sub-command that attaches a logical volume to an OSD as its WAL."""

    help = 'Allocate new WAL volume for OSD at specified Logical Volume'

    def __init__(self, argv):
        super(NewWAL, self).__init__("wal", argv)

    def main(self):
        """
        Print the help text when called without arguments, otherwise parse
        them and attach the new WAL volume.
        """
        sub_command_help = dedent("""
        Attaches the given logical volume to the given OSD as a WAL volume.
        Logical volume format is vg/lv. Fails if OSD has already got attached WAL.

        Example:

          Attach vgname/lvname as a WAL volume to OSD 1

              ceph-volume lvm new-wal --osd-id 1 --osd-fsid 55BD4219-16A7-4037-BC20-0F158EFCC83D --target vgname/new_wal
        """)
        parser = self.make_parser('ceph-volume lvm new-wal', sub_command_help)

        if len(self.argv) == 0:
            print(sub_command_help)
            return

        self.args = parser.parse_args(self.argv)

        self.new_volume()
+
class NewDB(NewVolume):
    """Sub-command that attaches a logical volume to an OSD as its DB."""

    help = 'Allocate new DB volume for OSD at specified Logical Volume'

    def __init__(self, argv):
        super(NewDB, self).__init__("db", argv)

    def main(self):
        """
        Print the help text when called without arguments, otherwise parse
        them and attach the new DB volume.
        """
        description = dedent("""
        Attaches the given logical volume to the given OSD as a DB volume.
        Logical volume format is vg/lv. Fails if OSD has already got attached DB.

        Example:

          Attach vgname/lvname as a DB volume to OSD 1

              ceph-volume lvm new-db --osd-id 1 --osd-fsid 55BD4219-16A7-4037-BC20-0F158EFCC83D --target vgname/new_db
        """)

        parser = self.make_parser('ceph-volume lvm new-db', description)
        if not len(self.argv):
            print(description)
        else:
            self.args = parser.parse_args(self.argv)
            self.new_volume()
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/prepare.py b/src/ceph-volume/ceph_volume/devices/lvm/prepare.py
new file mode 100644
index 000000000..85c8a1467
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/prepare.py
@@ -0,0 +1,327 @@
+from __future__ import print_function
+import json
+import logging
+from textwrap import dedent
+from ceph_volume.util import prepare as prepare_utils
+from ceph_volume.util import encryption as encryption_utils
+from ceph_volume.util import system, disk
+from ceph_volume.util.arg_validators import exclude_group_options
+from ceph_volume import conf, decorators, terminal
+from ceph_volume.api import lvm as api
+from .common import prepare_parser, rollback_osd
+
+
+logger = logging.getLogger(__name__)
+
+
def prepare_dmcrypt(key, device, device_type, tags):
    """
    Encrypt ``device`` with ``key``, using the uuid stored in ``tags`` under
    ``ceph.<device_type>_uuid``. The same steps apply to block, db and wal
    devices.

    Returns an empty string when no device is given, otherwise whatever
    ``encryption_utils.prepare_dmcrypt`` returns.
    """
    if device:
        device_uuid = tags['ceph.%s_uuid' % device_type]
        return encryption_utils.prepare_dmcrypt(key, device, device_uuid)
    return ''
+
def prepare_bluestore(block, wal, db, secrets, tags, osd_id, fsid):
    """
    :param block: The name of the logical volume for the bluestore data
    :param wal: a regular/plain disk or logical volume, to be used for block.wal
    :param db: a regular/plain disk or logical volume, to be used for block.db
    :param secrets: A dict with the secrets needed to create the osd (e.g. cephx)
    :param tags: A dict of LVM tags, used to look up the ``ceph.<type>_uuid``
                 values when the devices are encrypted
    :param osd_id: The OSD id
    :param fsid: The OSD fsid, also known as the OSD UUID
    """
    # only generate a key when the caller did not provide one; passing
    # ``create_key()`` as the ``dict.get`` default would always call it
    if 'cephx_secret' in secrets:
        cephx_secret = secrets['cephx_secret']
    else:
        cephx_secret = prepare_utils.create_key()
    # encryption-only operations
    if secrets.get('dmcrypt_key'):
        # If encrypted, there is no need to create the lockbox keyring file because
        # bluestore re-creates the files and does not have support for other files
        # like the custom lockbox one. This will need to be done on activation.
        # format and open ('decrypt' devices) and re-assign the device and journal
        # variables so that the rest of the process can use the mapper paths
        key = secrets['dmcrypt_key']
        block = prepare_dmcrypt(key, block, 'block', tags)
        wal = prepare_dmcrypt(key, wal, 'wal', tags)
        db = prepare_dmcrypt(key, db, 'db', tags)

    # create the directory
    prepare_utils.create_osd_path(osd_id, tmpfs=True)
    # symlink the block
    prepare_utils.link_block(block, osd_id)
    # get the latest monmap
    prepare_utils.get_monmap(osd_id)
    # write the OSD keyring if it doesn't exist already
    prepare_utils.write_keyring(osd_id, cephx_secret)
    # prepare the osd filesystem
    prepare_utils.osd_mkfs_bluestore(
        osd_id, fsid,
        keyring=cephx_secret,
        wal=wal,
        db=db
    )
+
+
class Prepare(object):
    """Implementation of the ``ceph-volume lvm prepare`` sub-command."""

    help = 'Format an LVM device and associate it with an OSD'

    def __init__(self, argv):
        self.argv = argv
        # set by ``prepare()``; ``safe_prepare()`` needs it for the rollback
        self.osd_id = None

    def get_ptuuid(self, argument):
        """
        Return the partition UUID of ``argument``.

        :raises RuntimeError: when no PARTUUID can be detected
        """
        uuid = disk.get_partuuid(argument)
        if not uuid:
            terminal.error('blkid could not detect a PARTUUID for device: %s' % argument)
            raise RuntimeError('unable to use device')
        return uuid

    def setup_device(self, device_type, device_name, tags, size, slots):
        """
        Check if ``device`` is an lv, if so, set the tags, making sure to
        update the tags with the lv_uuid and lv_path which the incoming tags
        will not have.

        If the device is a whole disk, a logical volume is created on it.

        If the device is not a logical volume, then retrieve the partition UUID
        by querying ``blkid``

        :returns: a ``(path, uuid, tags)`` tuple; ``('', '', tags)`` when
                  ``device_name`` is ``None``
        """
        if device_name is None:
            return '', '', tags
        tags['ceph.type'] = device_type
        tags['ceph.vdo'] = api.is_vdo(device_name)

        try:
            vg_name, lv_name = device_name.split('/')
            lv = api.get_single_lv(filters={'lv_name': lv_name,
                                            'vg_name': vg_name})
        except ValueError:
            # not in the "vg/lv" form, so it cannot be an existing lv
            lv = None

        if lv:
            lv_uuid = lv.lv_uuid
            path = lv.lv_path
            tags['ceph.%s_uuid' % device_type] = lv_uuid
            tags['ceph.%s_device' % device_type] = path
            lv.set_tags(tags)
        elif disk.is_device(device_name):
            # We got a disk, create an lv
            lv_type = "osd-{}".format(device_type)
            name_uuid = system.generate_uuid()
            kwargs = {
                'device': device_name,
                'tags': tags,
                'slots': slots
            }
            #TODO use get_block_db_size and co here to get configured size in
            #conf file
            if size != 0:
                kwargs['size'] = size
            lv = api.create_lv(
                lv_type,
                name_uuid,
                **kwargs)
            path = lv.lv_path
            tags['ceph.{}_device'.format(device_type)] = path
            tags['ceph.{}_uuid'.format(device_type)] = lv.lv_uuid
            lv_uuid = lv.lv_uuid
            lv.set_tags(tags)
        else:
            # otherwise assume this is a regular disk partition
            name_uuid = self.get_ptuuid(device_name)
            path = device_name
            tags['ceph.%s_uuid' % device_type] = name_uuid
            tags['ceph.%s_device' % device_type] = path
            lv_uuid = name_uuid
        return path, lv_uuid, tags

    def prepare_data_device(self, device_type, osd_uuid):
        """
        Check if the value of ``--data`` is a device or partition to create an
        LV out of it, assigning the ``ceph.type`` LV tag on it and ultimately
        returning the logical volume object. Failing to detect a device or
        partition will result in error.

        :param device_type: Usually ``block``
        :param osd_uuid: The OSD uuid
        :raises RuntimeError: when ``--data`` is neither a device nor
                              a partition
        """
        device = self.args.data
        if not (disk.is_partition(device) or disk.is_device(device)):
            error = [
                'Cannot use device ({}).'.format(device),
                'A vg/lv path or an existing device is needed']
            raise RuntimeError(' '.join(error))

        # we must create a vg, and then a single lv
        lv_name_prefix = "osd-{}".format(device_type)
        kwargs = {'device': device,
                  'tags': {'ceph.type': device_type},
                  'slots': self.args.data_slots,
                  }
        logger.debug('data device size: {}'.format(self.args.data_size))
        if self.args.data_size != 0:
            kwargs['size'] = self.args.data_size
        return api.create_lv(
            lv_name_prefix,
            osd_uuid,
            **kwargs)

    def safe_prepare(self, args=None):
        """
        An intermediate step between `main()` and `prepare()` so that we can
        capture the `self.osd_id` in case we need to rollback

        :param args: Injected args, usually from `lvm create` which compounds
                     both `prepare` and `create`
        """
        if args is not None:
            self.args = args

        try:
            vgname, lvname = self.args.data.split('/')
            lv = api.get_single_lv(filters={'lv_name': lvname,
                                            'vg_name': vgname})
        except ValueError:
            # not in the "vg/lv" form, so it cannot be an existing lv
            lv = None

        if api.is_ceph_device(lv):
            logger.info("device {} is already used".format(self.args.data))
            raise RuntimeError("skipping {}, it is already prepared".format(self.args.data))
        try:
            self.prepare()
        except Exception:
            logger.exception('lvm prepare was unable to complete')
            logger.info('will rollback OSD ID creation')
            rollback_osd(self.args, self.osd_id)
            raise
        terminal.success("ceph-volume lvm prepare successful for: %s" % self.args.data)

    def get_cluster_fsid(self):
        """
        Allows using --cluster-fsid as an argument, but can fallback to reading
        from ceph.conf if that is unset (the default behavior).
        """
        if self.args.cluster_fsid:
            return self.args.cluster_fsid
        return conf.ceph.get('global', 'fsid')

    @decorators.needs_root
    def prepare(self):
        """
        Create the OSD id and secrets, tag the logical volumes involved and
        prepare the bluestore OSD. ``self.osd_id`` is set as soon as the id
        is created.
        """
        # FIXME we don't allow re-using a keyring, we always generate one for the
        # OSD, this needs to be fixed. This could either be a file (!) or a string
        # (!!) or some flags that we would need to compound into a dict so that we
        # can convert to JSON (!!!)
        secrets = {'cephx_secret': prepare_utils.create_key()}
        encrypted = 1 if self.args.dmcrypt else 0
        cephx_lockbox_secret = '' if not encrypted else prepare_utils.create_key()

        if encrypted:
            secrets['dmcrypt_key'] = encryption_utils.create_dmcrypt_key()
            secrets['cephx_lockbox_secret'] = cephx_lockbox_secret

        cluster_fsid = self.get_cluster_fsid()

        osd_fsid = self.args.osd_fsid or system.generate_uuid()
        crush_device_class = self.args.crush_device_class
        if crush_device_class:
            secrets['crush_device_class'] = crush_device_class
        # reuse a given ID if it exists, otherwise create a new ID
        self.osd_id = prepare_utils.create_id(osd_fsid, json.dumps(secrets), osd_id=self.args.osd_id)
        tags = {
            'ceph.osd_fsid': osd_fsid,
            'ceph.osd_id': self.osd_id,
            'ceph.cluster_fsid': cluster_fsid,
            'ceph.cluster_name': conf.cluster,
            'ceph.crush_device_class': crush_device_class,
            'ceph.osdspec_affinity': prepare_utils.get_osdspec_affinity()
        }
        if self.args.bluestore:
            try:
                vg_name, lv_name = self.args.data.split('/')
                block_lv = api.get_single_lv(filters={'lv_name': lv_name,
                                                      'vg_name': vg_name})
            except ValueError:
                # not in the "vg/lv" form, so it cannot be an existing lv
                block_lv = None

            if not block_lv:
                block_lv = self.prepare_data_device('block', osd_fsid)

            tags['ceph.block_device'] = block_lv.lv_path
            tags['ceph.block_uuid'] = block_lv.lv_uuid
            tags['ceph.cephx_lockbox_secret'] = cephx_lockbox_secret
            tags['ceph.encrypted'] = encrypted
            tags['ceph.vdo'] = api.is_vdo(block_lv.lv_path)

            wal_device, wal_uuid, tags = self.setup_device(
                'wal',
                self.args.block_wal,
                tags,
                self.args.block_wal_size,
                self.args.block_wal_slots)
            db_device, db_uuid, tags = self.setup_device(
                'db',
                self.args.block_db,
                tags,
                self.args.block_db_size,
                self.args.block_db_slots)

            # setup_device() overwrote ceph.type with 'wal'/'db'; restore it
            # before tagging the block lv
            tags['ceph.type'] = 'block'
            block_lv.set_tags(tags)

            prepare_bluestore(
                block_lv.lv_path,
                wal_device,
                db_device,
                secrets,
                tags,
                self.osd_id,
                osd_fsid,
            )

    def main(self):
        """
        Print the help text when called without arguments, otherwise parse
        them and run ``safe_prepare()``.
        """
        sub_command_help = dedent("""
        Prepare an OSD by assigning an ID and FSID, registering them with the
        cluster with an ID and FSID, formatting and mounting the volume, and
        finally by adding all the metadata to the logical volumes using LVM
        tags, so that it can later be discovered.

        Once the OSD is ready, an ad-hoc systemd unit will be enabled so that
        it can later get activated and the OSD daemon can get started.

        Encryption is supported via dmcrypt and the --dmcrypt flag.

        Existing logical volume (lv):

            ceph-volume lvm prepare --data {vg/lv}

        Existing block device (a logical volume will be created):

            ceph-volume lvm prepare --data /path/to/device

        Optionally, can consume db and wal devices, partitions or logical
        volumes. A device will get a logical volume, partitions and existing
        logical volumes will be used as is:

            ceph-volume lvm prepare --data {vg/lv} --block.wal {partition} --block.db {/path/to/device}
        """)
        parser = prepare_parser(
            prog='ceph-volume lvm prepare',
            description=sub_command_help,
        )
        if len(self.argv) == 0:
            print(sub_command_help)
            return
        exclude_group_options(parser, argv=self.argv, groups=['bluestore'])
        self.args = parser.parse_args(self.argv)
        # Default to bluestore here since defaulting it in add_argument may
        # cause both to be True
        if not self.args.bluestore:
            self.args.bluestore = True
        self.safe_prepare()
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/trigger.py b/src/ceph-volume/ceph_volume/devices/lvm/trigger.py
new file mode 100644
index 000000000..dc57011df
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/trigger.py
@@ -0,0 +1,70 @@
+from __future__ import print_function
+import argparse
+from textwrap import dedent
+from ceph_volume.exceptions import SuffixParsingError
+from ceph_volume import decorators
+from .activate import Activate
+
+
def parse_osd_id(string):
    """
    Return the OSD id: the part of ``string`` before the first ``-``.

    :raises SuffixParsingError: when that part is empty or not made of digits
    """
    osd_id = string.split('-', 1)[0]
    # an empty string is not ``isdigit()`` either, so one check covers both
    if osd_id.isdigit():
        return osd_id
    raise SuffixParsingError('OSD id', string)


def parse_osd_uuid(string):
    """
    Return the OSD uuid: everything in ``string`` after the ``{OSD ID}-``
    prefix.

    :raises SuffixParsingError: when the id is invalid, or when nothing
                                follows it (including a missing ``-``)
    """
    # validate the id first so that a bad id is reported as such
    parse_osd_id(string)
    # without a '-' separator there is no uuid at all; partition() yields an
    # empty tail in that case instead of handing back the whole string
    _, _, osd_uuid = string.partition('-')
    if not osd_uuid:
        raise SuffixParsingError('OSD uuid', string)
    return osd_uuid
+
+
class Trigger(object):
    """Implementation of the ``ceph-volume lvm trigger`` sub-command."""

    help = 'systemd helper to activate an OSD'

    def __init__(self, argv):
        self.argv = argv

    @decorators.needs_root
    def main(self):
        """
        Print the help text when called without arguments, otherwise parse
        the systemd data into an OSD id and uuid and run ``Activate`` with
        them.
        """
        sub_command_help = dedent("""
        ** DO NOT USE DIRECTLY **
        This tool is meant to help the systemd unit that knows about OSDs.

        Proxy OSD activation to ``ceph-volume lvm activate`` by parsing the
        input from systemd, detecting the UUID and ID associated with an OSD::

            ceph-volume lvm trigger {SYSTEMD-DATA}

        The systemd "data" is expected to be in the format of::

            {OSD ID}-{OSD UUID}

        The lvs associated with the OSD need to have been prepared previously,
        so that all needed tags and metadata exist.
        """)
        parser = argparse.ArgumentParser(
            prog='ceph-volume lvm trigger',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=sub_command_help,
        )

        parser.add_argument(
            'systemd_data',
            metavar='SYSTEMD_DATA',
            nargs='?',
            help='Data from a systemd unit containing ID and UUID of the OSD, like 0-asdf-lkjh'
        )
        if len(self.argv) == 0:
            print(sub_command_help)
            return
        args = parser.parse_args(self.argv)
        osd_id = parse_osd_id(args.systemd_data)
        osd_uuid = parse_osd_uuid(args.systemd_data)
        Activate(['--auto-detect-objectstore', osd_id, osd_uuid]).main()
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/zap.py b/src/ceph-volume/ceph_volume/devices/lvm/zap.py
new file mode 100644
index 000000000..d4d78ad01
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/lvm/zap.py
@@ -0,0 +1,405 @@
+import argparse
+import os
+import logging
+import time
+
+from textwrap import dedent
+
+from ceph_volume import decorators, terminal, process
+from ceph_volume.api import lvm as api
+from ceph_volume.util import system, encryption, disk, arg_validators, str_to_int, merge_dict
+from ceph_volume.util.device import Device
+from ceph_volume.systemd import systemctl
+
+logger = logging.getLogger(__name__)
+mlogger = terminal.MultiLogger(__name__)
+
+
def wipefs(path):
    """
    Removes the filesystem from an lv or partition, retrying when the
    ``wipefs`` command fails.

    Environment variables supported::

    * ``CEPH_VOLUME_WIPEFS_TRIES``: Defaults to 8
    * ``CEPH_VOLUME_WIPEFS_INTERVAL``: Defaults to 5

    :raises RuntimeError: when every attempt failed
    """
    max_attempts = str_to_int(os.environ.get('CEPH_VOLUME_WIPEFS_TRIES', 8))
    pause = str_to_int(os.environ.get('CEPH_VOLUME_WIPEFS_INTERVAL', 5))

    for _attempt in range(max_attempts):
        _out, _err, returncode = process.call(['wipefs', '--all', path])
        if returncode == 0:
            return
        # this could narrow the retry by poking in the stderr of the output
        # to verify that 'probing initialization failed' appears, but
        # better to be broad in this retry to prevent missing on
        # a different message that needs to be retried as well
        terminal.warning(
            'failed to wipefs device, will try again to workaround probable race condition'
        )
        time.sleep(pause)
    raise RuntimeError("could not complete wipefs on device: %s" % path)
+
+
def zap_data(path):
    """
    Clears all data from the given path. Path should be
    an absolute path to an lv or partition.

    10M of zeroes are written to the path with ``dd`` to make sure that
    there is no trace left of any previous Filesystem.
    """
    dd_command = ['dd', 'if=/dev/zero']
    dd_command.append('of={path}'.format(path=path))
    dd_command.extend(['bs=1M', 'count=10', 'conv=fsync'])
    process.run(dd_command)
+
+
def find_associated_devices(osd_id=None, osd_fsid=None):
    """
    From an ``osd_id`` and/or an ``osd_fsid``, filter out all the LVs in the
    system that match those tag values, further detect if any partitions are
    part of the OSD, and then return the set of LVs and partitions (if any).

    :returns: a list of ``Device`` objects, one per unique path
    :raises RuntimeError: when no LV matches the given id/fsid
    """
    lv_tags = {}
    if osd_id:
        lv_tags['ceph.osd_id'] = osd_id
    if osd_fsid:
        lv_tags['ceph.osd_fsid'] = osd_fsid

    lvs = api.get_lvs(tags=lv_tags)
    if not lvs:
        # the parentheses matter: ``%`` binds tighter than ``or``, so without
        # them the fsid would never make it into the message
        raise RuntimeError('Unable to find any LV for zapping OSD: '
                           '%s' % (osd_id or osd_fsid))

    devices_to_zap = ensure_associated_lvs(lvs, lv_tags)
    return [Device(path) for path in set(devices_to_zap) if path]
+
+
def ensure_associated_lvs(lvs, lv_tags=None):
    """
    Go through each LV and ensure if backing devices (db, wal)
    are LVs or partitions, so that they can be accurately reported.

    :param lvs: The LVs to inspect
    :param lv_tags: Optional dict of tags used to narrow down the db/wal LV
                    lookup; no extra filtering when omitted
    :returns: a list of unique device paths
    """
    # avoid a shared mutable default argument
    if lv_tags is None:
        lv_tags = {}

    # look for many LVs for each backing type, because it is possible to
    # receive a filtering for osd.1, and have multiple failed deployments
    # leaving many journals with osd.1 - usually, only a single LV will be
    # returned

    db_lvs = api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'db'}))
    wal_lvs = api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'wal'}))
    backing_devices = [(db_lvs, 'db'),
                       (wal_lvs, 'wal')]

    verified_devices = []

    for lv in lvs:
        # go through each lv and append it, otherwise query `blkid` to find
        # a physical device. Do this for each type (db,wal) regardless
        # if they have been processed in the previous LV, so that bad devices
        # with the same ID can be caught
        for ceph_lvs, _type in backing_devices:
            if ceph_lvs:
                verified_devices.extend(
                    [backing_lv.lv_path for backing_lv in ceph_lvs])
                continue

            # must be a disk partition, by querying blkid by the uuid we are
            # ensuring that the device path is always correct
            try:
                device_uuid = lv.tags['ceph.%s_uuid' % _type]
            except KeyError:
                # this LV has no uuid tag for the backing type, nothing
                # to look up
                continue

            osd_device = disk.get_device_from_partuuid(device_uuid)
            if not osd_device:
                # if the osd_device is not found by the partuuid, then it is
                # not possible to ensure this device exists anymore, so skip it
                continue
            verified_devices.append(osd_device)

        verified_devices.append(lv.lv_path)

    # reduce the list from all the duplicates that were added
    return list(set(verified_devices))
+
+
class Zap(object):
    """Implementation of the ``ceph-volume lvm zap`` sub-command."""

    help = 'Removes all data and filesystems from a logical volume or partition.'

    def __init__(self, argv):
        self.argv = argv

    def unmount_lv(self, lv):
        """
        Unmount ``lv`` if it is mounted, and close its dmcrypt mapping when
        the lv is encrypted.
        """
        if lv.tags.get('ceph.cluster_name') and lv.tags.get('ceph.osd_id'):
            lv_path = "/var/lib/ceph/osd/{}-{}".format(lv.tags['ceph.cluster_name'], lv.tags['ceph.osd_id'])
        else:
            lv_path = lv.lv_path
        dmcrypt_uuid = lv.lv_uuid
        dmcrypt = lv.encrypted
        if system.path_is_mounted(lv_path):
            mlogger.info("Unmounting %s", lv_path)
            system.unmount(lv_path)
        if dmcrypt and dmcrypt_uuid:
            self.dmcrypt_close(dmcrypt_uuid)

    def zap_lv(self, device):
        """
        Device examples: vg-name/lv-name, /dev/vg-name/lv-name
        Requirements: Must be a logical volume (LV)
        """
        lv = api.get_single_lv(filters={'lv_name': device.lv_name, 'vg_name':
                                        device.vg_name})
        self.unmount_lv(lv)

        wipefs(device.path)
        zap_data(device.path)

        if self.args.destroy:
            lvs = api.get_lvs(filters={'vg_name': device.vg_name})
            if lvs == []:
                mlogger.info('No LVs left in VG %s, exiting', device.vg_name)
                return
            elif len(lvs) <= 1:
                mlogger.info('Only 1 LV left in VG, will proceed to destroy '
                             'volume group %s', device.vg_name)
                # collect the PVs before the VG is removed
                pvs = api.get_pvs(filters={'lv_uuid': lv.lv_uuid})
                api.remove_vg(device.vg_name)
                for pv in pvs:
                    api.remove_pv(pv.pv_name)
            else:
                mlogger.info('More than 1 LV left in VG, will proceed to '
                             'destroy LV only')
                mlogger.info('Removing LV because --destroy was given: %s',
                             device.path)
                api.remove_lv(device.path)
        elif lv:
            # just remove all lvm metadata, leaving the LV around
            lv.clear_tags()

    def zap_partition(self, device):
        """
        Device example: /dev/sda1
        Requirements: Must be a partition
        """
        if device.is_encrypted:
            # find the holder
            holders = [
                '/dev/%s' % holder for holder in device.sys_api.get('holders', [])
            ]
            for mapper_uuid in os.listdir('/dev/mapper'):
                mapper_path = os.path.join('/dev/mapper', mapper_uuid)
                if os.path.realpath(mapper_path) in holders:
                    self.dmcrypt_close(mapper_uuid)

        if system.device_is_mounted(device.path):
            mlogger.info("Unmounting %s", device.path)
            system.unmount(device.path)

        wipefs(device.path)
        zap_data(device.path)

        if self.args.destroy:
            mlogger.info("Destroying partition since --destroy was used: %s" % device.path)
            disk.remove_partition(device)

    def zap_lvm_member(self, device):
        """
        An LVM member may have more than one LV and or VG, for example if it is
        a raw device with multiple partitions each belonging to a different LV

        Device example: /dev/sda
        Requirements: An LV or VG present in the device, making it an LVM member
        """
        for lv in device.lvs:
            if lv.lv_name:
                mlogger.info('Zapping lvm member {}. lv_path is {}'.format(device.path, lv.lv_path))
                self.zap_lv(Device(lv.lv_path))
            else:
                vg = api.get_single_vg(filters={'vg_name': lv.vg_name})
                if vg:
                    mlogger.info('Found empty VG {}, removing'.format(vg.vg_name))
                    api.remove_vg(vg.vg_name)

    def zap_raw_device(self, device):
        """
        Any whole (raw) device passed in as input will be processed here,
        checking for LVM membership and partitions (if any).

        Device example: /dev/sda
        Requirements: None
        """
        if not self.args.destroy:
            # the use of dd on a raw device causes the partition table to be
            # destroyed
            mlogger.warning(
                '--destroy was not specified, but zapping a whole device will remove the partition table'
            )

        # look for partitions and zap those
        for part_name in device.sys_api.get('partitions', {}).keys():
            self.zap_partition(Device('/dev/%s' % part_name))

        wipefs(device.path)
        zap_data(device.path)

    @decorators.needs_root
    def zap(self, devices=None):
        """
        Zap every device in ``devices``, falling back to the devices given on
        the command line.

        :raises SystemExit: when asked to zap a mapper device that is not
                            a multipath device
        """
        devices = devices or self.args.devices

        for device in devices:
            mlogger.info("Zapping: %s", device.path)
            if device.is_mapper and not device.is_mpath:
                terminal.error("Refusing to zap the mapper device: {}".format(device))
                raise SystemExit(1)
            # these checks are independent ``if``s: more than one of them
            # may run for the same device
            if device.is_lvm_member:
                self.zap_lvm_member(device)
            if device.is_lv:
                self.zap_lv(device)
            if device.is_partition:
                self.zap_partition(device)
            if device.is_device:
                self.zap_raw_device(device)

        if self.args.devices:
            terminal.success(
                "Zapping successful for: %s" % ", ".join([str(d) for d in self.args.devices])
            )
        else:
            identifier = self.args.osd_id or self.args.osd_fsid
            terminal.success(
                "Zapping successful for OSD: %s" % identifier
            )

    @decorators.needs_root
    def zap_osd(self):
        """
        Zap all the devices associated with the OSD id and/or fsid given on
        the command line.

        :raises SystemExit: when the OSD is still running
        """
        if self.args.osd_id and not self.args.no_systemd:
            osd_is_running = systemctl.osd_is_active(self.args.osd_id)
            if osd_is_running:
                mlogger.error("OSD ID %s is running, stop it with:" % self.args.osd_id)
                mlogger.error("systemctl stop ceph-osd@%s" % self.args.osd_id)
                raise SystemExit("Unable to zap devices associated with OSD ID: %s" % self.args.osd_id)
        devices = find_associated_devices(self.args.osd_id, self.args.osd_fsid)
        self.zap(devices)

    def dmcrypt_close(self, dmcrypt_uuid):
        """Close the dmcrypt mapping named ``dmcrypt_uuid``."""
        mlogger.info("Closing encrypted volume %s", dmcrypt_uuid)
        encryption.dmcrypt_close(mapping=dmcrypt_uuid, skip_path_check=True)

    def main(self):
        """
        Print the help text when called without arguments; otherwise parse
        them and zap either the devices of an OSD (``--osd-id`` /
        ``--osd-fsid``) or the devices given as positional arguments.
        """
        sub_command_help = dedent("""
        Zaps the given logical volume(s), raw device(s) or partition(s) for reuse by ceph-volume.
        If given a path to a logical volume it must be in the format of vg/lv. Any
        filesystems present on the given device, vg/lv, or partition will be removed and
        all data will be purged.

        If the logical volume, raw device or partition is being used for any ceph related
        mount points they will be unmounted.

        However, the lv or partition will be kept intact.

        Example calls for supported scenarios:

          Zapping a logical volume:

              ceph-volume lvm zap {vg name/lv name}

          Zapping a partition:

              ceph-volume lvm zap /dev/sdc1

          Zapping many raw devices:

              ceph-volume lvm zap /dev/sda /dev/sdb /dev/sdc

          Zapping devices associated with an OSD ID:

              ceph-volume lvm zap --osd-id 1

            Optionally include the OSD FSID

              ceph-volume lvm zap --osd-id 1 --osd-fsid 55BD4219-16A7-4037-BC20-0F158EFCC83D

        If the --destroy flag is given and you are zapping a raw device or partition
        then all vgs and lvs that exist on that raw device or partition will be destroyed.

        This is especially useful if a raw device or partition was used by ceph-volume lvm create
        or ceph-volume lvm prepare commands previously and now you want to reuse that device.

        For example:

          ceph-volume lvm zap /dev/sda --destroy

        If the --destroy flag is given and you are zapping an lv then the lv is still
        kept intact for reuse.

        """)
        parser = argparse.ArgumentParser(
            prog='ceph-volume lvm zap',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=sub_command_help,
        )

        parser.add_argument(
            'devices',
            metavar='DEVICES',
            nargs='*',
            type=arg_validators.ValidZapDevice(gpt_ok=True),
            default=[],
            help='Path to one or many lv (as vg/lv), partition (as /dev/sda1) or device (as /dev/sda)'
        )

        parser.add_argument(
            '--destroy',
            action='store_true',
            default=False,
            help='Destroy all volume groups and logical volumes if you are zapping a raw device or partition',
        )

        parser.add_argument(
            '--osd-id',
            type=arg_validators.valid_osd_id,
            help='Specify an OSD ID to detect associated devices for zapping',
        )

        parser.add_argument(
            '--osd-fsid',
            help='Specify an OSD FSID to detect associated devices for zapping',
        )

        parser.add_argument(
            '--no-systemd',
            dest='no_systemd',
            action='store_true',
            help='Skip systemd unit checks',
        )

        if len(self.argv) == 0:
            print(sub_command_help)
            return

        self.args = parser.parse_args(self.argv)

        if self.args.osd_id or self.args.osd_fsid:
            self.zap_osd()
        else:
            self.zap()
diff --git a/src/ceph-volume/ceph_volume/devices/raw/__init__.py b/src/ceph-volume/ceph_volume/devices/raw/__init__.py
new file mode 100644
index 000000000..dd0a6534c
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/raw/__init__.py
@@ -0,0 +1 @@
+from .main import Raw # noqa
diff --git a/src/ceph-volume/ceph_volume/devices/raw/activate.py b/src/ceph-volume/ceph_volume/devices/raw/activate.py
new file mode 100644
index 000000000..17be57dfe
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/raw/activate.py
@@ -0,0 +1,166 @@
+from __future__ import print_function
+import argparse
+import logging
+import os
+from textwrap import dedent
+from ceph_volume import process, conf, decorators, terminal
+from ceph_volume.util import system
+from ceph_volume.util import prepare as prepare_utils
+from .list import direct_report
+
+
+logger = logging.getLogger(__name__)
+
def activate_bluestore(meta, tmpfs, systemd):
    """
    Prepare the data directory of a raw BlueStore OSD so that it can be started.

    :param meta: Mapping describing the OSD. The ``osd_id``, ``osd_uuid`` and
                 ``device`` keys are required; ``device_db`` and
                 ``device_wal`` are linked as well when present.
    :param tmpfs: Forwarded to ``create_osd_path`` when the OSD directory is
                  not mounted yet.
    :param systemd: Accepted for interface compatibility, not used by this
                    function.
    """
    # find the osd
    osd_id = meta['osd_id']
    osd_uuid = meta['osd_uuid']

    # mount on tmpfs the osd directory
    osd_path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
    if not system.path_is_mounted(osd_path):
        # mkdir -p and mount as tmpfs
        prepare_utils.create_osd_path(osd_id, tmpfs=tmpfs)

    # XXX This needs to be removed once ceph-bluestore-tool can deal with
    # symlinks that exist in the osd dir
    for link_name in ['block', 'block.db', 'block.wal']:
        link_path = os.path.join(osd_path, link_name)
        # lexists() is also True for dangling symlinks (e.g. the target device
        # path changed), which exists() would silently leave behind
        if os.path.lexists(link_path):
            os.unlink(link_path)

    # Once symlinks are removed, the osd dir can be 'primed' again. chown
    # first, regardless of what currently exists so that ``prime-osd-dir`` can
    # succeed even if permissions are somehow messed up
    system.chown(osd_path)
    prime_command = [
        'ceph-bluestore-tool',
        'prime-osd-dir',
        '--path', osd_path,
        '--no-mon-config',
        '--dev', meta['device'],
    ]
    process.run(prime_command)

    # always re-do the symlink regardless if it exists, so that the block,
    # block.wal, and block.db devices that may have changed can be mapped
    # correctly every time
    prepare_utils.link_block(meta['device'], osd_id)

    if 'device_db' in meta:
        prepare_utils.link_db(meta['device_db'], osd_id, osd_uuid)

    if 'device_wal' in meta:
        prepare_utils.link_wal(meta['device_wal'], osd_id, osd_uuid)

    # ownership is applied once more after the links have been re-created
    system.chown(osd_path)
    terminal.success("ceph-volume raw activate successful for osd ID: %s" % osd_id)
+
+
class Activate(object):
    # CLI handler for ``ceph-volume raw activate``

    help = 'Discover and prepare a data directory for a (BlueStore) OSD on a raw device'

    def __init__(self, argv):
        self.argv = argv
        self.args = None

    @decorators.needs_root
    def activate(self, devs, start_osd_id, start_osd_uuid,
                 tmpfs, systemd):
        """
        Activate every OSD reported for ``devs`` that matches the filters.

        :param devs: Devices to inspect, handed over to ``direct_report``
        :param start_osd_id: When not None, only activate the OSD with this ID
        :param start_osd_uuid: When not None, only activate the OSD with this UUID
        :param tmpfs: Forwarded to ``activate_bluestore``
        :param systemd: Forwarded to ``activate_bluestore``
        :raises RuntimeError: When no reported OSD matched the filters
        """
        assert devs or start_osd_id or start_osd_uuid
        found = direct_report(devs)

        activated_any = False
        for osd_uuid, meta in found.items():
            osd_id = meta['osd_id']
            # IDs are compared as strings: the CLI provides a string while
            # the report may hold an integer
            if start_osd_id is not None and str(osd_id) != str(start_osd_id):
                continue
            if start_osd_uuid is not None and osd_uuid != start_osd_uuid:
                continue
            logger.info('Activating osd.%s uuid %s cluster %s',
                        osd_id, osd_uuid, meta['ceph_fsid'])
            activate_bluestore(meta,
                               tmpfs=tmpfs,
                               systemd=systemd)
            activated_any = True

        if not activated_any:
            raise RuntimeError('did not find any matching OSD to activate')

    def main(self):
        """
        Parse ``self.argv`` and activate the matching OSD(s).

        Prints the sub-command help and returns when no arguments were given.
        Exits with status 1 unless ``--no-systemd`` is passed.
        """
        sub_command_help = dedent("""
        Activate (BlueStore) OSD on a raw block device(s) based on the
        device label (normally the first block of the device).

            ceph-volume raw activate --device /dev/sdb2

        or

            ceph-volume raw activate --osd-id NUM --osd-uuid UUID

        The device(s) associated with the OSD need to have been prepared
        previously, so that all needed tags and metadata exist.
        """)
        parser = argparse.ArgumentParser(
            prog='ceph-volume raw activate',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=sub_command_help,
        )
        parser.add_argument(
            '--device',
            help='The device for the OSD to start'
        )
        parser.add_argument(
            '--osd-id',
            help='OSD ID to activate'
        )
        parser.add_argument(
            '--osd-uuid',
            help='OSD UUID to activate'
        )
        parser.add_argument(
            '--no-systemd',
            dest='no_systemd',
            action='store_true',
            help='Skip creating and enabling systemd units and starting OSD services'
        )
        parser.add_argument(
            '--block.db',
            dest='block_db',
            help='Path to bluestore block.db block device'
        )
        parser.add_argument(
            '--block.wal',
            dest='block_wal',
            help='Path to bluestore block.wal block device'
        )
        parser.add_argument(
            '--no-tmpfs',
            action='store_true',
            help='Do not use a tmpfs mount for OSD data dir'
        )

        if not self.argv:
            print(sub_command_help)
            return
        args = parser.parse_args(self.argv)
        self.args = args
        if not args.no_systemd:
            terminal.error('systemd support not yet implemented')
            raise SystemExit(1)

        # args.device may be None; the listing side treats a list without any
        # truthy entry as "inspect every device"
        devs = [args.device]
        if args.block_wal:
            devs.append(args.block_wal)
        if args.block_db:
            devs.append(args.block_db)

        self.activate(devs=devs,
                      start_osd_id=args.osd_id,
                      start_osd_uuid=args.osd_uuid,
                      tmpfs=not args.no_tmpfs,
                      systemd=not self.args.no_systemd)
diff --git a/src/ceph-volume/ceph_volume/devices/raw/common.py b/src/ceph-volume/ceph_volume/devices/raw/common.py
new file mode 100644
index 000000000..89ee285be
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/raw/common.py
@@ -0,0 +1,58 @@
+import argparse
+from ceph_volume.util import arg_validators
+
def create_parser(prog, description):
    """
    Build the argument parser shared by the raw ``prepare`` and ``create``
    sub-commands, so that the options are declared in a single place.

    :param prog: Program name shown in usage/help output
    :param description: Text used as the parser description
    :returns: The configured ``argparse.ArgumentParser``
    """
    parser = argparse.ArgumentParser(
        prog=prog,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
    )
    # (flag, add_argument keyword arguments), in the order they show in --help
    option_specs = (
        ('--data', dict(
            required=True,
            type=arg_validators.ValidRawDevice(as_string=True),
            help='a raw device to use for the OSD',
        )),
        ('--bluestore', dict(
            action='store_true',
            help='Use BlueStore backend',
        )),
        ('--crush-device-class', dict(
            dest='crush_device_class',
            help='Crush device class to assign this OSD to',
            default="",
        )),
        ('--no-tmpfs', dict(
            action='store_true',
            help='Do not use a tmpfs mount for OSD data dir',
        )),
        ('--block.db', dict(
            dest='block_db',
            help='Path to bluestore block.db block device',
            type=arg_validators.ValidRawDevice(as_string=True),
        )),
        ('--block.wal', dict(
            dest='block_wal',
            help='Path to bluestore block.wal block device',
            type=arg_validators.ValidRawDevice(as_string=True),
        )),
        ('--dmcrypt', dict(
            action='store_true',
            help='Enable device encryption via dm-crypt',
        )),
        ('--osd-id', dict(
            help='Reuse an existing OSD id',
            default=None,
            type=arg_validators.valid_osd_id,
        )),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser
diff --git a/src/ceph-volume/ceph_volume/devices/raw/list.py b/src/ceph-volume/ceph_volume/devices/raw/list.py
new file mode 100644
index 000000000..794bb18c1
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/raw/list.py
@@ -0,0 +1,174 @@
+from __future__ import print_function
+import argparse
+import json
+import logging
+from textwrap import dedent
+from ceph_volume import decorators, process
+from ceph_volume.util import disk
+from typing import Any, Dict, List
+
+logger = logging.getLogger(__name__)
+
+
def direct_report(devices):
    """
    Return the listing report for ``devices`` without any CLI handling.

    Meant for non-cli consumers: it instantiates ``List`` with an empty
    argument list and returns whatever its ``generate`` method produces, so
    callers do not have to parse arguments or deal with the class interface.
    """
    return List([]).generate(devices)
+
def _get_bluestore_info(dev):
    """
    Read the BlueStore label of ``dev`` with ``ceph-bluestore-tool show-label``.

    :param dev: Device path handed to ``--dev``
    :returns: A dict that always holds ``osd_uuid`` plus, depending on the
              label description, the main OSD details or a ``device_db`` /
              ``device_wal`` entry. ``None`` when the tool fails, the device
              is missing from its output, or a needed label key is absent.
    """
    command = ['ceph-bluestore-tool', 'show-label', '--dev', dev]
    out, err, rc = process.call(command, verbose_on_failure=False)
    if rc:
        # ceph-bluestore-tool returns an error (below) if device is not bluestore OSD
        #   > unable to read label for <device>: (2) No such file or directory
        # but it's possible the error could be for a different reason (like if the disk fails)
        logger.debug('assuming device {} is not BlueStore; ceph-bluestore-tool failed to get info from device: {}\n{}'.format(dev, out, err))
        return None

    labels = json.loads(''.join(out))
    if dev not in labels:
        # should be impossible, so warn
        logger.warning('skipping device {} because it is not reported in ceph-bluestore-tool output: {}'.format(dev, out))
        return None

    label = labels[dev]
    try:
        info = {'osd_uuid': label['osd_uuid']}
        role = label['description']
        if role == 'main':
            whoami = label['whoami']
            info['type'] = 'bluestore'
            info['osd_id'] = int(whoami)
            info['ceph_fsid'] = label['ceph_fsid']
            info['device'] = dev
        elif role == 'bluefs db':
            info['device_db'] = dev
        elif role == 'bluefs wal':
            info['device_wal'] = dev
    except KeyError as e:
        # this will appear for devices that have a bluestore header but aren't valid OSDs
        # for example, due to incomplete rollback of OSDs: https://tracker.ceph.com/issues/51869
        logger.error('device {} does not have all BlueStore data needed to be a valid OSD: {}\n{}'.format(dev, out, e))
        return None
    return info
+
+
class List(object):
    # CLI handler for ``ceph-volume raw list``

    help = 'list BlueStore OSDs on raw devices'

    def __init__(self, argv):
        self.argv = argv

    def is_atari_partitions(self, _lsblk: Dict[str, Any]) -> bool:
        """
        Tell whether an lsblk entry should be ignored as a (possible) phantom
        Atari partition.

        :param _lsblk: lsblk information of one device; ``NAME`` is required
                       and ``PKNAME`` (the parent) is optional
        :returns: True when the parent carries a BlueStore label, or when that
                  check raised ``OSError``; False otherwise
        """
        dev = _lsblk['NAME']
        parent = _lsblk.get('PKNAME')
        if not parent:
            return False
        try:
            if disk.has_bluestore_label(parent):
                logger.warning(
                    'ignoring child device {} whose parent {} is a BlueStore OSD. '
                    'device is likely a phantom Atari partition. device info: {}'.format(
                        dev, parent, _lsblk))
                return True
        except OSError as e:
            logger.error(
                'ignoring child device {} to avoid reporting invalid BlueStore data from phantom Atari partitions. '
                'failed to determine if parent device {} is BlueStore. err: {}'.format(
                    dev, parent, e))
            return True
        return False

    def exclude_atari_partitions(self, _lsblk_all: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Return the entries of ``_lsblk_all`` that are not flagged by
        ``is_atari_partitions``, preserving their order.
        """
        return [_lsblk for _lsblk in _lsblk_all if not self.is_atari_partitions(_lsblk)]

    def generate(self, devs=None):
        """
        Build the report of BlueStore OSDs found on raw devices.

        :param devs: Devices to inspect. When empty, or when it holds no
                     truthy entry, every device reported by lsblk is inspected
        :returns: Dict keyed by OSD UUID; each value merges the information
                  gathered from all devices sharing that UUID
        """
        logger.debug('Listing block devices via lsblk...')
        info_devices = []
        if not devs or not any(devs):
            # If no devs are given initially, we want to list ALL devices including children and
            # parents. Parent disks with child partitions may be the appropriate device to return if
            # the parent disk has a bluestore header, but children may be the most appropriate
            # devices to return if the parent disk does not have a bluestore header.
            info_devices = disk.lsblk_all(abspath=True)
            devs = [device['NAME'] for device in info_devices if device.get('NAME',)]
        else:
            for dev in devs:
                info_devices.append(disk.lsblk(dev, abspath=True))

        # Linux kernels built with CONFIG_ATARI_PARTITION enabled can falsely interpret
        # bluestore's on-disk format as an Atari partition table. These false Atari partitions
        # can be interpreted as real OSDs if a bluestore OSD was previously created on the false
        # partition. See https://tracker.ceph.com/issues/52060 for more info. If a device has a
        # parent, it is a child. If the parent is a valid bluestore OSD, the child will only
        # exist if it is a phantom Atari partition, and the child should be ignored. If the
        # parent isn't bluestore, then the child could be a valid bluestore OSD. If we fail to
        # determine whether a parent is bluestore, we should err on the side of not reporting
        # the child so as not to give a false positive.
        info_devices = self.exclude_atari_partitions(info_devices)

        result = {}
        logger.debug('inspecting devices: {}'.format(devs))
        for info_device in info_devices:
            bs_info = _get_bluestore_info(info_device['NAME'])
            if bs_info is None:
                # None is also returned in the rare event that there is an issue reading info from
                # a BlueStore disk, so be sure to log our assumption that it isn't bluestore
                logger.info('device {} does not have BlueStore information'.format(info_device['NAME']))
                continue
            uuid = bs_info['osd_uuid']
            if uuid not in result:
                result[uuid] = {}
            result[uuid].update(bs_info)

        return result

    @decorators.needs_root
    def list(self, args):
        """
        Print the report for ``args.device`` in the requested ``args.format``.

        Only ``json`` is implemented: any other format exits when the report
        is empty and raises ``RuntimeError`` otherwise.
        """
        report = self.generate(args.device)
        if args.format == 'json':
            print(json.dumps(report, indent=4, sort_keys=True))
        else:
            if not report:
                raise SystemExit('No valid Ceph devices found')
            raise RuntimeError('not implemented yet')

    def main(self):
        """Parse ``self.argv`` and print the listing."""
        sub_command_help = dedent("""
        List OSDs on raw devices with raw device labels (usually the first
        block of the device).

        Full listing of all identifiable (currently, BlueStore) OSDs
        on raw devices:

            ceph-volume raw list

        List a particular device, reporting all metadata about it::

            ceph-volume raw list /dev/sda1

        """)
        parser = argparse.ArgumentParser(
            prog='ceph-volume raw list',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=sub_command_help,
        )

        parser.add_argument(
            'device',
            metavar='DEVICE',
            nargs='*',
            help='Path to a device like /dev/sda1'
        )

        parser.add_argument(
            '--format',
            help='output format, defaults to "json"',
            default='json',
            choices=['json', 'pretty'],
        )

        args = parser.parse_args(self.argv)
        self.list(args)
diff --git a/src/ceph-volume/ceph_volume/devices/raw/main.py b/src/ceph-volume/ceph_volume/devices/raw/main.py
new file mode 100644
index 000000000..efa251090
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/raw/main.py
@@ -0,0 +1,40 @@
+import argparse
+from textwrap import dedent
+from ceph_volume import terminal
+from . import list
+from . import prepare
+from . import activate
+
class Raw(object):
    # Entry point of the ``ceph-volume raw`` sub-command family

    help = 'Manage single-device OSDs on raw block devices'

    _help = dedent("""
    Manage a single-device OSD on a raw block device. Rely on
    the existing device labels to store any needed metadata.

    {sub_help}
    """)

    # sub-command name -> handler class
    mapper = {
        'list': list.List,
        'prepare': prepare.Prepare,
        'activate': activate.Activate,
    }

    def __init__(self, argv):
        self.argv = argv

    def print_help(self, sub_help):
        """Return the help template with ``sub_help`` filled in."""
        return self._help.format(sub_help=sub_help)

    def main(self):
        """
        Hand over to the matching sub-command; afterwards parse the arguments
        and print the help when at most one argument was given.
        """
        terminal.dispatch(self.mapper, self.argv)
        description = self.print_help(terminal.subhelp(self.mapper))
        parser = argparse.ArgumentParser(
            prog='ceph-volume raw',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=description,
        )
        parser.parse_args(self.argv)
        if len(self.argv) > 1:
            return None
        return parser.print_help()
diff --git a/src/ceph-volume/ceph_volume/devices/raw/prepare.py b/src/ceph-volume/ceph_volume/devices/raw/prepare.py
new file mode 100644
index 000000000..b3201a89d
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/raw/prepare.py
@@ -0,0 +1,160 @@
+from __future__ import print_function
+import json
+import logging
+import os
+from textwrap import dedent
+from ceph_volume.util import prepare as prepare_utils
+from ceph_volume.util import encryption as encryption_utils
+from ceph_volume.util import disk
+from ceph_volume.util import system
+from ceph_volume import decorators, terminal
+from ceph_volume.devices.lvm.common import rollback_osd
+from .common import create_parser
+
+logger = logging.getLogger(__name__)
+
def prepare_dmcrypt(key, device, device_type, fsid):
    """
    Shared helper for encrypted devices: block, db and wal devices all go
    through the same steps.

    Returns an empty string when ``device`` is falsy. Otherwise the mapping
    name is derived from ``fsid``, the device's lsblk ``KNAME`` and
    ``device_type``, and the result of the encryption helper is returned.
    """
    if device:
        kernel_name = disk.lsblk(device)['KNAME']
        mapper_name = 'ceph-{}-{}-{}-dmcrypt'.format(fsid, kernel_name, device_type)
        return encryption_utils.prepare_dmcrypt(key, device, mapper_name)
    return ''
+
def prepare_bluestore(block, wal, db, secrets, osd_id, fsid, tmpfs):
    """
    Create the OSD directory and format a BlueStore OSD on ``block``.

    :param block: The device to be used for the bluestore data
    :param wal: a device to be used for block.wal, or an empty value
    :param db: a device to be used for block.db, or an empty value
    :param secrets: A dict with the secrets needed to create the osd (e.g.
                    cephx). When it holds a truthy ``dmcrypt_key`` the
                    devices are set up through ``prepare_dmcrypt`` first
    :param osd_id: The OSD id
    :param fsid: The OSD fsid, also known as the OSD UUID
    :param tmpfs: Forwarded to ``create_osd_path``
    """
    # only generate a key when the caller did not provide one
    if 'cephx_secret' in secrets:
        cephx_secret = secrets['cephx_secret']
    else:
        cephx_secret = prepare_utils.create_key()

    if secrets.get('dmcrypt_key'):
        key = secrets['dmcrypt_key']
        # from here on the names refer to whatever prepare_dmcrypt returns
        block = prepare_dmcrypt(key, block, 'block', fsid)
        wal = prepare_dmcrypt(key, wal, 'wal', fsid)
        db = prepare_dmcrypt(key, db, 'db', fsid)

    # create the directory
    prepare_utils.create_osd_path(osd_id, tmpfs=tmpfs)
    # symlink the block
    prepare_utils.link_block(block, osd_id)
    # get the latest monmap
    prepare_utils.get_monmap(osd_id)
    # write the OSD keyring if it doesn't exist already
    prepare_utils.write_keyring(osd_id, cephx_secret)
    # prepare the osd filesystem
    prepare_utils.osd_mkfs_bluestore(
        osd_id, fsid,
        keyring=cephx_secret,
        wal=wal,
        db=db
    )
+
+
class Prepare(object):
    # CLI handler for ``ceph-volume raw prepare``

    help = 'Format a raw device and associate it with a (BlueStore) OSD'

    def __init__(self, argv):
        self.argv = argv
        self.osd_id = None
        # populated by main(), or injected through safe_prepare()
        self.args = None

    def safe_prepare(self, args=None):
        """
        An intermediate step between `main()` and `prepare()` so that we can
        capture the `self.osd_id` in case we need to rollback

        :param args: Injected args, usually from `raw create` which compounds
                     both `prepare` and `create`. When omitted, the
                     previously stored ``self.args`` is used.
        """
        if args is not None:
            self.args = args
        try:
            self.prepare()
        except Exception:
            logger.exception('raw prepare was unable to complete')
            logger.info('will rollback OSD ID creation')
            rollback_osd(self.args, self.osd_id)
            raise
        # read from self.args: the ``args`` parameter may legitimately be None
        dmcrypt_log = 'dmcrypt' if self.args.dmcrypt else 'clear'
        terminal.success("ceph-volume raw {} prepare successful for: {}".format(dmcrypt_log, self.args.data))

    @decorators.needs_root
    def prepare(self):
        """
        Gather the secrets, obtain an OSD id (stored in ``self.osd_id``) and
        prepare the BlueStore OSD on the devices given in ``self.args``.
        """
        secrets = {'cephx_secret': prepare_utils.create_key()}

        if self.args.dmcrypt:
            secrets['dmcrypt_key'] = os.getenv('CEPH_VOLUME_DMCRYPT_SECRET')
            # dummy value to make `ceph osd new` not complaining
            secrets['cephx_lockbox_secret'] = prepare_utils.create_key()

        osd_fsid = system.generate_uuid()
        crush_device_class = self.args.crush_device_class
        if crush_device_class:
            secrets['crush_device_class'] = crush_device_class
        tmpfs = not self.args.no_tmpfs
        # unset devices are handed over as empty strings
        wal = self.args.block_wal or ""
        db = self.args.block_db or ""

        # reuse a given ID if it exists, otherwise create a new ID
        self.osd_id = prepare_utils.create_id(
            osd_fsid,
            json.dumps(secrets),
            osd_id=self.args.osd_id)

        prepare_bluestore(
            self.args.data,
            wal,
            db,
            secrets,
            self.osd_id,
            osd_fsid,
            tmpfs,
        )

    def main(self):
        """
        Parse ``self.argv``, validate the options and run ``safe_prepare``.

        Prints the sub-command help and returns when no arguments were given.
        Exits with status 1 when ``--bluestore`` is missing, or when
        ``--dmcrypt`` is used without CEPH_VOLUME_DMCRYPT_SECRET being set.
        """
        sub_command_help = dedent("""
        Prepare an OSD by assigning an ID and FSID, registering them with the
        cluster with an ID and FSID, formatting the volume.

        Once the OSD is ready, an ad-hoc systemd unit will be enabled so that
        it can later get activated and the OSD daemon can get started.

            ceph-volume raw prepare --bluestore --data {device}

        DB and WAL devices are supported.

            ceph-volume raw prepare --bluestore --data {device} --block.db {device} --block.wal {device}

        """)
        parser = create_parser(
            prog='ceph-volume raw prepare',
            description=sub_command_help,
        )
        if not self.argv:
            print(sub_command_help)
            return
        self.args = parser.parse_args(self.argv)
        if not self.args.bluestore:
            terminal.error('must specify --bluestore (currently the only supported backend)')
            raise SystemExit(1)
        if self.args.dmcrypt and not os.getenv('CEPH_VOLUME_DMCRYPT_SECRET'):
            terminal.error('encryption was requested (--dmcrypt) but environment variable '
                           'CEPH_VOLUME_DMCRYPT_SECRET is not set, you must set '
                           'this variable to provide a dmcrypt secret.')
            raise SystemExit(1)

        self.safe_prepare(self.args)
diff --git a/src/ceph-volume/ceph_volume/devices/simple/__init__.py b/src/ceph-volume/ceph_volume/devices/simple/__init__.py
new file mode 100644
index 000000000..280e130ed
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/simple/__init__.py
@@ -0,0 +1 @@
+from .main import Simple # noqa
diff --git a/src/ceph-volume/ceph_volume/devices/simple/activate.py b/src/ceph-volume/ceph_volume/devices/simple/activate.py
new file mode 100644
index 000000000..f3dcdcef8
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/simple/activate.py
@@ -0,0 +1,282 @@
+from __future__ import print_function
+import argparse
+import base64
+import glob
+import json
+import logging
+import os
+from textwrap import dedent
+from ceph_volume import process, decorators, terminal, conf
+from ceph_volume.util import system, disk
+from ceph_volume.util import encryption as encryption_utils
+from ceph_volume.systemd import systemctl
+
+
+logger = logging.getLogger(__name__)
+mlogger = terminal.MultiLogger(__name__)
+
+
class Activate(object):
    # CLI handler for ``ceph-volume simple activate``

    help = 'Enable systemd units to mount configured devices and start a Ceph OSD'

    def __init__(self, argv, from_trigger=False):
        self.argv = argv
        self.from_trigger = from_trigger
        self.skip_systemd = False

    def validate_devices(self, json_config):
        """
        ``json_config`` is the loaded dictionary coming from the JSON file. It is usually mixed with
        other non-device items, but for sakes of comparison it doesn't really matter. This method is
        just making sure that the keys needed exist

        :returns: True for a bluestore config holding both ``block`` and ``data``
        :raises RuntimeError: For a bluestore config missing either of them
        """
        devices = json_config.keys()
        try:
            objectstore = json_config['type']
        except KeyError:
            logger.warning(
                '"type" key not found, assuming "bluestore" since journal key is not present'
            )
            objectstore = 'bluestore'

        # Go through all the device combinations that are absolutely required,
        # raise an error describing what was expected and what was found
        # otherwise.
        if objectstore == 'bluestore':
            # This is a bit tricky, with newer bluestore we don't need data, older implementations
            # do (e.g. with ceph-disk). ceph-volume just uses a tmpfs that doesn't require data.
            if {'block', 'data'}.issubset(set(devices)):
                return True
            bluestore_devices = ['block.db', 'block.wal', 'block', 'data']
            found = [i for i in devices if i in bluestore_devices]
            mlogger.error("Required devices (block and data) not present for bluestore")
            mlogger.error('bluestore devices found: %s', found)
            raise RuntimeError('Unable to activate bluestore OSD due to missing devices')

    def get_device(self, uuid):
        """
        If a device is encrypted, it will decrypt/open and return the mapper
        path, if it isn't encrypted it will just return the device found that
        is mapped to the uuid. This will make it easier for the caller to
        avoid if/else to check if devices need decrypting

        :param uuid: The partition uuid of the device (PARTUUID)
        """
        device = disk.get_device_from_partuuid(uuid)

        # If device is not found, it is fine to return an empty string from the
        # helper that finds `device`. If it finds anything and it is not
        # encrypted, just return what was found
        if not self.is_encrypted or not device:
            return device

        if self.encryption_type == 'luks':
            encryption_utils.luks_open(self.dmcrypt_secret, device, uuid)
        else:
            encryption_utils.plain_open(self.dmcrypt_secret, device, uuid)

        return '/dev/mapper/%s' % uuid

    def enable_systemd_units(self, osd_id, osd_fsid):
        """
        * disables the ceph-disk systemd units to prevent them from running when
          a UDEV event matches Ceph rules
        * creates the ``simple`` systemd units to handle the activation and
          startup of the OSD with ``osd_id`` and ``osd_fsid``
        * enables the OSD systemd unit and finally starts the OSD.
        """
        if not self.from_trigger and not self.skip_systemd:
            # means it was scanned and now activated directly, so ensure that
            # ceph-disk units are disabled, and that the `simple` systemd unit
            # is created and enabled

            # enable the ceph-volume unit for this OSD
            systemctl.enable_volume(osd_id, osd_fsid, 'simple')

            # disable any/all ceph-disk units
            systemctl.mask_ceph_disk()
            terminal.warning(
                ('All ceph-disk systemd units have been disabled to '
                 'prevent OSDs getting triggered by UDEV events')
            )
        else:
            terminal.info('Skipping enabling of `simple` systemd unit')
            terminal.info('Skipping masking of ceph-disk systemd units')

        if not self.skip_systemd:
            # enable the OSD
            systemctl.enable_osd(osd_id)

            # start the OSD
            systemctl.start_osd(osd_id)
        else:
            terminal.info(
                'Skipping enabling and starting OSD simple systemd unit because --no-systemd was used'
            )

    @decorators.needs_root
    def activate(self, args):
        """
        Activate the OSD described by the JSON file at ``args.json_config``.

        :param args: Parsed CLI arguments. ``json_config`` is required;
                     ``osd_id`` and ``osd_fsid`` are fallbacks for values
                     missing from the JSON file
        :raises RuntimeError: When the data uuid is missing from the file, or
                              no device is found for it
        """
        with open(args.json_config, 'r') as fp:
            osd_metadata = json.load(fp)

        # Make sure that required devices are configured
        self.validate_devices(osd_metadata)

        osd_id = osd_metadata.get('whoami', args.osd_id)
        osd_fsid = osd_metadata.get('fsid', args.osd_fsid)
        data_uuid = osd_metadata.get('data', {}).get('uuid')
        cluster_name = osd_metadata.get('cluster_name', 'ceph')
        conf.cluster = cluster_name
        if not data_uuid:
            # report the resolved ID: args.osd_id is None when --file or --all is used
            raise RuntimeError(
                'Unable to activate OSD %s - no "uuid" key found for data' % osd_id
            )

        # Encryption detection, and capturing of the keys to decrypt
        self.is_encrypted = osd_metadata.get('encrypted', False)
        self.encryption_type = osd_metadata.get('encryption_type')
        if self.is_encrypted:
            lockbox_secret = osd_metadata.get('lockbox.keyring')
            # write the keyring always so that we can unlock
            encryption_utils.write_lockbox_keyring(osd_id, osd_fsid, lockbox_secret)
            # Store the secret around so that the decrypt method can reuse
            raw_dmcrypt_secret = encryption_utils.get_dmcrypt_key(osd_id, osd_fsid)
            # Note how both these calls need b64decode. For some reason, the
            # way ceph-disk creates these keys, it stores them in the monitor
            # *undecoded*, requiring this decode call again. The lvm side of
            # encryption doesn't need it, so we are assuming here that anything
            # that `simple` scans, will come from ceph-disk and will need this
            # extra decode call here
            self.dmcrypt_secret = base64.b64decode(raw_dmcrypt_secret)

        osd_dir = '/var/lib/ceph/osd/%s-%s' % (cluster_name, osd_id)

        # XXX there is no support for LVM here
        data_device = self.get_device(data_uuid)

        if not data_device:
            # osd_fsid instead of osd_metadata['fsid']: a file without that
            # key must not turn this error into a KeyError
            raise RuntimeError("osd fsid {} doesn't exist, this file will "
                               "be skipped, consider cleaning legacy "
                               "json file {}".format(osd_fsid, args.json_config))

        block_device = self.get_device(osd_metadata.get('block', {}).get('uuid'))
        block_db_device = self.get_device(osd_metadata.get('block.db', {}).get('uuid'))
        block_wal_device = self.get_device(osd_metadata.get('block.wal', {}).get('uuid'))

        if not system.device_is_mounted(data_device, destination=osd_dir):
            process.run(['mount', '-v', data_device, osd_dir])

        device_map = {
            'block': block_device,
            'block.db': block_db_device,
            'block.wal': block_wal_device
        }

        for name, device in device_map.items():
            if not device:
                continue
            # always re-do the symlink regardless if it exists, so that the
            # device path that may have changed can be mapped correctly every time
            destination = os.path.join(osd_dir, name)
            process.run(['ln', '-snf', device, destination])

            # make sure that the device has proper permissions
            system.chown(device)

        self.enable_systemd_units(osd_id, osd_fsid)

        terminal.success('Successfully activated OSD %s with FSID %s' % (osd_id, osd_fsid))

    def main(self):
        """
        Parse ``self.argv`` and activate one OSD (by ID/FSID or ``--file``) or
        every scanned OSD (``--all``).

        Prints the sub-command help and returns when no arguments were given.
        """
        sub_command_help = dedent("""
        Activate OSDs by mounting devices previously configured to their
        appropriate destination::

            ceph-volume simple activate {ID} {FSID}

        Or using a JSON file directly::

            ceph-volume simple activate --file /etc/ceph/osd/{ID}-{FSID}.json

        The OSD must have been "scanned" previously (see ``ceph-volume simple
        scan``), so that all needed OSD device information and metadata exist.

        A previously scanned OSD would exist like::

            /etc/ceph/osd/{ID}-{FSID}.json


        Environment variables supported:

        CEPH_VOLUME_SIMPLE_JSON_DIR: Directory location for scanned OSD JSON configs
        """)
        parser = argparse.ArgumentParser(
            prog='ceph-volume simple activate',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=sub_command_help,
        )
        parser.add_argument(
            'osd_id',
            metavar='ID',
            nargs='?',
            help='The ID of the OSD, usually an integer, like 0'
        )
        parser.add_argument(
            'osd_fsid',
            metavar='FSID',
            nargs='?',
            help='The FSID of the OSD, similar to a SHA1'
        )
        parser.add_argument(
            '--all',
            help='Activate all OSDs with a OSD JSON config',
            action='store_true',
            default=False,
        )
        parser.add_argument(
            '--file',
            help='The path to a JSON file, from a scanned OSD'
        )
        parser.add_argument(
            '--no-systemd',
            dest='skip_systemd',
            action='store_true',
            help='Skip creating and enabling systemd units and starting OSD services',
        )
        if len(self.argv) == 0:
            print(sub_command_help)
            return
        args = parser.parse_args(self.argv)
        if not args.file and not args.all:
            # both values are needed to build the JSON file name below
            if not args.osd_id or not args.osd_fsid:
                terminal.error('ID and FSID are required to find the right OSD to activate')
                terminal.error('from a scanned OSD location in /etc/ceph/osd/')
                raise RuntimeError('Unable to activate without both ID and FSID')
        # don't allow a CLI flag to specify the JSON dir, because that might
        # implicitly indicate that it would be possible to activate a json file
        # at a non-default location which would not work at boot time if the
        # custom location is not exposed through an ENV var
        self.skip_systemd = args.skip_systemd
        json_dir = os.environ.get('CEPH_VOLUME_SIMPLE_JSON_DIR', '/etc/ceph/osd/')
        if args.all:
            if args.file or args.osd_id:
                mlogger.warn('--all was passed, ignoring --file and ID/FSID arguments')
            json_configs = glob.glob('{}/*.json'.format(json_dir))
            for json_config in json_configs:
                mlogger.info('activating OSD specified in {}'.format(json_config))
                args.json_config = json_config
                try:
                    self.activate(args)
                except RuntimeError as e:
                    # exceptions have no ``message`` attribute on Python 3;
                    # a failing OSD must not prevent activating the others
                    terminal.warning(str(e))
        else:
            if args.file:
                json_config = args.file
            else:
                json_config = os.path.join(json_dir, '%s-%s.json' % (args.osd_id, args.osd_fsid))
            if not os.path.exists(json_config):
                raise RuntimeError('Expected JSON config path not found: %s' % json_config)
            args.json_config = json_config
            self.activate(args)
diff --git a/src/ceph-volume/ceph_volume/devices/simple/main.py b/src/ceph-volume/ceph_volume/devices/simple/main.py
new file mode 100644
index 000000000..2119963f8
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/simple/main.py
@@ -0,0 +1,41 @@
+import argparse
+from textwrap import dedent
+from ceph_volume import terminal
+from . import scan
+from . import activate
+from . import trigger
+
+
class Simple(object):
    # Entry point of the ``ceph-volume simple`` sub-command family

    help = 'Manage already deployed OSDs with ceph-volume'

    _help = dedent("""
    Take over a deployed OSD, persisting its metadata in /etc/ceph/osd/ so that it can be managed
    with ceph-volume directly. Avoids UDEV and ceph-disk handling.

    {sub_help}
    """)

    # sub-command name -> handler class
    mapper = {
        'scan': scan.Scan,
        'activate': activate.Activate,
        'trigger': trigger.Trigger,
    }

    def __init__(self, argv):
        self.argv = argv

    def print_help(self, sub_help):
        """Return the help template with ``sub_help`` filled in."""
        return self._help.format(sub_help=sub_help)

    def main(self):
        """
        Hand over to the matching sub-command; afterwards parse the arguments
        and print the help when at most one argument was given.
        """
        terminal.dispatch(self.mapper, self.argv)
        description = self.print_help(terminal.subhelp(self.mapper))
        parser = argparse.ArgumentParser(
            prog='ceph-volume simple',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=description,
        )
        parser.parse_args(self.argv)
        if len(self.argv) > 1:
            return None
        return parser.print_help()
diff --git a/src/ceph-volume/ceph_volume/devices/simple/scan.py b/src/ceph-volume/ceph_volume/devices/simple/scan.py
new file mode 100644
index 000000000..ff7040beb
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/simple/scan.py
@@ -0,0 +1,385 @@
+from __future__ import print_function
+import argparse
+import base64
+import json
+import logging
+import os
+from textwrap import dedent
+from ceph_volume import decorators, terminal, conf
+from ceph_volume.api import lvm
+from ceph_volume.systemd import systemctl
+from ceph_volume.util import arg_validators, system, disk, encryption
+from ceph_volume.util.device import Device
+
+
+logger = logging.getLogger(__name__)
+
+
def parse_keyring(file_contents):
    r"""
    Pull the bare key out of the text of a keyring file.

    The key normally lives in a client section. For a lockbox the text looks
    like::

        [client.osd-lockbox.8d7a8ab2-5db0-4f83-a785-2809aba403d5]\n\tkey = AQDtoGha/GYJExAA7HNl7Ukhqr7AKlCpLJk6UA==\n

    for which the returned value is::

        AQDtoGha/GYJExAA7HNl7Ukhqr7AKlCpLJk6UA==
    """
    # drop surrounding newlines, then keep whatever follows the last space
    without_newlines = file_contents.strip('\n')
    after_last_space = without_newlines.rsplit(' ', 1)[-1]
    # if newlines remain in that tail, only the last line is of interest
    last_line = after_last_space.rsplit('\n', 1)[-1]
    return last_line.strip()
+
+
class Scan(object):
    """``ceph-volume simple scan``: capture OSD metadata and persist it as JSON."""

    help = 'Capture metadata from all running ceph-disk OSDs, OSD data partition or directory'

    def __init__(self, argv):
        """Store the raw argument list and the directory used for JSON files."""
        # directory where scanned metadata is persisted, see ``etc_path``
        self._etc_path = '/etc/ceph/osd/'
        self.argv = argv
+
+ @property
+ def etc_path(self):
+ if os.path.isdir(self._etc_path):
+ return self._etc_path
+
+ if not os.path.exists(self._etc_path):
+ os.mkdir(self._etc_path)
+ return self._etc_path
+
+ error = "OSD Configuration path (%s) needs to be a directory" % self._etc_path
+ raise RuntimeError(error)
+
+ def get_contents(self, path):
+ with open(path, 'r') as fp:
+ contents = fp.readlines()
+ if len(contents) > 1:
+ return ''.join(contents)
+ return ''.join(contents).strip().strip('\n')
+
+ def scan_device(self, path):
+ device_metadata = {'path': None, 'uuid': None}
+ if not path:
+ return device_metadata
+ if self.is_encrypted:
+ encryption_metadata = encryption.legacy_encrypted(path)
+ device_metadata['path'] = encryption_metadata['device']
+ device_metadata['uuid'] = disk.get_partuuid(encryption_metadata['device'])
+ return device_metadata
+ # cannot read the symlink if this is tmpfs
+ if os.path.islink(path):
+ device = os.readlink(path)
+ else:
+ device = path
+ lvm_device = lvm.get_single_lv(filters={'lv_path': device})
+ if lvm_device:
+ device_uuid = lvm_device.lv_uuid
+ else:
+ device_uuid = disk.get_partuuid(device)
+
+ device_metadata['uuid'] = device_uuid
+ device_metadata['path'] = device
+
+ return device_metadata
+
+ def scan_directory(self, path):
+ osd_metadata = {'cluster_name': conf.cluster}
+ directory_files = os.listdir(path)
+ if 'keyring' not in directory_files:
+ raise RuntimeError(
+ 'OSD files not found, required "keyring" file is not present at: %s' % path
+ )
+ for file_ in os.listdir(path):
+ file_path = os.path.join(path, file_)
+ file_json_key = file_
+ if file_.endswith('_dmcrypt'):
+ file_json_key = file_.rstrip('_dmcrypt')
+ logger.info(
+ 'reading file {}, stripping _dmcrypt suffix'.format(file_)
+ )
+ if os.path.islink(file_path):
+ if os.path.exists(file_path):
+ osd_metadata[file_json_key] = self.scan_device(file_path)
+ else:
+ msg = 'broken symlink found %s -> %s' % (file_path, os.path.realpath(file_path))
+ terminal.warning(msg)
+ logger.warning(msg)
+
+ if os.path.isdir(file_path):
+ continue
+
+ # the check for binary needs to go before the file, to avoid
+ # capturing data from binary files but still be able to capture
+ # contents from actual files later
+ try:
+ if system.is_binary(file_path):
+ logger.info('skipping binary file: %s' % file_path)
+ continue
+ except IOError:
+ logger.exception('skipping due to IOError on file: %s' % file_path)
+ continue
+ if os.path.isfile(file_path):
+ content = self.get_contents(file_path)
+ if 'keyring' in file_path:
+ content = parse_keyring(content)
+ try:
+ osd_metadata[file_json_key] = int(content)
+ except ValueError:
+ osd_metadata[file_json_key] = content
+
+ # we must scan the paths again because this might be a temporary mount
+ path_mounts = system.Mounts(paths=True)
+ device = path_mounts.get_mounts().get(path)
+
+ # it is possible to have more than one device, pick the first one, and
+ # warn that it is possible that more than one device is 'data'
+ if not device:
+ terminal.error('Unable to detect device mounted for path: %s' % path)
+ raise RuntimeError('Cannot activate OSD')
+ osd_metadata['data'] = self.scan_device(device[0] if len(device) else None)
+
+ return osd_metadata
+
+ def scan_encrypted(self, directory=None):
+ device = self.encryption_metadata['device']
+ lockbox = self.encryption_metadata['lockbox']
+ encryption_type = self.encryption_metadata['type']
+ osd_metadata = {}
+ # Get the PARTUUID of the device to make sure have the right one and
+ # that maps to the data device
+ device_uuid = disk.get_partuuid(device)
+ dm_path = '/dev/mapper/%s' % device_uuid
+ # check if this partition is already mapped
+ device_status = encryption.status(device_uuid)
+
+ # capture all the information from the lockbox first, reusing the
+ # directory scan method
+ if self.device_mounts.get(lockbox):
+ lockbox_path = self.device_mounts.get(lockbox)[0]
+ lockbox_metadata = self.scan_directory(lockbox_path)
+ # ceph-disk stores the fsid as osd-uuid in the lockbox, thanks ceph-disk
+ dmcrypt_secret = encryption.get_dmcrypt_key(
+ None, # There is no ID stored in the lockbox
+ lockbox_metadata['osd-uuid'],
+ os.path.join(lockbox_path, 'keyring')
+ )
+ else:
+ with system.tmp_mount(lockbox) as lockbox_path:
+ lockbox_metadata = self.scan_directory(lockbox_path)
+ # ceph-disk stores the fsid as osd-uuid in the lockbox, thanks ceph-disk
+ dmcrypt_secret = encryption.get_dmcrypt_key(
+ None, # There is no ID stored in the lockbox
+ lockbox_metadata['osd-uuid'],
+ os.path.join(lockbox_path, 'keyring')
+ )
+
+ if not device_status:
+ # Note how both these calls need b64decode. For some reason, the
+ # way ceph-disk creates these keys, it stores them in the monitor
+ # *undecoded*, requiring this decode call again. The lvm side of
+ # encryption doesn't need it, so we are assuming here that anything
+ # that `simple` scans, will come from ceph-disk and will need this
+ # extra decode call here
+ dmcrypt_secret = base64.b64decode(dmcrypt_secret)
+ if encryption_type == 'luks':
+ encryption.luks_open(dmcrypt_secret, device, device_uuid)
+ else:
+ encryption.plain_open(dmcrypt_secret, device, device_uuid)
+
+ # If we have a directory, use that instead of checking for mounts
+ if directory:
+ osd_metadata = self.scan_directory(directory)
+ else:
+ # Now check if that mapper is mounted already, to avoid remounting and
+ # decrypting the device
+ dm_path_mount = self.device_mounts.get(dm_path)
+ if dm_path_mount:
+ osd_metadata = self.scan_directory(dm_path_mount[0])
+ else:
+ with system.tmp_mount(dm_path, encrypted=True) as device_path:
+ osd_metadata = self.scan_directory(device_path)
+
+ osd_metadata['encrypted'] = True
+ osd_metadata['encryption_type'] = encryption_type
+ osd_metadata['lockbox.keyring'] = parse_keyring(lockbox_metadata['keyring'])
+ return osd_metadata
+
+ @decorators.needs_root
+ def scan(self, args):
+ osd_metadata = {'cluster_name': conf.cluster}
+ osd_path = None
+ logger.info('detecting if argument is a device or a directory: %s', args.osd_path)
+ if os.path.isdir(args.osd_path):
+ logger.info('will scan directly, path is a directory')
+ osd_path = args.osd_path
+ else:
+ # assume this is a device, check if it is mounted and use that path
+ logger.info('path is not a directory, will check if mounted')
+ if system.device_is_mounted(args.osd_path):
+ logger.info('argument is a device, which is mounted')
+ mounted_osd_paths = self.device_mounts.get(args.osd_path)
+ osd_path = mounted_osd_paths[0] if len(mounted_osd_paths) else None
+
+ # argument is not a directory, and it is not a device that is mounted
+ # somewhere so temporarily mount it to poke inside, otherwise, scan
+ # directly
+ if not osd_path:
+ # check if we have an encrypted device first, so that we can poke at
+ # the lockbox instead
+ if self.is_encrypted:
+ if not self.encryption_metadata.get('lockbox'):
+ raise RuntimeError(
+ 'Lockbox partition was not found for device: %s' % args.osd_path
+ )
+ osd_metadata = self.scan_encrypted()
+ else:
+ logger.info('device is not mounted, will mount it temporarily to scan')
+ with system.tmp_mount(args.osd_path) as osd_path:
+ osd_metadata = self.scan_directory(osd_path)
+ else:
+ if self.is_encrypted:
+ logger.info('will scan encrypted OSD directory at path: %s', osd_path)
+ osd_metadata = self.scan_encrypted(osd_path)
+ else:
+ logger.info('will scan OSD directory at path: %s', osd_path)
+ osd_metadata = self.scan_directory(osd_path)
+
+ osd_id = osd_metadata['whoami']
+ osd_fsid = osd_metadata['fsid']
+ filename = '%s-%s.json' % (osd_id, osd_fsid)
+ json_path = os.path.join(self.etc_path, filename)
+
+ if os.path.exists(json_path) and not args.stdout:
+ if not args.force:
+ raise RuntimeError(
+ '--force was not used and OSD metadata file exists: %s' % json_path
+ )
+
+ if args.stdout:
+ print(json.dumps(osd_metadata, indent=4, sort_keys=True, ensure_ascii=False))
+ else:
+ with open(json_path, 'w') as fp:
+ json.dump(osd_metadata, fp, indent=4, sort_keys=True, ensure_ascii=False)
+ fp.write(os.linesep)
+ terminal.success(
+ 'OSD %s got scanned and metadata persisted to file: %s' % (
+ osd_id,
+ json_path
+ )
+ )
+ terminal.success(
+ 'To take over management of this scanned OSD, and disable ceph-disk and udev, run:'
+ )
+ terminal.success(' ceph-volume simple activate %s %s' % (osd_id, osd_fsid))
+
+ if not osd_metadata.get('data'):
+ msg = 'Unable to determine device mounted on %s' % args.osd_path
+ logger.warning(msg)
+ terminal.warning(msg)
+ terminal.warning('OSD will not be able to start without this information:')
+ terminal.warning(' "data": "/path/to/device",')
+ logger.warning('Unable to determine device mounted on %s' % args.osd_path)
+
+ def main(self):
+ sub_command_help = dedent("""
+ Scan running OSDs, an OSD directory (or data device) for files and configurations
+ that will allow to take over the management of the OSD.
+
+ Scanned OSDs will get their configurations stored in
+ /etc/ceph/osd/<id>-<fsid>.json
+
+ For an OSD ID of 0 with fsid of ``a9d50838-e823-43d6-b01f-2f8d0a77afc2``
+ that could mean a scan command that looks like::
+
+ ceph-volume simple scan /var/lib/ceph/osd/ceph-0
+
+ Which would store the metadata in a JSON file at::
+
+ /etc/ceph/osd/0-a9d50838-e823-43d6-b01f-2f8d0a77afc2.json
+
+ To scan all running OSDs:
+
+ ceph-volume simple scan
+
+ To a scan a specific running OSD:
+
+ ceph-volume simple scan /var/lib/ceph/osd/{cluster}-{osd id}
+
+ And to scan a device (mounted or unmounted) that has OSD data in it, for example /dev/sda1
+
+ ceph-volume simple scan /dev/sda1
+
+ Scanning a device or directory that belongs to an OSD not created by ceph-disk will be ingored.
+ """)
+ parser = argparse.ArgumentParser(
+ prog='ceph-volume simple scan',
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ description=sub_command_help,
+ )
+
+ parser.add_argument(
+ '-f', '--force',
+ action='store_true',
+ help='If OSD has already been scanned, the JSON file will be overwritten'
+ )
+
+ parser.add_argument(
+ '--stdout',
+ action='store_true',
+ help='Do not save to a file, output metadata to stdout'
+ )
+
+ parser.add_argument(
+ 'osd_path',
+ metavar='OSD_PATH',
+ type=arg_validators.OSDPath(),
+ nargs='?',
+ default=None,
+ help='Path to an existing OSD directory or OSD data partition'
+ )
+
+ args = parser.parse_args(self.argv)
+ paths = []
+ if args.osd_path:
+ paths.append(args.osd_path)
+ else:
+ osd_ids = systemctl.get_running_osd_ids()
+ for osd_id in osd_ids:
+ paths.append("/var/lib/ceph/osd/{}-{}".format(
+ conf.cluster,
+ osd_id,
+ ))
+
+ # Capture some environment status, so that it can be reused all over
+ self.device_mounts = system.Mounts(devices=True).get_mounts()
+ self.path_mounts = system.Mounts(paths=True).get_mounts()
+
+ for path in paths:
+ args.osd_path = path
+ device = Device(args.osd_path)
+ if device.is_partition:
+ if device.ceph_disk.type != 'data':
+ label = device.ceph_disk.partlabel
+ msg = 'Device must be the ceph data partition, but PARTLABEL reported: "%s"' % label
+ raise RuntimeError(msg)
+
+ self.encryption_metadata = encryption.legacy_encrypted(args.osd_path)
+ self.is_encrypted = self.encryption_metadata['encrypted']
+
+ if self.encryption_metadata['device'] != "tmpfs":
+ device = Device(self.encryption_metadata['device'])
+ if not device.is_ceph_disk_member:
+ terminal.warning("Ignoring %s because it's not a ceph-disk created osd." % path)
+ else:
+ self.scan(args)
+ else:
+ terminal.warning("Ignoring %s because it's not a ceph-disk created osd." % path)
diff --git a/src/ceph-volume/ceph_volume/devices/simple/trigger.py b/src/ceph-volume/ceph_volume/devices/simple/trigger.py
new file mode 100644
index 000000000..c01d9ae2a
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/simple/trigger.py
@@ -0,0 +1,70 @@
+from __future__ import print_function
+import argparse
+from textwrap import dedent
+from ceph_volume.exceptions import SuffixParsingError
+from ceph_volume import decorators
+from .activate import Activate
+
+
def parse_osd_id(string):
    """
    Return the OSD id: the text before the first ``-`` in ``string``.

    Raises ``SuffixParsingError`` when that text is empty or is not made up
    of digits only.
    """
    candidate = string.partition('-')[0]
    # an empty candidate is rejected here as well: ''.isdigit() is False
    if candidate and candidate.isdigit():
        return candidate
    raise SuffixParsingError('OSD id', string)
+
+
def parse_osd_uuid(string):
    """
    Return the OSD uuid: everything after the leading ``{OSD ID}-`` prefix.

    :param string: systemd data in the form ``{OSD ID}-{OSD UUID}``
    :raises SuffixParsingError: when the id cannot be parsed, or when nothing
                                follows the ``{OSD ID}-`` prefix (including
                                input that has no ``-`` at all)
    """
    prefix = '%s-' % parse_osd_id(string)
    # Without a '-' in the input the prefix is absent; splitting on it would
    # hand back the whole string (the id itself) as if it were the uuid.
    if not string.startswith(prefix):
        raise SuffixParsingError('OSD uuid', string)
    # remove the id first
    osd_uuid = string[len(prefix):]
    if not osd_uuid:
        raise SuffixParsingError('OSD uuid', string)
    return osd_uuid
+
+
class Trigger(object):
    """``ceph-volume simple trigger``: turn systemd data into an activate call."""

    help = 'systemd helper to activate an OSD'

    def __init__(self, argv):
        """Keep the raw argument list for ``main``."""
        self.argv = argv

    @decorators.needs_root
    def main(self):
        """
        Print the help when called without arguments; otherwise parse the
        ``{OSD ID}-{OSD UUID}`` argument and run ``Activate`` with both parts.
        """
        sub_command_help = dedent("""
        ** DO NOT USE DIRECTLY **
        This tool is meant to help the systemd unit that knows about OSDs.

        Proxy OSD activation to ``ceph-volume simple activate`` by parsing the
        input from systemd, detecting the UUID and ID associated with an OSD::

            ceph-volume simple trigger {SYSTEMD-DATA}

        The systemd "data" is expected to be in the format of::

            {OSD ID}-{OSD UUID}

        The devices associated with the OSD need to have been scanned previously,
        so that all needed metadata can be used for starting the OSD process.
        """)
        # nothing to parse: show the description and stop
        if len(self.argv) < 1:
            print(sub_command_help)
            return

        parser = argparse.ArgumentParser(
            prog='ceph-volume simple trigger',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=sub_command_help,
        )
        parser.add_argument(
            'systemd_data',
            metavar='SYSTEMD_DATA',
            nargs='?',
            help='Data from a systemd unit containing ID and UUID of the OSD, like 0-asdf-lkjh'
        )

        systemd_data = parser.parse_args(self.argv).systemd_data
        activate_argv = [
            parse_osd_id(systemd_data),
            parse_osd_uuid(systemd_data),
        ]
        Activate(activate_argv, from_trigger=True).main()