summaryrefslogtreecommitdiffstats
path: root/src/ceph-volume/ceph_volume/devices/raw
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/ceph-volume/ceph_volume/devices/raw/__init__.py1
-rw-r--r--src/ceph-volume/ceph_volume/devices/raw/activate.py166
-rw-r--r--src/ceph-volume/ceph_volume/devices/raw/common.py52
-rw-r--r--src/ceph-volume/ceph_volume/devices/raw/list.py163
-rw-r--r--src/ceph-volume/ceph_volume/devices/raw/main.py40
-rw-r--r--src/ceph-volume/ceph_volume/devices/raw/prepare.py169
6 files changed, 591 insertions, 0 deletions
diff --git a/src/ceph-volume/ceph_volume/devices/raw/__init__.py b/src/ceph-volume/ceph_volume/devices/raw/__init__.py
new file mode 100644
index 000000000..dd0a6534c
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/raw/__init__.py
@@ -0,0 +1 @@
+from .main import Raw # noqa
diff --git a/src/ceph-volume/ceph_volume/devices/raw/activate.py b/src/ceph-volume/ceph_volume/devices/raw/activate.py
new file mode 100644
index 000000000..17be57dfe
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/raw/activate.py
@@ -0,0 +1,166 @@
+from __future__ import print_function
+import argparse
+import logging
+import os
+from textwrap import dedent
+from ceph_volume import process, conf, decorators, terminal
+from ceph_volume.util import system
+from ceph_volume.util import prepare as prepare_utils
+from .list import direct_report
+
+
+logger = logging.getLogger(__name__)
+
+def activate_bluestore(meta, tmpfs, systemd):
+ # find the osd
+ osd_id = meta['osd_id']
+ osd_uuid = meta['osd_uuid']
+
+ # mount on tmpfs the osd directory
+ osd_path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
+ if not system.path_is_mounted(osd_path):
+ # mkdir -p and mount as tmpfs
+ prepare_utils.create_osd_path(osd_id, tmpfs=tmpfs)
+
+ # XXX This needs to be removed once ceph-bluestore-tool can deal with
+ # symlinks that exist in the osd dir
+ for link_name in ['block', 'block.db', 'block.wal']:
+ link_path = os.path.join(osd_path, link_name)
+ if os.path.exists(link_path):
+ os.unlink(os.path.join(osd_path, link_name))
+
+ # Once symlinks are removed, the osd dir can be 'primed again. chown first,
+ # regardless of what currently exists so that ``prime-osd-dir`` can succeed
+ # even if permissions are somehow messed up
+ system.chown(osd_path)
+ prime_command = [
+ 'ceph-bluestore-tool',
+ 'prime-osd-dir',
+ '--path', osd_path,
+ '--no-mon-config',
+ '--dev', meta['device'],
+ ]
+ process.run(prime_command)
+
+ # always re-do the symlink regardless if it exists, so that the block,
+ # block.wal, and block.db devices that may have changed can be mapped
+ # correctly every time
+ prepare_utils.link_block(meta['device'], osd_id)
+
+ if 'device_db' in meta:
+ prepare_utils.link_db(meta['device_db'], osd_id, osd_uuid)
+
+ if 'device_wal' in meta:
+ prepare_utils.link_wal(meta['device_wal'], osd_id, osd_uuid)
+
+ system.chown(osd_path)
+ terminal.success("ceph-volume raw activate successful for osd ID: %s" % osd_id)
+
+
+class Activate(object):
+
+ help = 'Discover and prepare a data directory for a (BlueStore) OSD on a raw device'
+
+ def __init__(self, argv):
+ self.argv = argv
+ self.args = None
+
+ @decorators.needs_root
+ def activate(self, devs, start_osd_id, start_osd_uuid,
+ tmpfs, systemd):
+ """
+ :param args: The parsed arguments coming from the CLI
+ """
+ assert devs or start_osd_id or start_osd_uuid
+ found = direct_report(devs)
+
+ activated_any = False
+ for osd_uuid, meta in found.items():
+ osd_id = meta['osd_id']
+ if start_osd_id is not None and str(osd_id) != str(start_osd_id):
+ continue
+ if start_osd_uuid is not None and osd_uuid != start_osd_uuid:
+ continue
+ logger.info('Activating osd.%s uuid %s cluster %s' % (
+ osd_id, osd_uuid, meta['ceph_fsid']))
+ activate_bluestore(meta,
+ tmpfs=tmpfs,
+ systemd=systemd)
+ activated_any = True
+
+ if not activated_any:
+ raise RuntimeError('did not find any matching OSD to activate')
+
+ def main(self):
+ sub_command_help = dedent("""
+ Activate (BlueStore) OSD on a raw block device(s) based on the
+ device label (normally the first block of the device).
+
+ ceph-volume raw activate [/dev/sdb2 ...]
+
+ or
+
+ ceph-volume raw activate --osd-id NUM --osd-uuid UUID
+
+ The device(s) associated with the OSD need to have been prepared
+ previously, so that all needed tags and metadata exist.
+ """)
+ parser = argparse.ArgumentParser(
+ prog='ceph-volume raw activate',
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ description=sub_command_help,
+ )
+ parser.add_argument(
+ '--device',
+ help='The device for the OSD to start'
+ )
+ parser.add_argument(
+ '--osd-id',
+ help='OSD ID to activate'
+ )
+ parser.add_argument(
+ '--osd-uuid',
+ help='OSD UUID to active'
+ )
+ parser.add_argument(
+ '--no-systemd',
+ dest='no_systemd',
+ action='store_true',
+ help='Skip creating and enabling systemd units and starting OSD services'
+ )
+ parser.add_argument(
+ '--block.db',
+ dest='block_db',
+ help='Path to bluestore block.db block device'
+ )
+ parser.add_argument(
+ '--block.wal',
+ dest='block_wal',
+ help='Path to bluestore block.wal block device'
+ )
+ parser.add_argument(
+ '--no-tmpfs',
+ action='store_true',
+ help='Do not use a tmpfs mount for OSD data dir'
+ )
+
+ if not self.argv:
+ print(sub_command_help)
+ return
+ args = parser.parse_args(self.argv)
+ self.args = args
+ if not args.no_systemd:
+ terminal.error('systemd support not yet implemented')
+ raise SystemExit(1)
+
+ devs = [args.device]
+ if args.block_wal:
+ devs.append(args.block_wal)
+ if args.block_db:
+ devs.append(args.block_db)
+
+ self.activate(devs=devs,
+ start_osd_id=args.osd_id,
+ start_osd_uuid=args.osd_uuid,
+ tmpfs=not args.no_tmpfs,
+ systemd=not self.args.no_systemd)
diff --git a/src/ceph-volume/ceph_volume/devices/raw/common.py b/src/ceph-volume/ceph_volume/devices/raw/common.py
new file mode 100644
index 000000000..19de81fe5
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/raw/common.py
@@ -0,0 +1,52 @@
+import argparse
+from ceph_volume.util import arg_validators
+
+def create_parser(prog, description):
+ """
+ Both prepare and create share the same parser, those are defined here to
+ avoid duplication
+ """
+ parser = argparse.ArgumentParser(
+ prog=prog,
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ description=description,
+ )
+ parser.add_argument(
+ '--data',
+ required=True,
+ type=arg_validators.ValidRawDevice(as_string=True),
+ help='a raw device to use for the OSD',
+ )
+ parser.add_argument(
+ '--bluestore',
+ action='store_true',
+ help='Use BlueStore backend')
+ parser.add_argument(
+ '--crush-device-class',
+ dest='crush_device_class',
+ help='Crush device class to assign this OSD to',
+ default=""
+ )
+ parser.add_argument(
+ '--no-tmpfs',
+ action='store_true',
+ help='Do not use a tmpfs mount for OSD data dir'
+ )
+ parser.add_argument(
+ '--block.db',
+ dest='block_db',
+ help='Path to bluestore block.db block device',
+ type=arg_validators.ValidRawDevice(as_string=True)
+ )
+ parser.add_argument(
+ '--block.wal',
+ dest='block_wal',
+ help='Path to bluestore block.wal block device',
+ type=arg_validators.ValidRawDevice(as_string=True)
+ )
+ parser.add_argument(
+ '--dmcrypt',
+ action='store_true',
+ help='Enable device encryption via dm-crypt',
+ )
+ return parser
diff --git a/src/ceph-volume/ceph_volume/devices/raw/list.py b/src/ceph-volume/ceph_volume/devices/raw/list.py
new file mode 100644
index 000000000..06a2b3c22
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/raw/list.py
@@ -0,0 +1,163 @@
+from __future__ import print_function
+import argparse
+import json
+import logging
+from textwrap import dedent
+from ceph_volume import decorators, process
+from ceph_volume.util import disk
+
+
+logger = logging.getLogger(__name__)
+
+
+def direct_report(devices):
+ """
+ Other non-cli consumers of listing information will want to consume the
+ report without the need to parse arguments or other flags. This helper
+ bypasses the need to deal with the class interface which is meant for cli
+ handling.
+ """
+ _list = List([])
+ return _list.generate(devices)
+
+def _get_bluestore_info(dev):
+ out, err, rc = process.call([
+ 'ceph-bluestore-tool', 'show-label',
+ '--dev', dev], verbose_on_failure=False)
+ if rc:
+ # ceph-bluestore-tool returns an error (below) if device is not bluestore OSD
+ # > unable to read label for <device>: (2) No such file or directory
+ # but it's possible the error could be for a different reason (like if the disk fails)
+ logger.debug('assuming device {} is not BlueStore; ceph-bluestore-tool failed to get info from device: {}\n{}'.format(dev, out, err))
+ return None
+ oj = json.loads(''.join(out))
+ if dev not in oj:
+ # should be impossible, so warn
+ logger.warning('skipping device {} because it is not reported in ceph-bluestore-tool output: {}'.format(dev, out))
+ return None
+ try:
+ r = {
+ 'osd_uuid': oj[dev]['osd_uuid'],
+ }
+ if oj[dev]['description'] == 'main':
+ whoami = oj[dev]['whoami']
+ r.update({
+ 'type': 'bluestore',
+ 'osd_id': int(whoami),
+ 'ceph_fsid': oj[dev]['ceph_fsid'],
+ 'device': dev,
+ })
+ elif oj[dev]['description'] == 'bluefs db':
+ r['device_db'] = dev
+ elif oj[dev]['description'] == 'bluefs wal':
+ r['device_wal'] = dev
+ return r
+ except KeyError as e:
+ # this will appear for devices that have a bluestore header but aren't valid OSDs
+ # for example, due to incomplete rollback of OSDs: https://tracker.ceph.com/issues/51869
+ logger.error('device {} does not have all BlueStore data needed to be a valid OSD: {}\n{}'.format(dev, out, e))
+ return None
+
+
+class List(object):
+
+ help = 'list BlueStore OSDs on raw devices'
+
+ def __init__(self, argv):
+ self.argv = argv
+
+ def generate(self, devs=None):
+ logger.debug('Listing block devices via lsblk...')
+ info_devices = disk.lsblk_all(abspath=True)
+ if devs is None or devs == []:
+ # If no devs are given initially, we want to list ALL devices including children and
+ # parents. Parent disks with child partitions may be the appropriate device to return if
+ # the parent disk has a bluestore header, but children may be the most appropriate
+ # devices to return if the parent disk does not have a bluestore header.
+ devs = [device['NAME'] for device in info_devices if device.get('NAME',)]
+
+ result = {}
+ logger.debug('inspecting devices: {}'.format(devs))
+ for dev in devs:
+ # Linux kernels built with CONFIG_ATARI_PARTITION enabled can falsely interpret
+ # bluestore's on-disk format as an Atari partition table. These false Atari partitions
+ # can be interpreted as real OSDs if a bluestore OSD was previously created on the false
+ # partition. See https://tracker.ceph.com/issues/52060 for more info. If a device has a
+ # parent, it is a child. If the parent is a valid bluestore OSD, the child will only
+ # exist if it is a phantom Atari partition, and the child should be ignored. If the
+ # parent isn't bluestore, then the child could be a valid bluestore OSD. If we fail to
+ # determine whether a parent is bluestore, we should err on the side of not reporting
+ # the child so as not to give a false negative.
+ for info_device in info_devices:
+ if 'PKNAME' in info_device and info_device['PKNAME'] != "":
+ parent = info_device['PKNAME']
+ try:
+ if disk.has_bluestore_label(parent):
+ logger.warning(('ignoring child device {} whose parent {} is a BlueStore OSD.'.format(dev, parent),
+ 'device is likely a phantom Atari partition. device info: {}'.format(info_device)))
+ continue
+ except OSError as e:
+ logger.error(('ignoring child device {} to avoid reporting invalid BlueStore data from phantom Atari partitions.'.format(dev),
+ 'failed to determine if parent device {} is BlueStore. err: {}'.format(parent, e)))
+ continue
+
+ bs_info = _get_bluestore_info(dev)
+ if bs_info is None:
+ # None is also returned in the rare event that there is an issue reading info from
+ # a BlueStore disk, so be sure to log our assumption that it isn't bluestore
+ logger.info('device {} does not have BlueStore information'.format(dev))
+ continue
+ uuid = bs_info['osd_uuid']
+ if uuid not in result:
+ result[uuid] = {}
+ result[uuid].update(bs_info)
+
+ return result
+
+ @decorators.needs_root
+ def list(self, args):
+ report = self.generate(args.device)
+ if args.format == 'json':
+ print(json.dumps(report, indent=4, sort_keys=True))
+ else:
+ if not report:
+ raise SystemExit('No valid Ceph devices found')
+ raise RuntimeError('not implemented yet')
+
+ def main(self):
+ sub_command_help = dedent("""
+ List OSDs on raw devices with raw device labels (usually the first
+ block of the device).
+
+ Full listing of all identifiable (currently, BlueStore) OSDs
+ on raw devices:
+
+ ceph-volume raw list
+
+ List a particular device, reporting all metadata about it::
+
+ ceph-volume raw list /dev/sda1
+
+ """)
+ parser = argparse.ArgumentParser(
+ prog='ceph-volume raw list',
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ description=sub_command_help,
+ )
+
+ parser.add_argument(
+ 'device',
+ metavar='DEVICE',
+ nargs='*',
+ help='Path to a device like /dev/sda1'
+ )
+
+ parser.add_argument(
+ '--format',
+ help='output format, defaults to "pretty"',
+ default='json',
+ choices=['json', 'pretty'],
+ )
+
+ args = parser.parse_args(self.argv)
+ self.list(args)
diff --git a/src/ceph-volume/ceph_volume/devices/raw/main.py b/src/ceph-volume/ceph_volume/devices/raw/main.py
new file mode 100644
index 000000000..efa251090
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/raw/main.py
@@ -0,0 +1,40 @@
+import argparse
+from textwrap import dedent
+from ceph_volume import terminal
+from . import list
+from . import prepare
+from . import activate
+
+class Raw(object):
+
+ help = 'Manage single-device OSDs on raw block devices'
+
+ _help = dedent("""
+ Manage a single-device OSD on a raw block device. Rely on
+ the existing device labels to store any needed metadata.
+
+ {sub_help}
+ """)
+
+ mapper = {
+ 'list': list.List,
+ 'prepare': prepare.Prepare,
+ 'activate': activate.Activate,
+ }
+
+ def __init__(self, argv):
+ self.argv = argv
+
+ def print_help(self, sub_help):
+ return self._help.format(sub_help=sub_help)
+
+ def main(self):
+ terminal.dispatch(self.mapper, self.argv)
+ parser = argparse.ArgumentParser(
+ prog='ceph-volume raw',
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ description=self.print_help(terminal.subhelp(self.mapper)),
+ )
+ parser.parse_args(self.argv)
+ if len(self.argv) <= 1:
+ return parser.print_help()
diff --git a/src/ceph-volume/ceph_volume/devices/raw/prepare.py b/src/ceph-volume/ceph_volume/devices/raw/prepare.py
new file mode 100644
index 000000000..3c96eedac
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/devices/raw/prepare.py
@@ -0,0 +1,169 @@
+from __future__ import print_function
+import json
+import logging
+import os
+from textwrap import dedent
+from ceph_volume.util import prepare as prepare_utils
+from ceph_volume.util import encryption as encryption_utils
+from ceph_volume.util import disk
+from ceph_volume.util import system
+from ceph_volume import decorators, terminal
+from ceph_volume.devices.lvm.common import rollback_osd
+from .common import create_parser
+
+logger = logging.getLogger(__name__)
+
+def prepare_dmcrypt(key, device, device_type, fsid):
+ """
+ Helper for devices that are encrypted. The operations needed for
+ block, db, wal, or data/journal devices are all the same
+ """
+ if not device:
+ return ''
+ kname = disk.lsblk(device)['KNAME']
+ mapping = 'ceph-{}-{}-{}-dmcrypt'.format(fsid, kname, device_type)
+ # format data device
+ encryption_utils.luks_format(
+ key,
+ device
+ )
+ encryption_utils.luks_open(
+ key,
+ device,
+ mapping
+ )
+
+ return '/dev/mapper/{}'.format(mapping)
+
+def prepare_bluestore(block, wal, db, secrets, osd_id, fsid, tmpfs):
+ """
+ :param block: The name of the logical volume for the bluestore data
+ :param wal: a regular/plain disk or logical volume, to be used for block.wal
+ :param db: a regular/plain disk or logical volume, to be used for block.db
+ :param secrets: A dict with the secrets needed to create the osd (e.g. cephx)
+ :param id_: The OSD id
+ :param fsid: The OSD fsid, also known as the OSD UUID
+ """
+ cephx_secret = secrets.get('cephx_secret', prepare_utils.create_key())
+
+ if secrets.get('dmcrypt_key'):
+ key = secrets['dmcrypt_key']
+ block = prepare_dmcrypt(key, block, 'block', fsid)
+ wal = prepare_dmcrypt(key, wal, 'wal', fsid)
+ db = prepare_dmcrypt(key, db, 'db', fsid)
+
+ # create the directory
+ prepare_utils.create_osd_path(osd_id, tmpfs=tmpfs)
+ # symlink the block
+ prepare_utils.link_block(block, osd_id)
+ # get the latest monmap
+ prepare_utils.get_monmap(osd_id)
+ # write the OSD keyring if it doesn't exist already
+ prepare_utils.write_keyring(osd_id, cephx_secret)
+ # prepare the osd filesystem
+ prepare_utils.osd_mkfs_bluestore(
+ osd_id, fsid,
+ keyring=cephx_secret,
+ wal=wal,
+ db=db
+ )
+
+
+class Prepare(object):
+
+ help = 'Format a raw device and associate it with a (BlueStore) OSD'
+
+ def __init__(self, argv):
+ self.argv = argv
+ self.osd_id = None
+
+ def safe_prepare(self, args=None):
+ """
+ An intermediate step between `main()` and `prepare()` so that we can
+ capture the `self.osd_id` in case we need to rollback
+
+ :param args: Injected args, usually from `raw create` which compounds
+ both `prepare` and `create`
+ """
+ if args is not None:
+ self.args = args
+ try:
+ self.prepare()
+ except Exception:
+ logger.exception('raw prepare was unable to complete')
+ logger.info('will rollback OSD ID creation')
+ rollback_osd(self.args, self.osd_id)
+ raise
+ dmcrypt_log = 'dmcrypt' if args.dmcrypt else 'clear'
+ terminal.success("ceph-volume raw {} prepare successful for: {}".format(dmcrypt_log, self.args.data))
+
+
+ @decorators.needs_root
+ def prepare(self):
+ secrets = {'cephx_secret': prepare_utils.create_key()}
+ encrypted = 1 if self.args.dmcrypt else 0
+ cephx_lockbox_secret = '' if not encrypted else prepare_utils.create_key()
+
+ if encrypted:
+ secrets['dmcrypt_key'] = os.getenv('CEPH_VOLUME_DMCRYPT_SECRET')
+ secrets['cephx_lockbox_secret'] = cephx_lockbox_secret # dummy value to make `ceph osd new` not complaining
+
+ osd_fsid = system.generate_uuid()
+ crush_device_class = self.args.crush_device_class
+ if crush_device_class:
+ secrets['crush_device_class'] = crush_device_class
+ tmpfs = not self.args.no_tmpfs
+ wal = ""
+ db = ""
+ if self.args.block_wal:
+ wal = self.args.block_wal
+ if self.args.block_db:
+ db = self.args.block_db
+
+ # reuse a given ID if it exists, otherwise create a new ID
+ self.osd_id = prepare_utils.create_id(
+ osd_fsid, json.dumps(secrets))
+
+ prepare_bluestore(
+ self.args.data,
+ wal,
+ db,
+ secrets,
+ self.osd_id,
+ osd_fsid,
+ tmpfs,
+ )
+
+ def main(self):
+ sub_command_help = dedent("""
+ Prepare an OSD by assigning an ID and FSID, registering them with the
+ cluster with an ID and FSID, formatting the volume.
+
+ Once the OSD is ready, an ad-hoc systemd unit will be enabled so that
+ it can later get activated and the OSD daemon can get started.
+
+ ceph-volume raw prepare --bluestore --data {device}
+
+ DB and WAL devices are supported.
+
+ ceph-volume raw prepare --bluestore --data {device} --block.db {device} --block.wal {device}
+
+ """)
+ parser = create_parser(
+ prog='ceph-volume raw prepare',
+ description=sub_command_help,
+ )
+ if not self.argv:
+ print(sub_command_help)
+ return
+ self.args = parser.parse_args(self.argv)
+ if not self.args.bluestore:
+ terminal.error('must specify --bluestore (currently the only supported backend)')
+ raise SystemExit(1)
+ if self.args.dmcrypt and not os.getenv('CEPH_VOLUME_DMCRYPT_SECRET'):
+ terminal.error('encryption was requested (--dmcrypt) but environment variable ' \
+ 'CEPH_VOLUME_DMCRYPT_SECRET is not set, you must set ' \
+ 'this variable to provide a dmcrypt secret.')
+ raise SystemExit(1)
+
+ self.safe_prepare(self.args)