summaryrefslogtreecommitdiffstats
path: root/src/ceph-volume/ceph_volume/util
diff options
context:
space:
mode:
Diffstat (limited to 'src/ceph-volume/ceph_volume/util')
-rw-r--r--src/ceph-volume/ceph_volume/util/__init__.py108
-rw-r--r--src/ceph-volume/ceph_volume/util/arg_validators.py222
-rw-r--r--src/ceph-volume/ceph_volume/util/constants.py46
-rw-r--r--src/ceph-volume/ceph_volume/util/device.py699
-rw-r--r--src/ceph-volume/ceph_volume/util/disk.py927
-rw-r--r--src/ceph-volume/ceph_volume/util/encryption.py278
-rw-r--r--src/ceph-volume/ceph_volume/util/lsmdisk.py196
-rw-r--r--src/ceph-volume/ceph_volume/util/prepare.py535
-rw-r--r--src/ceph-volume/ceph_volume/util/system.py419
-rw-r--r--src/ceph-volume/ceph_volume/util/templates.py49
10 files changed, 3479 insertions, 0 deletions
diff --git a/src/ceph-volume/ceph_volume/util/__init__.py b/src/ceph-volume/ceph_volume/util/__init__.py
new file mode 100644
index 000000000..1b5afe970
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/util/__init__.py
@@ -0,0 +1,108 @@
+import logging
+from math import floor
+from ceph_volume import terminal
+
+try:
+ input = raw_input # pylint: disable=redefined-builtin
+except NameError:
+ pass
+
+logger = logging.getLogger(__name__)
+
+
+def as_string(string):
+ """
+ Ensure that whatever type of string is incoming, it is returned as an
+ actual string, versus 'bytes' which Python 3 likes to use.
+ """
+ if isinstance(string, bytes):
+ # we really ignore here if we can't properly decode with utf-8
+ return string.decode('utf-8', 'ignore')
+ return string
+
+
+def as_bytes(string):
+ """
+ Ensure that whatever type of string is incoming, it is returned as bytes,
+ encoding to utf-8 otherwise
+ """
+ if isinstance(string, bytes):
+ return string
+ return string.encode('utf-8', errors='ignore')
+
+
+def str_to_int(string, round_down=True):
+ """
+ Parses a string number into an integer, optionally converting to a float
+ and rounding down.
+
+ Some LVM values may come with a comma instead of a dot to define decimals.
+ This function normalizes a comma into a dot
+ """
+ error_msg = "Unable to convert to integer: '%s'" % str(string)
+ try:
+ integer = float(string.replace(',', '.'))
+ except AttributeError:
+ # this might be a integer already, so try to use it, otherwise raise
+ # the original exception
+ if isinstance(string, (int, float)):
+ integer = string
+ else:
+ logger.exception(error_msg)
+ raise RuntimeError(error_msg)
+ except (TypeError, ValueError):
+ logger.exception(error_msg)
+ raise RuntimeError(error_msg)
+
+ if round_down:
+ integer = floor(integer)
+ else:
+ integer = round(integer)
+ return int(integer)
+
+
+def str_to_bool(val):
+ """
+ Convert a string representation of truth to True or False
+
+ True values are 'y', 'yes', or ''; case-insensitive
+ False values are 'n', or 'no'; case-insensitive
+ Raises ValueError if 'val' is anything else.
+ """
+ true_vals = ['yes', 'y', '']
+ false_vals = ['no', 'n']
+ try:
+ val = val.lower()
+ except AttributeError:
+ val = str(val).lower()
+ if val in true_vals:
+ return True
+ elif val in false_vals:
+ return False
+ else:
+ raise ValueError("Invalid input value: %s" % val)
+
+
+def prompt_bool(question, input_=None):
+ """
+ Interface to prompt a boolean (or boolean-like) response from a user.
+ Usually a confirmation.
+ """
+ input_prompt = input_ or input
+ prompt_format = '--> {question} '.format(question=question)
+ response = input_prompt(prompt_format)
+ try:
+ return str_to_bool(response)
+ except ValueError:
+ terminal.error('Valid true responses are: y, yes, <Enter>')
+ terminal.error('Valid false responses are: n, no')
+ terminal.error('That response was invalid, please try again')
+ return prompt_bool(question, input_=input_prompt)
+
+def merge_dict(x, y):
+ """
+ Return two dicts merged
+ """
+ z = x.copy()
+ z.update(y)
+ return z \ No newline at end of file
diff --git a/src/ceph-volume/ceph_volume/util/arg_validators.py b/src/ceph-volume/ceph_volume/util/arg_validators.py
new file mode 100644
index 000000000..270c8a648
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/util/arg_validators.py
@@ -0,0 +1,222 @@
+import argparse
+import os
+from ceph_volume import terminal, decorators, process
+from ceph_volume.util.device import Device
+from ceph_volume.util import disk
+
+
+def valid_osd_id(val):
+ return str(int(val))
+
+class ValidDevice(object):
+
+ def __init__(self, as_string=False, gpt_ok=False):
+ self.as_string = as_string
+ self.gpt_ok = gpt_ok
+
+ def __call__(self, dev_path):
+ self.get_device(dev_path)
+ self._validated_device = self._is_valid_device()
+ return self._format_device(self._validated_device)
+
+ def get_device(self, dev_path):
+ self._device = Device(dev_path)
+ self.dev_path = dev_path
+
+ def _format_device(self, device):
+ if self.as_string:
+ if device.is_lv:
+ # all codepaths expect an lv path to be returned in this format
+ return "{}/{}".format(device.vg_name, device.lv_name)
+ return device.path
+ return device
+
+ def _is_valid_device(self):
+ error = None
+ if not self._device.exists:
+ error = "Unable to proceed with non-existing device: %s" % self.dev_path
+ # FIXME this is not a nice API, this validator was meant to catch any
+ # non-existing devices upfront, not check for gpt headers. Now this
+ # needs to optionally skip checking gpt headers which is beyond
+ # verifying if the device exists. The better solution would be to
+ # configure this with a list of checks that can be excluded/included on
+ # __init__
+ elif self._device.has_gpt_headers and not self.gpt_ok:
+ error = "GPT headers found, they must be removed on: %s" % self.dev_path
+ if self._device.has_partitions:
+ raise RuntimeError("Device {} has partitions.".format(self.dev_path))
+ if error:
+ raise argparse.ArgumentError(None, error)
+ return self._device
+
+
+class ValidZapDevice(ValidDevice):
+ def __call__(self, dev_path):
+ super().get_device(dev_path)
+ return self._format_device(self._is_valid_device())
+
+ def _is_valid_device(self, raise_sys_exit=True):
+ super()._is_valid_device()
+ return self._device
+
+
+class ValidDataDevice(ValidDevice):
+ def __call__(self, dev_path):
+ super().get_device(dev_path)
+ return self._format_device(self._is_valid_device())
+
+ def _is_valid_device(self, raise_sys_exit=True):
+ super()._is_valid_device()
+ if self._device.used_by_ceph:
+ terminal.info('Device {} is already prepared'.format(self.dev_path))
+ if raise_sys_exit:
+ raise SystemExit(0)
+ if self._device.has_fs and not self._device.used_by_ceph:
+ raise RuntimeError("Device {} has a filesystem.".format(self.dev_path))
+ if self.dev_path[0] == '/' and disk.has_bluestore_label(self.dev_path):
+ raise RuntimeError("Device {} has bluestore signature.".format(self.dev_path))
+ return self._device
+
+class ValidRawDevice(ValidDevice):
+ def __call__(self, dev_path):
+ super().get_device(dev_path)
+ return self._format_device(self._is_valid_device())
+
+ def _is_valid_device(self, raise_sys_exit=True):
+ out, err, rc = process.call([
+ 'ceph-bluestore-tool', 'show-label',
+ '--dev', self.dev_path], verbose_on_failure=False)
+ if not rc:
+ terminal.info("Raw device {} is already prepared.".format(self.dev_path))
+ raise SystemExit(0)
+ if disk.blkid(self.dev_path).get('TYPE') == 'crypto_LUKS':
+ terminal.info("Raw device {} might already be in use for a dmcrypt OSD, skipping.".format(self.dev_path))
+ raise SystemExit(0)
+ super()._is_valid_device()
+ return self._device
+
+class ValidBatchDevice(ValidDevice):
+ def __call__(self, dev_path):
+ super().get_device(dev_path)
+ return self._format_device(self._is_valid_device())
+
+ def _is_valid_device(self, raise_sys_exit=False):
+ super()._is_valid_device()
+ if self._device.is_partition:
+ raise argparse.ArgumentError(
+ None,
+ '{} is a partition, please pass '
+ 'LVs or raw block devices'.format(self.dev_path))
+ return self._device
+
+
+class ValidBatchDataDevice(ValidBatchDevice, ValidDataDevice):
+ def __call__(self, dev_path):
+ super().get_device(dev_path)
+ return self._format_device(self._is_valid_device())
+
+ def _is_valid_device(self):
+ # if device is already used by ceph,
+ # leave the validation to Batch.get_deployment_layout()
+ # This way the idempotency isn't broken (especially when using --osds-per-device)
+ for lv in self._device.lvs:
+ if lv.tags.get('ceph.type') in ['db', 'wal', 'journal']:
+ return self._device
+ if self._device.used_by_ceph:
+ return self._device
+ super()._is_valid_device(raise_sys_exit=False)
+ return self._device
+
+
+class OSDPath(object):
+ """
+ Validate path exists and it looks like an OSD directory.
+ """
+
+ @decorators.needs_root
+ def __call__(self, string):
+ if not os.path.exists(string):
+ error = "Path does not exist: %s" % string
+ raise argparse.ArgumentError(None, error)
+
+ arg_is_partition = disk.is_partition(string)
+ if arg_is_partition:
+ return os.path.abspath(string)
+ absolute_path = os.path.abspath(string)
+ if not os.path.isdir(absolute_path):
+ error = "Argument is not a directory or device which is required to scan"
+ raise argparse.ArgumentError(None, error)
+ key_files = ['ceph_fsid', 'fsid', 'keyring', 'ready', 'type', 'whoami']
+ dir_files = os.listdir(absolute_path)
+ for key_file in key_files:
+ if key_file not in dir_files:
+ terminal.error('All following files must exist in path: %s' % ' '.join(key_files))
+ error = "Required file (%s) was not found in OSD dir path: %s" % (
+ key_file,
+ absolute_path
+ )
+ raise argparse.ArgumentError(None, error)
+
+ return os.path.abspath(string)
+
+
+def exclude_group_options(parser, groups, argv=None):
+ """
+ ``argparse`` has the ability to check for mutually exclusive options, but
+ it only allows a basic XOR behavior: only one flag can be used from
+ a defined group of options. This doesn't help when two groups of options
+ need to be separated. For example, with filestore and bluestore, neither
+ set can be used in conjunction with the other set.
+
+ This helper validator will consume the parser to inspect the group flags,
+ and it will group them together from ``groups``. This allows proper error
+ reporting, matching each incompatible flag with its group name.
+
+ :param parser: The argparse object, once it has configured all flags. It is
+ required to contain the group names being used to validate.
+ :param groups: A list of group names (at least two), with the same used for
+ ``add_argument_group``
+ :param argv: Consume the args (sys.argv) directly from this argument
+
+ .. note: **Unfortunately** this will not be able to validate correctly when
+ using default flags. In the case of filestore vs. bluestore, ceph-volume
+ defaults to --bluestore, but we can't check that programmatically, we can
+ only parse the flags seen via argv
+ """
+ # Reduce the parser groups to only the groups we need to intersect
+ parser_groups = [g for g in parser._action_groups if g.title in groups]
+ # A mapping of the group name to flags/options
+ group_flags = {}
+ flags_to_verify = []
+ for group in parser_groups:
+ # option groups may have more than one item in ``option_strings``, this
+ # will loop over ``_group_actions`` which contains the
+ # ``option_strings``, like ``['--filestore']``
+ group_flags[group.title] = [
+ option for group_action in group._group_actions
+ for option in group_action.option_strings
+ ]
+
+ # Gather all the flags present in the groups so that we only check on those.
+ for flags in group_flags.values():
+ flags_to_verify.extend(flags)
+
+ seen = []
+ last_flag = None
+ last_group = None
+ for flag in argv:
+ if flag not in flags_to_verify:
+ continue
+ for group_name, flags in group_flags.items():
+ if flag in flags:
+ seen.append(group_name)
+ # We are mutually excluding groups, so having more than 1 group
+ # in ``seen`` means we must raise an error
+ if len(set(seen)) == len(groups):
+ terminal.warning('Incompatible flags were found, some values may get ignored')
+ msg = 'Cannot use %s (%s) with %s (%s)' % (
+ last_flag, last_group, flag, group_name
+ )
+ terminal.warning(msg)
+ last_group = group_name
+ last_flag = flag
diff --git a/src/ceph-volume/ceph_volume/util/constants.py b/src/ceph-volume/ceph_volume/util/constants.py
new file mode 100644
index 000000000..3ec819ec3
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/util/constants.py
@@ -0,0 +1,46 @@
+
+# mount flags
+mount = dict(
+ xfs=['rw', 'noatime' , 'inode64']
+)
+
+
+# format flags
+mkfs = dict(
+ xfs=[
+ # force overwriting previous fs
+ '-f',
+ # set the inode size to 2kb
+ '-i', 'size=2048',
+ ],
+)
+
+# The fantastical world of ceph-disk labels, they should give you the
+# collywobbles
+ceph_disk_guids = {
+ # luks
+ '45b0969e-9b03-4f30-b4c6-35865ceff106': {'type': 'journal', 'encrypted': True, 'encryption_type': 'luks'},
+ 'cafecafe-9b03-4f30-b4c6-35865ceff106': {'type': 'block', 'encrypted': True, 'encryption_type': 'luks'},
+ '166418da-c469-4022-adf4-b30afd37f176': {'type': 'block.db', 'encrypted': True, 'encryption_type': 'luks'},
+ '86a32090-3647-40b9-bbbd-38d8c573aa86': {'type': 'block.wal', 'encrypted': True, 'encryption_type': 'luks'},
+ '4fbd7e29-9d25-41b8-afd0-35865ceff05d': {'type': 'data', 'encrypted': True, 'encryption_type': 'luks'},
+ # plain
+ '45b0969e-9b03-4f30-b4c6-5ec00ceff106': {'type': 'journal', 'encrypted': True, 'encryption_type': 'plain'},
+ 'cafecafe-9b03-4f30-b4c6-5ec00ceff106': {'type': 'block', 'encrypted': True, 'encryption_type': 'plain'},
+ '93b0052d-02d9-4d8a-a43b-33a3ee4dfbc3': {'type': 'block.db', 'encrypted': True, 'encryption_type': 'plain'},
+ '306e8683-4fe2-4330-b7c0-00a917c16966': {'type': 'block.wal', 'encrypted': True, 'encryption_type': 'plain'},
+ '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d': {'type': 'data', 'encrypted': True, 'encryption_type': 'plain'},
+ # regular guids that differ from plain
+ 'fb3aabf9-d25f-47cc-bf5e-721d1816496b': {'type': 'lockbox', 'encrypted': False, 'encryption_type': None},
+ '30cd0809-c2b2-499c-8879-2d6b78529876': {'type': 'block.db', 'encrypted': False, 'encryption_type': None},
+ '5ce17fce-4087-4169-b7ff-056cc58473f9': {'type': 'block.wal', 'encrypted': False, 'encryption_type': None},
+ '4fbd7e29-9d25-41b8-afd0-062c0ceff05d': {'type': 'data', 'encrypted': False, 'encryption_type': None},
+ 'cafecafe-9b03-4f30-b4c6-b4b80ceff106': {'type': 'block', 'encrypted': False, 'encryption_type': None},
+ # multipath
+ '01b41e1b-002a-453c-9f17-88793989ff8f': {'type': 'block.wal', 'encrypted': False, 'encryption_type': None},
+ 'ec6d6385-e346-45dc-be91-da2a7c8b3261': {'type': 'block.wal', 'encrypted': False, 'encryption_type': None},
+ '45b0969e-8ae0-4982-bf9d-5a8d867af560': {'type': 'journal', 'encrypted': False, 'encryption_type': None},
+ '4fbd7e29-8ae0-4982-bf9d-5a8d867af560': {'type': 'data', 'encrypted': False, 'encryption_type': None},
+ '7f4a666a-16f3-47a2-8445-152ef4d03f6c': {'type': 'lockbox', 'encrypted': False, 'encryption_type': None},
+ 'cafecafe-8ae0-4982-bf9d-5a8d867af560': {'type': 'block', 'encrypted': False, 'encryption_type': None},
+}
diff --git a/src/ceph-volume/ceph_volume/util/device.py b/src/ceph-volume/ceph_volume/util/device.py
new file mode 100644
index 000000000..51605c659
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/util/device.py
@@ -0,0 +1,699 @@
+# -*- coding: utf-8 -*-
+
+import logging
+import os
+from functools import total_ordering
+from ceph_volume import sys_info
+from ceph_volume.api import lvm
+from ceph_volume.util import disk, system
+from ceph_volume.util.lsmdisk import LSMDisk
+from ceph_volume.util.constants import ceph_disk_guids
+from ceph_volume.util.disk import allow_loop_devices
+
+
+logger = logging.getLogger(__name__)
+
+
+report_template = """
+{dev:<25} {size:<12} {device_nodes:<15} {rot!s:<7} {available!s:<9} {model}"""
+
+
+def encryption_status(abspath):
+ """
+ Helper function to run ``encryption.status()``. It is done here to avoid
+ a circular import issue (encryption module imports from this module) and to
+ ease testing by allowing monkeypatching of this function.
+ """
+ from ceph_volume.util import encryption
+ return encryption.status(abspath)
+
+
+class Devices(object):
+ """
+ A container for Device instances with reporting
+ """
+
+ def __init__(self, filter_for_batch=False, with_lsm=False):
+ lvs = lvm.get_lvs()
+ lsblk_all = disk.lsblk_all()
+ all_devices_vgs = lvm.get_all_devices_vgs()
+ if not sys_info.devices:
+ sys_info.devices = disk.get_devices()
+ self.devices = [Device(k,
+ with_lsm,
+ lvs=lvs,
+ lsblk_all=lsblk_all,
+ all_devices_vgs=all_devices_vgs) for k in
+ sys_info.devices.keys()]
+ if filter_for_batch:
+ self.devices = [d for d in self.devices if d.available_lvm_batch]
+
+ def pretty_report(self):
+ output = [
+ report_template.format(
+ dev='Device Path',
+ size='Size',
+ rot='rotates',
+ model='Model name',
+ available='available',
+ device_nodes='Device nodes',
+
+ )]
+ for device in sorted(self.devices):
+ output.append(device.report())
+ return ''.join(output)
+
+ def json_report(self):
+ output = []
+ for device in sorted(self.devices):
+ output.append(device.json_report())
+ return output
+
+@total_ordering
+class Device(object):
+
+ pretty_template = """
+ {attr:<25} {value}"""
+
+ report_fields = [
+ 'ceph_device',
+ 'rejected_reasons',
+ 'available',
+ 'path',
+ 'sys_api',
+ 'device_id',
+ 'lsm_data',
+ ]
+ pretty_report_sys_fields = [
+ 'human_readable_size',
+ 'model',
+ 'removable',
+ 'ro',
+ 'rotational',
+ 'sas_address',
+ 'scheduler_mode',
+ 'vendor',
+ ]
+
+ # define some class variables; mostly to enable the use of autospec in
+ # unittests
+ lvs = []
+
+ def __init__(self, path, with_lsm=False, lvs=None, lsblk_all=None, all_devices_vgs=None):
+ self.path = path
+ # LVs can have a vg/lv path, while disks will have /dev/sda
+ self.symlink = None
+ # check if we are a symlink
+ if os.path.islink(self.path):
+ self.symlink = self.path
+ real_path = os.path.realpath(self.path)
+ # check if we are not a device mapper
+ if "dm-" not in real_path:
+ self.path = real_path
+ if not sys_info.devices:
+ if self.path:
+ sys_info.devices = disk.get_devices(device=self.path)
+ else:
+ sys_info.devices = disk.get_devices()
+ if sys_info.devices.get(self.path, {}):
+ self.device_nodes = sys_info.devices[self.path]['device_nodes']
+ self.sys_api = sys_info.devices.get(self.path, {})
+ self.partitions = self._get_partitions()
+ self.lv_api = None
+ self.lvs = [] if not lvs else lvs
+ self.lsblk_all = lsblk_all
+ self.all_devices_vgs = all_devices_vgs
+ self.vgs = []
+ self.vg_name = None
+ self.lv_name = None
+ self.disk_api = {}
+ self.blkid_api = None
+ self._exists = None
+ self._is_lvm_member = None
+ self.ceph_device = False
+ self._parse()
+ self.lsm_data = self.fetch_lsm(with_lsm)
+
+ self.available_lvm, self.rejected_reasons_lvm = self._check_lvm_reject_reasons()
+ self.available_raw, self.rejected_reasons_raw = self._check_raw_reject_reasons()
+ self.available = self.available_lvm and self.available_raw
+ self.rejected_reasons = list(set(self.rejected_reasons_lvm +
+ self.rejected_reasons_raw))
+
+ self.device_id = self._get_device_id()
+
+ def fetch_lsm(self, with_lsm):
+ '''
+ Attempt to fetch libstoragemgmt (LSM) metadata, and return to the caller
+ as a dict. An empty dict is passed back to the caller if the target path
+ is not a block device, or lsm is unavailable on the host. Otherwise the
+ json returned will provide LSM attributes, and any associated errors that
+ lsm encountered when probing the device.
+ '''
+ if not with_lsm or not self.exists or not self.is_device:
+ return {}
+
+ lsm_disk = LSMDisk(self.path)
+
+ return lsm_disk.json_report()
+
+ def __lt__(self, other):
+ '''
+ Implementing this method and __eq__ allows the @total_ordering
+ decorator to turn the Device class into a totally ordered type.
+ This can slower then implementing all comparison operations.
+ This sorting should put available devices before unavailable devices
+ and sort on the path otherwise (str sorting).
+ '''
+ if self.available == other.available:
+ return self.path < other.path
+ return self.available and not other.available
+
+ def __eq__(self, other):
+ return self.path == other.path
+
+ def __hash__(self):
+ return hash(self.path)
+
+ def load_blkid_api(self):
+ if self.blkid_api is None:
+ self.blkid_api = disk.blkid(self.path)
+
+ def _parse(self):
+ lv = None
+ if not self.sys_api:
+ # if no device was found check if we are a partition
+ partname = self.path.split('/')[-1]
+ for device, info in sys_info.devices.items():
+ part = info['partitions'].get(partname, {})
+ if part:
+ self.sys_api = part
+ break
+
+ if self.lvs:
+ for _lv in self.lvs:
+ # if the path is not absolute, we have 'vg/lv', let's use LV name
+ # to get the LV.
+ if self.path[0] == '/':
+ if _lv.lv_path == self.path:
+ lv = _lv
+ break
+ else:
+ vgname, lvname = self.path.split('/')
+ if _lv.lv_name == lvname and _lv.vg_name == vgname:
+ lv = _lv
+ break
+ else:
+ if self.path[0] == '/':
+ lv = lvm.get_single_lv(filters={'lv_path': self.path})
+ else:
+ vgname, lvname = self.path.split('/')
+ lv = lvm.get_single_lv(filters={'lv_name': lvname,
+ 'vg_name': vgname})
+
+ if lv:
+ self.lv_api = lv
+ self.lvs = [lv]
+ self.path = lv.lv_path
+ self.vg_name = lv.vg_name
+ self.lv_name = lv.name
+ self.ceph_device = lvm.is_ceph_device(lv)
+ else:
+ self.lvs = []
+ if self.lsblk_all:
+ for dev in self.lsblk_all:
+ if dev['NAME'] == os.path.basename(self.path):
+ break
+ else:
+ dev = disk.lsblk(self.path)
+ self.disk_api = dev
+ device_type = dev.get('TYPE', '')
+ # always check is this is an lvm member
+ valid_types = ['part', 'disk']
+ if allow_loop_devices():
+ valid_types.append('loop')
+ if device_type in valid_types:
+ self._set_lvm_membership()
+
+ self.ceph_disk = CephDiskDevice(self)
+
+ def __repr__(self):
+ prefix = 'Unknown'
+ if self.is_lv:
+ prefix = 'LV'
+ elif self.is_partition:
+ prefix = 'Partition'
+ elif self.is_device:
+ prefix = 'Raw Device'
+ return '<%s: %s>' % (prefix, self.path)
+
+ def pretty_report(self):
+ def format_value(v):
+ if isinstance(v, list):
+ return ', '.join(v)
+ else:
+ return v
+ def format_key(k):
+ return k.strip('_').replace('_', ' ')
+ output = ['\n====== Device report {} ======\n'.format(self.path)]
+ output.extend(
+ [self.pretty_template.format(
+ attr=format_key(k),
+ value=format_value(v)) for k, v in vars(self).items() if k in
+ self.report_fields and k != 'disk_api' and k != 'sys_api'] )
+ output.extend(
+ [self.pretty_template.format(
+ attr=format_key(k),
+ value=format_value(v)) for k, v in self.sys_api.items() if k in
+ self.pretty_report_sys_fields])
+ for lv in self.lvs:
+ output.append("""
+ --- Logical Volume ---""")
+ output.extend(
+ [self.pretty_template.format(
+ attr=format_key(k),
+ value=format_value(v)) for k, v in lv.report().items()])
+ return ''.join(output)
+
+ def report(self):
+ return report_template.format(
+ dev=self.path,
+ size=self.size_human,
+ rot=self.rotational,
+ available=self.available,
+ model=self.model,
+ device_nodes=self.device_nodes
+ )
+
+ def json_report(self):
+ output = {k.strip('_'): v for k, v in vars(self).items() if k in
+ self.report_fields}
+ output['lvs'] = [lv.report() for lv in self.lvs]
+ return output
+
+ def _get_device_id(self):
+ """
+ Please keep this implementation in sync with get_device_id() in
+ src/common/blkdev.cc
+ """
+ props = ['ID_VENDOR', 'ID_MODEL', 'ID_MODEL_ENC', 'ID_SERIAL_SHORT', 'ID_SERIAL',
+ 'ID_SCSI_SERIAL']
+ p = disk.udevadm_property(self.path, props)
+ if p.get('ID_MODEL','').startswith('LVM PV '):
+ p['ID_MODEL'] = p.get('ID_MODEL_ENC', '').replace('\\x20', ' ').strip()
+ if 'ID_VENDOR' in p and 'ID_MODEL' in p and 'ID_SCSI_SERIAL' in p:
+ dev_id = '_'.join([p['ID_VENDOR'], p['ID_MODEL'],
+ p['ID_SCSI_SERIAL']])
+ elif 'ID_MODEL' in p and 'ID_SERIAL_SHORT' in p:
+ dev_id = '_'.join([p['ID_MODEL'], p['ID_SERIAL_SHORT']])
+ elif 'ID_SERIAL' in p:
+ dev_id = p['ID_SERIAL']
+ if dev_id.startswith('MTFD'):
+ # Micron NVMes hide the vendor
+ dev_id = 'Micron_' + dev_id
+ else:
+ # the else branch should fallback to using sysfs and ioctl to
+ # retrieve device_id on FreeBSD. Still figuring out if/how the
+ # python ioctl implementation does that on FreeBSD
+ dev_id = ''
+ dev_id.replace(' ', '_')
+ return dev_id
+
+ def _set_lvm_membership(self):
+ if self._is_lvm_member is None:
+ # this is contentious, if a PV is recognized by LVM but has no
+ # VGs, should we consider it as part of LVM? We choose not to
+ # here, because most likely, we need to use VGs from this PV.
+ self._is_lvm_member = False
+ device_to_check = [self.path]
+ device_to_check.extend(self.partitions)
+
+ # a pv can only be in one vg, so this should be safe
+ # FIXME: While the above assumption holds, sda1 and sda2
+ # can each host a PV and VG. I think the vg_name property is
+ # actually unused (not 100% sure) and can simply be removed
+ vgs = None
+ if not self.all_devices_vgs:
+ self.all_devices_vgs = lvm.get_all_devices_vgs()
+ for path in device_to_check:
+ for dev_vg in self.all_devices_vgs:
+ if dev_vg.pv_name == path:
+ vgs = [dev_vg]
+ if vgs:
+ self.vgs.extend(vgs)
+ self.vg_name = vgs[0]
+ self._is_lvm_member = True
+ self.lvs.extend(lvm.get_device_lvs(path))
+ if self.lvs:
+ self.ceph_device = any([True if lv.tags.get('ceph.osd_id') else False for lv in self.lvs])
+
+ def _get_partitions(self):
+ """
+ For block devices LVM can reside on the raw block device or on a
+ partition. Return a list of paths to be checked for a pv.
+ """
+ partitions = []
+ path_dir = os.path.dirname(self.path)
+ for partition in self.sys_api.get('partitions', {}).keys():
+ partitions.append(os.path.join(path_dir, partition))
+ return partitions
+
+ @property
+ def exists(self):
+ return os.path.exists(self.path)
+
+ @property
+ def has_fs(self):
+ self.load_blkid_api()
+ return 'TYPE' in self.blkid_api
+
+ @property
+ def has_gpt_headers(self):
+ self.load_blkid_api()
+ return self.blkid_api.get("PTTYPE") == "gpt"
+
+ @property
+ def rotational(self):
+ rotational = self.sys_api.get('rotational')
+ if rotational is None:
+ # fall back to lsblk if not found in sys_api
+ # default to '1' if no value is found with lsblk either
+ rotational = self.disk_api.get('ROTA', '1')
+ return rotational == '1'
+
+ @property
+ def model(self):
+ return self.sys_api['model']
+
+ @property
+ def size_human(self):
+ return self.sys_api['human_readable_size']
+
+ @property
+ def size(self):
+ return self.sys_api['size']
+
+ @property
+ def parent_device(self):
+ if 'PKNAME' in self.disk_api:
+ return '/dev/%s' % self.disk_api['PKNAME']
+ return None
+
+ @property
+ def lvm_size(self):
+ """
+ If this device was made into a PV it would lose 1GB in total size
+ due to the 1GB physical extent size we set when creating volume groups
+ """
+ size = disk.Size(b=self.size)
+ lvm_size = disk.Size(gb=size.gb.as_int()) - disk.Size(gb=1)
+ return lvm_size
+
+ @property
+ def is_lvm_member(self):
+ if self._is_lvm_member is None:
+ self._set_lvm_membership()
+ return self._is_lvm_member
+
+ @property
+ def is_ceph_disk_member(self):
+ def is_member(device):
+ return 'ceph' in device.get('PARTLABEL', '') or \
+ device.get('PARTTYPE', '') in ceph_disk_guids.keys()
+ # If we come from Devices(), self.lsblk_all is set already.
+ # Otherwise, we have to grab the data.
+ details = self.lsblk_all or disk.lsblk_all()
+ _is_member = False
+ if self.sys_api.get("partitions"):
+ for part in self.sys_api.get("partitions").keys():
+ for dev in details:
+ if part.startswith(dev['NAME']):
+ if is_member(dev):
+ _is_member = True
+ return _is_member
+ else:
+ return is_member(self.disk_api)
+ raise RuntimeError(f"Couln't check if device {self.path} is a ceph-disk member.")
+
+ @property
+ def has_bluestore_label(self):
+ return disk.has_bluestore_label(self.path)
+
+ @property
+ def is_mapper(self):
+ return self.path.startswith(('/dev/mapper', '/dev/dm-'))
+
+ @property
+ def device_type(self):
+ self.load_blkid_api()
+ if 'type' in self.sys_api:
+ return self.sys_api['type']
+ elif self.disk_api:
+ return self.disk_api['TYPE']
+ elif self.blkid_api:
+ return self.blkid_api['TYPE']
+
+ @property
+ def is_mpath(self):
+ return self.device_type == 'mpath'
+
+ @property
+ def is_lv(self):
+ return self.lv_api is not None
+
+ @property
+ def is_partition(self):
+ self.load_blkid_api()
+ if self.disk_api:
+ return self.disk_api['TYPE'] == 'part'
+ elif self.blkid_api:
+ return self.blkid_api['TYPE'] == 'part'
+ return False
+
+ @property
+ def is_device(self):
+ self.load_blkid_api()
+ api = None
+ if self.disk_api:
+ api = self.disk_api
+ elif self.blkid_api:
+ api = self.blkid_api
+ if api:
+ valid_types = ['disk', 'device', 'mpath']
+ if allow_loop_devices():
+ valid_types.append('loop')
+ return self.device_type in valid_types
+ return False
+
+ @property
+ def is_acceptable_device(self):
+ return self.is_device or self.is_partition
+
+ @property
+ def is_encrypted(self):
+ """
+ Only correct for LVs, device mappers, and partitions. Will report a ``None``
+ for raw devices.
+ """
+ self.load_blkid_api()
+ crypt_reports = [self.blkid_api.get('TYPE', ''), self.disk_api.get('FSTYPE', '')]
+ if self.is_lv:
+ # if disk APIs are reporting this is encrypted use that:
+ if 'crypto_LUKS' in crypt_reports:
+ return True
+ # if ceph-volume created this, then a tag would let us know
+ elif self.lv_api.encrypted:
+ return True
+ return False
+ elif self.is_partition:
+ return 'crypto_LUKS' in crypt_reports
+ elif self.is_mapper:
+ active_mapper = encryption_status(self.path)
+ if active_mapper:
+ # normalize a bit to ensure same values regardless of source
+ encryption_type = active_mapper['type'].lower().strip('12') # turn LUKS1 or LUKS2 into luks
+ return True if encryption_type in ['plain', 'luks'] else False
+ else:
+ return False
+ else:
+ return None
+
+ @property
+ def used_by_ceph(self):
+ # only filter out data devices as journals could potentially be reused
+ osd_ids = [lv.tags.get("ceph.osd_id") is not None for lv in self.lvs
+ if lv.tags.get("ceph.type") in ["data", "block"]]
+ return any(osd_ids)
+
+ @property
+ def vg_free_percent(self):
+ if self.vgs:
+ return [vg.free_percent for vg in self.vgs]
+ else:
+ return [1]
+
+ @property
+ def vg_size(self):
+ if self.vgs:
+ return [vg.size for vg in self.vgs]
+ else:
+ # TODO fix this...we can probably get rid of vg_free
+ return self.vg_free
+
+ @property
+ def vg_free(self):
+ '''
+ Returns the free space in all VGs on this device. If no VGs are
+ present, returns the disk size.
+ '''
+ if self.vgs:
+ return [vg.free for vg in self.vgs]
+ else:
+ # We could also query 'lvmconfig
+ # --typeconfig full' and use allocations -> physical_extent_size
+ # value to project the space for a vg
+ # assuming 4M extents here
+ extent_size = 4194304
+ vg_free = int(self.size / extent_size) * extent_size
+ if self.size % extent_size == 0:
+ # If the extent size divides size exactly, deduct on extent for
+ # LVM metadata
+ vg_free -= extent_size
+ return [vg_free]
+
+ @property
+ def has_partitions(self):
+ '''
+ Boolean to determine if a given device has partitions.
+ '''
+ if self.sys_api.get('partitions'):
+ return True
+ return False
+
+ def _check_generic_reject_reasons(self):
+ reasons = [
+ ('removable', 1, 'removable'),
+ ('ro', 1, 'read-only'),
+ ('locked', 1, 'locked'),
+ ]
+ rejected = [reason for (k, v, reason) in reasons if
+ self.sys_api.get(k, '') == v]
+ if self.is_acceptable_device:
+ # reject disks smaller than 5GB
+ if int(self.sys_api.get('size', 0)) < 5368709120:
+ rejected.append('Insufficient space (<5GB)')
+ else:
+ rejected.append("Device type is not acceptable. It should be raw device or partition")
+ if self.is_ceph_disk_member:
+ rejected.append("Used by ceph-disk")
+
+ try:
+ if self.has_bluestore_label:
+ rejected.append('Has BlueStore device label')
+ except OSError as e:
+ # likely failed to open the device. assuming it is BlueStore is the safest option
+ # so that a possibly-already-existing OSD doesn't get overwritten
+ logger.error('failed to determine if device {} is BlueStore. device should not be used to avoid false negatives. err: {}'.format(self.path, e))
+ rejected.append('Failed to determine if device is BlueStore')
+
+ if self.is_partition:
+ try:
+ if disk.has_bluestore_label(self.parent_device):
+ rejected.append('Parent has BlueStore device label')
+ except OSError as e:
+ # likely failed to open the device. assuming the parent is BlueStore is the safest
+ # option so that a possibly-already-existing OSD doesn't get overwritten
+ logger.error('failed to determine if partition {} (parent: {}) has a BlueStore parent. partition should not be used to avoid false negatives. err: {}'.format(self.path, self.parent_device, e))
+ rejected.append('Failed to determine if parent device is BlueStore')
+
+ if self.has_gpt_headers:
+ rejected.append('Has GPT headers')
+ if self.has_partitions:
+ rejected.append('Has partitions')
+ return rejected
+
+ def _check_lvm_reject_reasons(self):
+ rejected = []
+ if self.vgs:
+ available_vgs = [vg for vg in self.vgs if int(vg.vg_free_count) > 10]
+ if not available_vgs:
+ rejected.append('Insufficient space (<10 extents) on vgs')
+ else:
+ # only check generic if no vgs are present. Vgs might hold lvs and
+ # that might cause 'locked' to trigger
+ rejected.extend(self._check_generic_reject_reasons())
+
+ return len(rejected) == 0, rejected
+
+ def _check_raw_reject_reasons(self):
+ rejected = self._check_generic_reject_reasons()
+ if len(self.vgs) > 0:
+ rejected.append('LVM detected')
+
+ return len(rejected) == 0, rejected
+
+ @property
+ def available_lvm_batch(self):
+ if self.sys_api.get("partitions"):
+ return False
+ if system.device_is_mounted(self.path):
+ return False
+ return self.is_device or self.is_lv
+
+
+class CephDiskDevice(object):
+ """
+ Detect devices that have been created by ceph-disk, report their type
+ (journal, data, etc..). Requires a ``Device`` object as input.
+ """
+
+ def __init__(self, device):
+ self.device = device
+ self._is_ceph_disk_member = None
+
+ @property
+ def partlabel(self):
+ """
+ In containers, the 'PARTLABEL' attribute might not be detected
+ correctly via ``lsblk``, so we poke at the value with ``lsblk`` first,
+ falling back to ``blkid`` (which works correclty in containers).
+ """
+ lsblk_partlabel = self.device.disk_api.get('PARTLABEL')
+ if lsblk_partlabel:
+ return lsblk_partlabel
+ return self.device.blkid_api.get('PARTLABEL', '')
+
+ @property
+ def parttype(self):
+ """
+ Seems like older version do not detect PARTTYPE correctly (assuming the
+ info in util/disk.py#lsblk is still valid).
+ SImply resolve to using blkid since lsblk will throw an error if asked
+ for an unknown columns
+ """
+ return self.device.blkid_api.get('PARTTYPE', '')
+
+ @property
+ def is_member(self):
+ if self._is_ceph_disk_member is None:
+ if 'ceph' in self.partlabel:
+ self._is_ceph_disk_member = True
+ return True
+ elif self.parttype in ceph_disk_guids.keys():
+ return True
+ return False
+ return self._is_ceph_disk_member
+
+ @property
+ def type(self):
+ types = [
+ 'data', 'wal', 'db', 'lockbox', 'journal',
+ # ceph-disk uses 'ceph block' when placing data in bluestore, but
+ # keeps the regular OSD files in 'ceph data' :( :( :( :(
+ 'block',
+ ]
+ for t in types:
+ if t in self.partlabel:
+ return t
+ label = ceph_disk_guids.get(self.parttype, {})
+ return label.get('type', 'unknown').split('.')[-1]
diff --git a/src/ceph-volume/ceph_volume/util/disk.py b/src/ceph-volume/ceph_volume/util/disk.py
new file mode 100644
index 000000000..4a310e30f
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/util/disk.py
@@ -0,0 +1,927 @@
+import logging
+import os
+import re
+import stat
+import time
+from ceph_volume import process
+from ceph_volume.api import lvm
+from ceph_volume.util.system import get_file_contents
+
+
+logger = logging.getLogger(__name__)
+
+
+# The blkid CLI tool has some oddities which prevents having one common call
+# to extract the information instead of having separate utilities. The `udev`
+# type of output is needed in older versions of blkid (v 2.23) that will not
+# work correctly with just the ``-p`` flag to bypass the cache for example.
+# Xenial doesn't have this problem as it uses a newer blkid version.
+
+
+def get_partuuid(device):
+ """
+ If a device is a partition, it will probably have a PARTUUID on it that
+ will persist and can be queried against `blkid` later to detect the actual
+ device
+ """
+ out, err, rc = process.call(
+ ['blkid', '-c', '/dev/null', '-s', 'PARTUUID', '-o', 'value', device]
+ )
+ return ' '.join(out).strip()
+
+
+def _blkid_parser(output):
+ """
+ Parses the output from a system ``blkid`` call, requires output to be
+ produced using the ``-p`` flag which bypasses the cache, mangling the
+ names. These names are corrected to what it would look like without the
+ ``-p`` flag.
+
+ Normal output::
+
+ /dev/sdb1: UUID="62416664-cbaf-40bd-9689-10bd337379c3" TYPE="xfs" [...]
+ """
+ # first spaced separated item is garbage, gets tossed:
+ output = ' '.join(output.split()[1:])
+ # split again, respecting possible whitespace in quoted values
+ pairs = output.split('" ')
+ raw = {}
+ processed = {}
+ mapping = {
+ 'UUID': 'UUID',
+ 'TYPE': 'TYPE',
+ 'PART_ENTRY_NAME': 'PARTLABEL',
+ 'PART_ENTRY_UUID': 'PARTUUID',
+ 'PART_ENTRY_TYPE': 'PARTTYPE',
+ 'PTTYPE': 'PTTYPE',
+ }
+
+ for pair in pairs:
+ try:
+ column, value = pair.split('=')
+ except ValueError:
+ continue
+ raw[column] = value.strip().strip().strip('"')
+
+ for key, value in raw.items():
+ new_key = mapping.get(key)
+ if not new_key:
+ continue
+ processed[new_key] = value
+
+ return processed
+
+
+def blkid(device):
+ """
+ The blkid interface to its CLI, creating an output similar to what is
+ expected from ``lsblk``. In most cases, ``lsblk()`` should be the preferred
+ method for extracting information about a device. There are some corner
+ cases where it might provide information that is otherwise unavailable.
+
+ The system call uses the ``-p`` flag which bypasses the cache, the caveat
+ being that the keys produced are named completely different to expected
+ names.
+
+ For example, instead of ``PARTLABEL`` it provides a ``PART_ENTRY_NAME``.
+ A bit of translation between these known keys is done, which is why
+ ``lsblk`` should always be preferred: the output provided here is not as
+ rich, given that a translation of keys is required for a uniform interface
+ with the ``-p`` flag.
+
+ Label name to expected output chart:
+
+ cache bypass name expected name
+
+ UUID UUID
+ TYPE TYPE
+ PART_ENTRY_NAME PARTLABEL
+ PART_ENTRY_UUID PARTUUID
+ """
+ out, err, rc = process.call(
+ ['blkid', '-c', '/dev/null', '-p', device]
+ )
+ return _blkid_parser(' '.join(out))
+
+
+def get_part_entry_type(device):
+ """
+ Parses the ``ID_PART_ENTRY_TYPE`` from the "low level" (bypasses the cache)
+ output that uses the ``udev`` type of output. This output is intended to be
+ used for udev rules, but it is useful in this case as it is the only
+ consistent way to retrieve the GUID used by ceph-disk to identify devices.
+ """
+ out, err, rc = process.call(['blkid', '-c', '/dev/null', '-p', '-o', 'udev', device])
+ for line in out:
+ if 'ID_PART_ENTRY_TYPE=' in line:
+ return line.split('=')[-1].strip()
+ return ''
+
+
+def get_device_from_partuuid(partuuid):
+ """
+ If a device has a partuuid, query blkid so that it can tell us what that
+ device is
+ """
+ out, err, rc = process.call(
+ ['blkid', '-c', '/dev/null', '-t', 'PARTUUID="%s"' % partuuid, '-o', 'device']
+ )
+ return ' '.join(out).strip()
+
+
+def remove_partition(device):
+ """
+ Removes a partition using parted
+
+ :param device: A ``Device()`` object
+ """
+ # Sometimes there's a race condition that makes 'ID_PART_ENTRY_NUMBER' be not present
+ # in the output of `udevadm info --query=property`.
+ # Probably not ideal and not the best fix but this allows to get around that issue.
+ # The idea is to make it retry multiple times before actually failing.
+ for i in range(10):
+ udev_info = udevadm_property(device.path)
+ partition_number = udev_info.get('ID_PART_ENTRY_NUMBER')
+ if partition_number:
+ break
+ time.sleep(0.2)
+ if not partition_number:
+ raise RuntimeError('Unable to detect the partition number for device: %s' % device.path)
+
+ process.run(
+ ['parted', device.parent_device, '--script', '--', 'rm', partition_number]
+ )
+
+
+def _stat_is_device(stat_obj):
+ """
+ Helper function that will interpret ``os.stat`` output directly, so that other
+ functions can call ``os.stat`` once and interpret that result several times
+ """
+ return stat.S_ISBLK(stat_obj)
+
+
+def _lsblk_parser(line):
+ """
+ Parses lines in lsblk output. Requires output to be in pair mode (``-P`` flag). Lines
+ need to be whole strings, the line gets split when processed.
+
+ :param line: A string, with the full line from lsblk output
+ """
+ # parse the COLUMN="value" output to construct the dictionary
+ pairs = line.split('" ')
+ parsed = {}
+ for pair in pairs:
+ try:
+ column, value = pair.split('=')
+ except ValueError:
+ continue
+ parsed[column] = value.strip().strip().strip('"')
+ return parsed
+
+
+def device_family(device):
+ """
+ Returns a list of associated devices. It assumes that ``device`` is
+ a parent device. It is up to the caller to ensure that the device being
+ used is a parent, not a partition.
+ """
+ labels = ['NAME', 'PARTLABEL', 'TYPE']
+ command = ['lsblk', '-P', '-p', '-o', ','.join(labels), device]
+ out, err, rc = process.call(command)
+ devices = []
+ for line in out:
+ devices.append(_lsblk_parser(line))
+
+ return devices
+
+
+def udevadm_property(device, properties=[]):
+ """
+ Query udevadm for information about device properties.
+ Optionally pass a list of properties to return. A requested property might
+ not be returned if not present.
+
+ Expected output format::
+ # udevadm info --query=property --name=/dev/sda :(
+ DEVNAME=/dev/sda
+ DEVTYPE=disk
+ ID_ATA=1
+ ID_BUS=ata
+ ID_MODEL=SK_hynix_SC311_SATA_512GB
+ ID_PART_TABLE_TYPE=gpt
+ ID_PART_TABLE_UUID=c8f91d57-b26c-4de1-8884-0c9541da288c
+ ID_PATH=pci-0000:00:17.0-ata-3
+ ID_PATH_TAG=pci-0000_00_17_0-ata-3
+ ID_REVISION=70000P10
+ ID_SERIAL=SK_hynix_SC311_SATA_512GB_MS83N71801150416A
+ TAGS=:systemd:
+ USEC_INITIALIZED=16117769
+ ...
+ """
+ out = _udevadm_info(device)
+ ret = {}
+ for line in out:
+ p, v = line.split('=', 1)
+ if not properties or p in properties:
+ ret[p] = v
+ return ret
+
+
+def _udevadm_info(device):
+ """
+ Call udevadm and return the output
+ """
+ cmd = ['udevadm', 'info', '--query=property', device]
+ out, _err, _rc = process.call(cmd)
+ return out
+
+
+def lsblk(device, columns=None, abspath=False):
+ return lsblk_all(device=device,
+ columns=columns,
+ abspath=abspath)
+
+def lsblk_all(device='', columns=None, abspath=False):
+ """
+ Create a dictionary of identifying values for a device using ``lsblk``.
+ Each supported column is a key, in its *raw* format (all uppercase
+ usually). ``lsblk`` has support for certain "columns" (in blkid these
+ would be labels), and these columns vary between distributions and
+ ``lsblk`` versions. The newer versions support a richer set of columns,
+ while older ones were a bit limited.
+
+ These are a subset of lsblk columns which are known to work on both CentOS 7 and Xenial:
+
+ NAME device name
+ KNAME internal kernel device name
+ PKNAME internal kernel parent device name
+ MAJ:MIN major:minor device number
+ FSTYPE filesystem type
+ MOUNTPOINT where the device is mounted
+ LABEL filesystem LABEL
+ UUID filesystem UUID
+ RO read-only device
+ RM removable device
+ MODEL device identifier
+ SIZE size of the device
+ STATE state of the device
+ OWNER user name
+ GROUP group name
+ MODE device node permissions
+ ALIGNMENT alignment offset
+ MIN-IO minimum I/O size
+ OPT-IO optimal I/O size
+ PHY-SEC physical sector size
+ LOG-SEC logical sector size
+ ROTA rotational device
+ SCHED I/O scheduler name
+ RQ-SIZE request queue size
+ TYPE device type
+ PKNAME internal parent kernel device name
+ DISC-ALN discard alignment offset
+ DISC-GRAN discard granularity
+ DISC-MAX discard max bytes
+ DISC-ZERO discard zeroes data
+
+ There is a bug in ``lsblk`` where using all the available (supported)
+ columns will result in no output (!), in order to workaround this the
+ following columns have been removed from the default reporting columns:
+
+ * RQ-SIZE (request queue size)
+ * MIN-IO minimum I/O size
+ * OPT-IO optimal I/O size
+
+ These should be available however when using `columns`. For example::
+
+ >>> lsblk('/dev/sda1', columns=['OPT-IO'])
+ {'OPT-IO': '0'}
+
+ Normal CLI output, as filtered by the flags in this function will look like ::
+
+ $ lsblk -P -o NAME,KNAME,PKNAME,MAJ:MIN,FSTYPE,MOUNTPOINT
+ NAME="sda1" KNAME="sda1" MAJ:MIN="8:1" FSTYPE="ext4" MOUNTPOINT="/"
+
+ :param columns: A list of columns to report as keys in its original form.
+ :param abspath: Set the flag for absolute paths on the report
+ """
+ default_columns = [
+ 'NAME', 'KNAME', 'PKNAME', 'MAJ:MIN', 'FSTYPE', 'MOUNTPOINT', 'LABEL',
+ 'UUID', 'RO', 'RM', 'MODEL', 'SIZE', 'STATE', 'OWNER', 'GROUP', 'MODE',
+ 'ALIGNMENT', 'PHY-SEC', 'LOG-SEC', 'ROTA', 'SCHED', 'TYPE', 'DISC-ALN',
+ 'DISC-GRAN', 'DISC-MAX', 'DISC-ZERO', 'PKNAME', 'PARTLABEL'
+ ]
+ columns = columns or default_columns
+ # -P -> Produce pairs of COLUMN="value"
+ # -p -> Return full paths to devices, not just the names, when ``abspath`` is set
+ # -o -> Use the columns specified or default ones provided by this function
+ base_command = ['lsblk', '-P']
+ if abspath:
+ base_command.append('-p')
+ base_command.append('-o')
+ base_command.append(','.join(columns))
+
+ out, err, rc = process.call(base_command)
+
+ if rc != 0:
+ raise RuntimeError(f"Error: {err}")
+
+ result = []
+
+ for line in out:
+ result.append(_lsblk_parser(line))
+
+ if not device:
+ return result
+
+ for dev in result:
+ if dev['NAME'] == os.path.basename(device):
+ return dev
+
+ return {}
+
+def is_device(dev):
+ """
+ Boolean to determine if a given device is a block device (**not**
+ a partition!)
+
+ For example: /dev/sda would return True, but not /dev/sdc1
+ """
+ if not os.path.exists(dev):
+ return False
+ if not dev.startswith('/dev/'):
+ return False
+ if dev[len('/dev/'):].startswith('loop'):
+ if not allow_loop_devices():
+ return False
+
+ # fallback to stat
+ return _stat_is_device(os.lstat(dev).st_mode)
+
+
+def is_partition(dev):
+ """
+ Boolean to determine if a given device is a partition, like /dev/sda1
+ """
+ if not os.path.exists(dev):
+ return False
+ # use lsblk first, fall back to using stat
+ TYPE = lsblk(dev).get('TYPE')
+ if TYPE:
+ return TYPE == 'part'
+
+ # fallback to stat
+ stat_obj = os.stat(dev)
+ if _stat_is_device(stat_obj.st_mode):
+ return False
+
+ major = os.major(stat_obj.st_rdev)
+ minor = os.minor(stat_obj.st_rdev)
+ if os.path.exists('/sys/dev/block/%d:%d/partition' % (major, minor)):
+ return True
+ return False
+
+
+def is_ceph_rbd(dev):
+ """
+ Boolean to determine if a given device is a ceph RBD device, like /dev/rbd0
+ """
+ return dev.startswith(('/dev/rbd'))
+
+
+class BaseFloatUnit(float):
+ """
+ Base class to support float representations of size values. Suffix is
+ computed on child classes by inspecting the class name
+ """
+
+ def __repr__(self):
+ return "<%s(%s)>" % (self.__class__.__name__, self.__float__())
+
+ def __str__(self):
+ return "{size:.2f} {suffix}".format(
+ size=self.__float__(),
+ suffix=self.__class__.__name__.split('Float')[-1]
+ )
+
+ def as_int(self):
+ return int(self.real)
+
+ def as_float(self):
+ return self.real
+
+
+class FloatB(BaseFloatUnit):
+ pass
+
+
+class FloatMB(BaseFloatUnit):
+ pass
+
+
+class FloatGB(BaseFloatUnit):
+ pass
+
+
+class FloatKB(BaseFloatUnit):
+ pass
+
+
+class FloatTB(BaseFloatUnit):
+ pass
+
+class FloatPB(BaseFloatUnit):
+ pass
+
+class Size(object):
+ """
+ Helper to provide an interface for different sizes given a single initial
+ input. Allows for comparison between different size objects, which avoids
+ the need to convert sizes before comparison (e.g. comparing megabytes
+ against gigabytes).
+
+ Common comparison operators are supported::
+
+ >>> hd1 = Size(gb=400)
+ >>> hd2 = Size(gb=500)
+ >>> hd1 > hd2
+ False
+ >>> hd1 < hd2
+ True
+ >>> hd1 == hd2
+ False
+ >>> hd1 == Size(gb=400)
+ True
+
+ The Size object can also be multiplied or divided::
+
+ >>> hd1
+ <Size(400.00 GB)>
+ >>> hd1 * 2
+ <Size(800.00 GB)>
+ >>> hd1
+ <Size(800.00 GB)>
+
+ Additions and subtractions are only supported between Size objects::
+
+ >>> Size(gb=224) - Size(gb=100)
+ <Size(124.00 GB)>
+ >>> Size(gb=1) + Size(mb=300)
+ <Size(1.29 GB)>
+
+ Can also display a human-readable representation, with automatic detection
+ on best suited unit, or alternatively, specific unit representation::
+
+ >>> s = Size(mb=2211)
+ >>> s
+ <Size(2.16 GB)>
+ >>> s.mb
+ <FloatMB(2211.0)>
+ >>> print("Total size: %s" % s.mb)
+ Total size: 2211.00 MB
+ >>> print("Total size: %s" % s)
+ Total size: 2.16 GB
+ """
+
+ @classmethod
+ def parse(cls, size):
+ if (len(size) > 2 and
+ size[-2].lower() in ['k', 'm', 'g', 't', 'p'] and
+ size[-1].lower() == 'b'):
+ return cls(**{size[-2:].lower(): float(size[0:-2])})
+ elif size[-1].lower() in ['b', 'k', 'm', 'g', 't', 'p']:
+ return cls(**{size[-1].lower(): float(size[0:-1])})
+ else:
+ return cls(b=float(size))
+
+
+ def __init__(self, multiplier=1024, **kw):
+ self._multiplier = multiplier
+ # create a mapping of units-to-multiplier, skip bytes as that is
+ # calculated initially always and does not need to convert
+ aliases = [
+ [('k', 'kb', 'kilobytes'), self._multiplier],
+ [('m', 'mb', 'megabytes'), self._multiplier ** 2],
+ [('g', 'gb', 'gigabytes'), self._multiplier ** 3],
+ [('t', 'tb', 'terabytes'), self._multiplier ** 4],
+ [('p', 'pb', 'petabytes'), self._multiplier ** 5]
+ ]
+ # and mappings for units-to-formatters, including bytes and aliases for
+ # each
+ format_aliases = [
+ [('b', 'bytes'), FloatB],
+ [('kb', 'kilobytes'), FloatKB],
+ [('mb', 'megabytes'), FloatMB],
+ [('gb', 'gigabytes'), FloatGB],
+ [('tb', 'terabytes'), FloatTB],
+ [('pb', 'petabytes'), FloatPB],
+ ]
+ self._formatters = {}
+ for key, value in format_aliases:
+ for alias in key:
+ self._formatters[alias] = value
+ self._factors = {}
+ for key, value in aliases:
+ for alias in key:
+ self._factors[alias] = value
+
+ for k, v in kw.items():
+ self._convert(v, k)
+ # only pursue the first occurrence
+ break
+
+ def _convert(self, size, unit):
+ """
+ Convert any size down to bytes so that other methods can rely on bytes
+ being available always, regardless of what they pass in, avoiding the
+ need for a mapping of every permutation.
+ """
+ if unit in ['b', 'bytes']:
+ self._b = size
+ return
+ factor = self._factors[unit]
+ self._b = float(size * factor)
+
+ def _get_best_format(self):
+ """
+ Go through all the supported units, and use the first one that is less
+ than 1024. This allows to represent size in the most readable format
+ available
+ """
+ for unit in ['b', 'kb', 'mb', 'gb', 'tb', 'pb']:
+ if getattr(self, unit) > 1024:
+ continue
+ return getattr(self, unit)
+
+ def __repr__(self):
+ return "<Size(%s)>" % self._get_best_format()
+
+ def __str__(self):
+ return "%s" % self._get_best_format()
+
+ def __format__(self, spec):
+ return str(self._get_best_format()).__format__(spec)
+
+ def __int__(self):
+ return int(self._b)
+
+ def __float__(self):
+ return self._b
+
+ def __lt__(self, other):
+ if isinstance(other, Size):
+ return self._b < other._b
+ else:
+ return self.b < other
+
+ def __le__(self, other):
+ if isinstance(other, Size):
+ return self._b <= other._b
+ else:
+ return self.b <= other
+
+ def __eq__(self, other):
+ if isinstance(other, Size):
+ return self._b == other._b
+ else:
+ return self.b == other
+
+ def __ne__(self, other):
+ if isinstance(other, Size):
+ return self._b != other._b
+ else:
+ return self.b != other
+
+ def __ge__(self, other):
+ if isinstance(other, Size):
+ return self._b >= other._b
+ else:
+ return self.b >= other
+
+ def __gt__(self, other):
+ if isinstance(other, Size):
+ return self._b > other._b
+ else:
+ return self.b > other
+
+ def __add__(self, other):
+ if isinstance(other, Size):
+ _b = self._b + other._b
+ return Size(b=_b)
+ raise TypeError('Cannot add "Size" object with int')
+
+ def __sub__(self, other):
+ if isinstance(other, Size):
+ _b = self._b - other._b
+ return Size(b=_b)
+ raise TypeError('Cannot subtract "Size" object from int')
+
+ def __mul__(self, other):
+ if isinstance(other, Size):
+ raise TypeError('Cannot multiply with "Size" object')
+ _b = self._b * other
+ return Size(b=_b)
+
+ def __truediv__(self, other):
+ if isinstance(other, Size):
+ return self._b / other._b
+ _b = self._b / other
+ return Size(b=_b)
+
+ def __div__(self, other):
+ if isinstance(other, Size):
+ return self._b / other._b
+ _b = self._b / other
+ return Size(b=_b)
+
+ def __bool__(self):
+ return self.b != 0
+
+ def __nonzero__(self):
+ return self.__bool__()
+
+ def __getattr__(self, unit):
+ """
+ Calculate units on the fly, relies on the fact that ``bytes`` has been
+ converted at instantiation. Units that don't exist will trigger an
+ ``AttributeError``
+ """
+ try:
+ formatter = self._formatters[unit]
+ except KeyError:
+ raise AttributeError('Size object has not attribute "%s"' % unit)
+ if unit in ['b', 'bytes']:
+ return formatter(self._b)
+ try:
+ factor = self._factors[unit]
+ except KeyError:
+ raise AttributeError('Size object has not attribute "%s"' % unit)
+ return formatter(float(self._b) / factor)
+
+
+def human_readable_size(size):
+ """
+ Take a size in bytes, and transform it into a human readable size with up
+ to two decimals of precision.
+ """
+ suffixes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
+ for suffix in suffixes:
+ if size >= 1024:
+ size = size / 1024
+ else:
+ break
+ return "{size:.2f} {suffix}".format(
+ size=size,
+ suffix=suffix)
+
+
+def size_from_human_readable(s):
+ """
+ Takes a human readable string and converts into a Size. If no unit is
+ passed, bytes is assumed.
+ """
+ s = s.replace(' ', '')
+ if s[-1].isdigit():
+ return Size(b=float(s))
+ n = float(s[:-1])
+ if s[-1].lower() == 'p':
+ return Size(pb=n)
+ if s[-1].lower() == 't':
+ return Size(tb=n)
+ if s[-1].lower() == 'g':
+ return Size(gb=n)
+ if s[-1].lower() == 'm':
+ return Size(mb=n)
+ if s[-1].lower() == 'k':
+ return Size(kb=n)
+ return None
+
+
+def get_partitions_facts(sys_block_path):
+ partition_metadata = {}
+ for folder in os.listdir(sys_block_path):
+ folder_path = os.path.join(sys_block_path, folder)
+ if os.path.exists(os.path.join(folder_path, 'partition')):
+ contents = get_file_contents(os.path.join(folder_path, 'partition'))
+ if contents:
+ part = {}
+ partname = folder
+ part_sys_block_path = os.path.join(sys_block_path, partname)
+
+ part['start'] = get_file_contents(part_sys_block_path + "/start", 0)
+ part['sectors'] = get_file_contents(part_sys_block_path + "/size", 0)
+
+ part['sectorsize'] = get_file_contents(
+ part_sys_block_path + "/queue/logical_block_size")
+ if not part['sectorsize']:
+ part['sectorsize'] = get_file_contents(
+ part_sys_block_path + "/queue/hw_sector_size", 512)
+ part['size'] = float(part['sectors']) * 512
+ part['human_readable_size'] = human_readable_size(float(part['sectors']) * 512)
+ part['holders'] = []
+ for holder in os.listdir(part_sys_block_path + '/holders'):
+ part['holders'].append(holder)
+
+ partition_metadata[partname] = part
+ return partition_metadata
+
+
+def is_mapper_device(device_name):
+ return device_name.startswith(('/dev/mapper', '/dev/dm-'))
+
+
+def is_locked_raw_device(disk_path):
+ """
+ A device can be locked by a third party software like a database.
+ To detect that case, the device is opened in Read/Write and exclusive mode
+ """
+ open_flags = (os.O_RDWR | os.O_EXCL)
+ open_mode = 0
+ fd = None
+
+ try:
+ fd = os.open(disk_path, open_flags, open_mode)
+ except OSError:
+ return 1
+
+ try:
+ os.close(fd)
+ except OSError:
+ return 1
+
+ return 0
+
+
+class AllowLoopDevices(object):
+ allow = False
+ warned = False
+
+ @classmethod
+ def __call__(cls):
+ val = os.environ.get("CEPH_VOLUME_ALLOW_LOOP_DEVICES", "false").lower()
+ if val not in ("false", 'no', '0'):
+ cls.allow = True
+ if not cls.warned:
+ logger.warning(
+ "CEPH_VOLUME_ALLOW_LOOP_DEVICES is set in your "
+ "environment, so we will allow the use of unattached loop"
+ " devices as disks. This feature is intended for "
+ "development purposes only and will never be supported in"
+ " production. Issues filed based on this behavior will "
+ "likely be ignored."
+ )
+ cls.warned = True
+ return cls.allow
+
+
+allow_loop_devices = AllowLoopDevices()
+
+
+def get_block_devs_sysfs(_sys_block_path='/sys/block', _sys_dev_block_path='/sys/dev/block', device=''):
+ def holder_inner_loop():
+ for holder in holders:
+ # /sys/block/sdy/holders/dm-8/dm/uuid
+ holder_dm_type = get_file_contents(os.path.join(_sys_block_path, dev, f'holders/{holder}/dm/uuid')).split('-')[0].lower()
+ if holder_dm_type == 'mpath':
+ return True
+
+ # First, get devices that are _not_ partitions
+ result = list()
+ if not device:
+ dev_names = os.listdir(_sys_block_path)
+ else:
+ dev_names = [device]
+ for dev in dev_names:
+ name = kname = os.path.join("/dev", dev)
+ if not os.path.exists(name):
+ continue
+ type_ = 'disk'
+ holders = os.listdir(os.path.join(_sys_block_path, dev, 'holders'))
+ if get_file_contents(os.path.join(_sys_block_path, dev, 'removable')) == "1":
+ continue
+ if holder_inner_loop():
+ continue
+ dm_dir_path = os.path.join(_sys_block_path, dev, 'dm')
+ if os.path.isdir(dm_dir_path):
+ dm_type = get_file_contents(os.path.join(dm_dir_path, 'uuid'))
+ type_ = dm_type.split('-')[0].lower()
+ basename = get_file_contents(os.path.join(dm_dir_path, 'name'))
+ name = os.path.join("/dev/mapper", basename)
+ if dev.startswith('loop'):
+ if not allow_loop_devices():
+ continue
+ # Skip loop devices that are not attached
+ if not os.path.exists(os.path.join(_sys_block_path, dev, 'loop')):
+ continue
+ type_ = 'loop'
+ result.append([kname, name, type_])
+ # Next, look for devices that _are_ partitions
+ for item in os.listdir(_sys_dev_block_path):
+ is_part = get_file_contents(os.path.join(_sys_dev_block_path, item, 'partition')) == "1"
+ dev = os.path.basename(os.readlink(os.path.join(_sys_dev_block_path, item)))
+ if not is_part:
+ continue
+ name = kname = os.path.join("/dev", dev)
+ result.append([name, kname, "part"])
+ return sorted(result, key=lambda x: x[0])
+
+
+def get_devices(_sys_block_path='/sys/block', device=''):
+ """
+ Captures all available block devices as reported by lsblk.
+ Additional interesting metadata like sectors, size, vendor,
+ solid/rotational, etc. is collected from /sys/block/<device>
+
+ Returns a dictionary, where keys are the full paths to devices.
+
+ ..note:: loop devices, removable media, and logical volumes are never included.
+ """
+
+ device_facts = {}
+
+ block_devs = get_block_devs_sysfs(_sys_block_path)
+
+ block_types = ['disk', 'mpath']
+ if allow_loop_devices():
+ block_types.append('loop')
+
+ for block in block_devs:
+ devname = os.path.basename(block[0])
+ diskname = block[1]
+ if block[2] not in block_types:
+ continue
+ sysdir = os.path.join(_sys_block_path, devname)
+ metadata = {}
+
+ # If the device is ceph rbd it gets excluded
+ if is_ceph_rbd(diskname):
+ continue
+
+ # If the mapper device is a logical volume it gets excluded
+ if is_mapper_device(diskname):
+ if lvm.get_device_lvs(diskname):
+ continue
+
+ # all facts that have no defaults
+ # (<name>, <path relative to _sys_block_path>)
+ facts = [('removable', 'removable'),
+ ('ro', 'ro'),
+ ('vendor', 'device/vendor'),
+ ('model', 'device/model'),
+ ('rev', 'device/rev'),
+ ('sas_address', 'device/sas_address'),
+ ('sas_device_handle', 'device/sas_device_handle'),
+ ('support_discard', 'queue/discard_granularity'),
+ ('rotational', 'queue/rotational'),
+ ('nr_requests', 'queue/nr_requests'),
+ ]
+ for key, file_ in facts:
+ metadata[key] = get_file_contents(os.path.join(sysdir, file_))
+
+ device_slaves = os.listdir(os.path.join(sysdir, 'slaves'))
+ if device_slaves:
+ metadata['device_nodes'] = ','.join(device_slaves)
+ else:
+ metadata['device_nodes'] = devname
+
+ metadata['scheduler_mode'] = ""
+ scheduler = get_file_contents(sysdir + "/queue/scheduler")
+ if scheduler is not None:
+ m = re.match(r".*?(\[(.*)\])", scheduler)
+ if m:
+ metadata['scheduler_mode'] = m.group(2)
+
+ metadata['partitions'] = get_partitions_facts(sysdir)
+
+ size = get_file_contents(os.path.join(sysdir, 'size'), 0)
+
+ metadata['sectors'] = get_file_contents(os.path.join(sysdir, 'sectors'), 0)
+ fallback_sectorsize = get_file_contents(sysdir + "/queue/hw_sector_size", 512)
+ metadata['sectorsize'] = get_file_contents(sysdir +
+ "/queue/logical_block_size",
+ fallback_sectorsize)
+ metadata['size'] = float(size) * 512
+ metadata['human_readable_size'] = human_readable_size(metadata['size'])
+ metadata['path'] = diskname
+ metadata['locked'] = is_locked_raw_device(metadata['path'])
+ metadata['type'] = block[2]
+
+ device_facts[diskname] = metadata
+ return device_facts
+
+def has_bluestore_label(device_path):
+ isBluestore = False
+ bluestoreDiskSignature = 'bluestore block device' # 22 bytes long
+
+ # throws OSError on failure
+ logger.info("opening device {} to check for BlueStore label".format(device_path))
+ try:
+ with open(device_path, "rb") as fd:
+ # read first 22 bytes looking for bluestore disk signature
+ signature = fd.read(22)
+ if signature.decode('ascii', 'replace') == bluestoreDiskSignature:
+ isBluestore = True
+ except IsADirectoryError:
+ logger.info(f'{device_path} is a directory, skipping.')
+
+ return isBluestore
diff --git a/src/ceph-volume/ceph_volume/util/encryption.py b/src/ceph-volume/ceph_volume/util/encryption.py
new file mode 100644
index 000000000..cefd6094b
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/util/encryption.py
@@ -0,0 +1,278 @@
+import base64
+import os
+import logging
+from ceph_volume import process, conf, terminal
+from ceph_volume.util import constants, system
+from ceph_volume.util.device import Device
+from .prepare import write_keyring
+from .disk import lsblk, device_family, get_part_entry_type
+
+logger = logging.getLogger(__name__)
+mlogger = terminal.MultiLogger(__name__)
+
+def get_key_size_from_conf():
+ """
+ Return the osd dmcrypt key size from config file.
+ Default is 512.
+ """
+ default_key_size = '512'
+ key_size = conf.ceph.get_safe(
+ 'osd',
+ 'osd_dmcrypt_key_size',
+ default='512', check_valid=False)
+
+ if key_size not in ['256', '512']:
+ logger.warning(("Invalid value set for osd_dmcrypt_key_size ({}). "
+ "Falling back to {}bits".format(key_size, default_key_size)))
+ return default_key_size
+
+ return key_size
+
+def create_dmcrypt_key():
+ """
+ Create the secret dm-crypt key (KEK) used to encrypt/decrypt the Volume Key.
+ """
+ random_string = os.urandom(128)
+ key = base64.b64encode(random_string).decode('utf-8')
+ return key
+
+
+def luks_format(key, device):
+ """
+ Decrypt (open) an encrypted device, previously prepared with cryptsetup
+
+ :param key: dmcrypt secret key, will be used for decrypting
+ :param device: Absolute path to device
+ """
+ command = [
+ 'cryptsetup',
+ '--batch-mode', # do not prompt
+ '--key-size',
+ get_key_size_from_conf(),
+ '--key-file', # misnomer, should be key
+ '-', # because we indicate stdin for the key here
+ 'luksFormat',
+ device,
+ ]
+ process.call(command, stdin=key, terminal_verbose=True, show_command=True)
+
+
+def plain_open(key, device, mapping):
+ """
+ Decrypt (open) an encrypted device, previously prepared with cryptsetup in plain mode
+
+ .. note: ceph-disk will require an additional b64decode call for this to work
+
+ :param key: dmcrypt secret key
+ :param device: absolute path to device
+ :param mapping: mapping name used to correlate device. Usually a UUID
+ """
+ command = [
+ 'cryptsetup',
+ '--key-file',
+ '-',
+ '--allow-discards', # allow discards (aka TRIM) requests for device
+ 'open',
+ device,
+ mapping,
+ '--type', 'plain',
+ '--key-size', '256',
+ ]
+
+ process.call(command, stdin=key, terminal_verbose=True, show_command=True)
+
+
+def luks_open(key, device, mapping):
+ """
+ Decrypt (open) an encrypted device, previously prepared with cryptsetup
+
+ .. note: ceph-disk will require an additional b64decode call for this to work
+
+ :param key: dmcrypt secret key
+ :param device: absolute path to device
+ :param mapping: mapping name used to correlate device. Usually a UUID
+ """
+ command = [
+ 'cryptsetup',
+ '--key-size',
+ get_key_size_from_conf(),
+ '--key-file',
+ '-',
+ '--allow-discards', # allow discards (aka TRIM) requests for device
+ 'luksOpen',
+ device,
+ mapping,
+ ]
+ process.call(command, stdin=key, terminal_verbose=True, show_command=True)
+
+
+def dmcrypt_close(mapping):
+ """
+ Encrypt (close) a device, previously decrypted with cryptsetup
+
+ :param mapping:
+ """
+ if not os.path.exists(mapping):
+ logger.debug('device mapper path does not exist %s' % mapping)
+ logger.debug('will skip cryptsetup removal')
+ return
+ # don't be strict about the remove call, but still warn on the terminal if it fails
+ process.run(['cryptsetup', 'remove', mapping], stop_on_error=False)
+
+
+def get_dmcrypt_key(osd_id, osd_fsid, lockbox_keyring=None):
+ """
+ Retrieve the dmcrypt (secret) key stored initially on the monitor. The key
+ is sent initially with JSON, and the Monitor then mangles the name to
+ ``dm-crypt/osd/<fsid>/luks``
+
+ The ``lockbox.keyring`` file is required for this operation, and it is
+ assumed it will exist on the path for the same OSD that is being activated.
+ To support scanning, it is optionally configurable to a custom location
+ (e.g. inside a lockbox partition mounted in a temporary location)
+ """
+ if lockbox_keyring is None:
+ lockbox_keyring = '/var/lib/ceph/osd/%s-%s/lockbox.keyring' % (conf.cluster, osd_id)
+ name = 'client.osd-lockbox.%s' % osd_fsid
+ config_key = 'dm-crypt/osd/%s/luks' % osd_fsid
+
+ mlogger.info(f'Running ceph config-key get {config_key}')
+ stdout, stderr, returncode = process.call(
+ [
+ 'ceph',
+ '--cluster', conf.cluster,
+ '--name', name,
+ '--keyring', lockbox_keyring,
+ 'config-key',
+ 'get',
+ config_key
+ ],
+ show_command=True,
+ logfile_verbose=False
+ )
+ if returncode != 0:
+ raise RuntimeError('Unable to retrieve dmcrypt secret')
+ return ' '.join(stdout).strip()
+
+
+def write_lockbox_keyring(osd_id, osd_fsid, secret):
+ """
+ Helper to write the lockbox keyring. This is needed because the bluestore OSD will
+ not persist the keyring, and it can't be stored in the data device for filestore because
+ at the time this is needed, the device is encrypted.
+
+ For bluestore: A tmpfs filesystem is mounted, so the path can get written
+ to, but the files are ephemeral, which requires this file to be created
+ every time it is activated.
+ For filestore: The path for the OSD would exist at this point even if no
+ OSD data device is mounted, so the keyring is written to fetch the key, and
+ then the data device is mounted on that directory, making the keyring
+ "disappear".
+ """
+ if os.path.exists('/var/lib/ceph/osd/%s-%s/lockbox.keyring' % (conf.cluster, osd_id)):
+ return
+
+ name = 'client.osd-lockbox.%s' % osd_fsid
+ write_keyring(
+ osd_id,
+ secret,
+ keyring_name='lockbox.keyring',
+ name=name
+ )
+
+
+def status(device):
+ """
+ Capture the metadata information of a possibly encrypted device, returning
+ a dictionary with all the values found (if any).
+
+ An encrypted device will contain information about a device. Example
+ successful output looks like::
+
+ $ cryptsetup status /dev/mapper/ed6b5a26-eafe-4cd4-87e3-422ff61e26c4
+ /dev/mapper/ed6b5a26-eafe-4cd4-87e3-422ff61e26c4 is active and is in use.
+ type: LUKS1
+ cipher: aes-xts-plain64
+ keysize: 256 bits
+ device: /dev/sdc2
+ offset: 4096 sectors
+ size: 20740063 sectors
+ mode: read/write
+
+ As long as the mapper device is in 'open' state, the ``status`` call will work.
+
+ :param device: Absolute path or UUID of the device mapper
+ """
+ command = [
+ 'cryptsetup',
+ 'status',
+ device,
+ ]
+ out, err, code = process.call(command, show_command=True, verbose_on_failure=False)
+
+ metadata = {}
+ if code != 0:
+ logger.warning('failed to detect device mapper information')
+ return metadata
+ for line in out:
+ # get rid of lines that might not be useful to construct the report:
+ if not line.startswith(' '):
+ continue
+ try:
+ column, value = line.split(': ')
+ except ValueError:
+ continue
+ metadata[column.strip()] = value.strip().strip('"')
+ return metadata
+
+
+def legacy_encrypted(device):
+ """
+ Detect if a device was encrypted with ceph-disk or not. In the case of
+ encrypted devices, include the type of encryption (LUKS, or PLAIN), and
+ infer what the lockbox partition is.
+
+ This function assumes that ``device`` will be a partition.
+ """
+ if os.path.isdir(device):
+ mounts = system.Mounts(paths=True).get_mounts()
+ # yes, rebind the device variable here because a directory isn't going
+ # to help with parsing
+ device = mounts.get(device, [None])[0]
+ if not device:
+ raise RuntimeError('unable to determine the device mounted at %s' % device)
+ metadata = {'encrypted': False, 'type': None, 'lockbox': '', 'device': device}
+ # check if the device is online/decrypted first
+ active_mapper = status(device)
+ if active_mapper:
+ # normalize a bit to ensure same values regardless of source
+ metadata['type'] = active_mapper['type'].lower().strip('12') # turn LUKS1 or LUKS2 into luks
+ metadata['encrypted'] = True if metadata['type'] in ['plain', 'luks'] else False
+ # The true device is now available to this function, so it gets
+ # re-assigned here for the lockbox checks to succeed (it is not
+ # possible to guess partitions from a device mapper device otherwise
+ device = active_mapper.get('device', device)
+ metadata['device'] = device
+ else:
+ uuid = get_part_entry_type(device)
+ guid_match = constants.ceph_disk_guids.get(uuid, {})
+ encrypted_guid = guid_match.get('encrypted', False)
+ if encrypted_guid:
+ metadata['encrypted'] = True
+ metadata['type'] = guid_match['encryption_type']
+
+ # Lets find the lockbox location now, to do this, we need to find out the
+ # parent device name for the device so that we can query all of its
+ # associated devices and *then* look for one that has the 'lockbox' label
+ # on it. Thanks for being awesome ceph-disk
+ disk_meta = lsblk(device, abspath=True)
+ if not disk_meta:
+ return metadata
+ parent_device = disk_meta['PKNAME']
+ # With the parent device set, we can now look for the lockbox listing associated devices
+ devices = [Device(i['NAME']) for i in device_family(parent_device)]
+ for d in devices:
+ if d.ceph_disk.type == 'lockbox':
+ metadata['lockbox'] = d.path
+ break
+ return metadata
diff --git a/src/ceph-volume/ceph_volume/util/lsmdisk.py b/src/ceph-volume/ceph_volume/util/lsmdisk.py
new file mode 100644
index 000000000..607fd4f0a
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/util/lsmdisk.py
@@ -0,0 +1,196 @@
+"""
+This module handles the interaction with libstoragemgmt for local disk
+devices. Interaction may fail with LSM for a number of issues, but the
+intent here is to make this a soft fail, since LSM related data is not
+a critical component of ceph-volume.
+"""
+import logging
+
+try:
+ from lsm import LocalDisk, LsmError
+ from lsm import Disk as lsm_Disk
+except ImportError:
+ lsm_available = False
+ transport_map = {}
+ health_map = {}
+ lsm_Disk = None
+else:
+ lsm_available = True
+ transport_map = {
+ lsm_Disk.LINK_TYPE_UNKNOWN: "Unavailable",
+ lsm_Disk.LINK_TYPE_FC: "Fibre Channel",
+ lsm_Disk.LINK_TYPE_SSA: "IBM SSA",
+ lsm_Disk.LINK_TYPE_SBP: "Serial Bus",
+ lsm_Disk.LINK_TYPE_SRP: "SCSI RDMA",
+ lsm_Disk.LINK_TYPE_ISCSI: "iSCSI",
+ lsm_Disk.LINK_TYPE_SAS: "SAS",
+ lsm_Disk.LINK_TYPE_ADT: "ADT (Tape)",
+ lsm_Disk.LINK_TYPE_ATA: "ATA/SATA",
+ lsm_Disk.LINK_TYPE_USB: "USB",
+ lsm_Disk.LINK_TYPE_SOP: "SCSI over PCI-E",
+ lsm_Disk.LINK_TYPE_PCIE: "PCI-E",
+ }
+ health_map = {
+ lsm_Disk.HEALTH_STATUS_UNKNOWN: "Unknown",
+ lsm_Disk.HEALTH_STATUS_FAIL: "Fail",
+ lsm_Disk.HEALTH_STATUS_WARN: "Warn",
+ lsm_Disk.HEALTH_STATUS_GOOD: "Good",
+ }
+
+logger = logging.getLogger(__name__)
+
+
+class LSMDisk:
+ def __init__(self, dev_path):
+ self.dev_path = dev_path
+ self.error_list = set()
+
+ if lsm_available:
+ self.lsm_available = True
+ self.disk = LocalDisk()
+ else:
+ self.lsm_available = False
+ self.error_list.add("libstoragemgmt (lsm module) is unavailable")
+ logger.info("LSM information is unavailable: libstoragemgmt is not installed")
+ self.disk = None
+
+ self.led_bits = None
+
+ @property
+ def errors(self):
+ """show any errors that the LSM interaction has encountered (str)"""
+ return ", ".join(self.error_list)
+
+ def _query_lsm(self, func, path):
+ """Common method used to call the LSM functions, returning the function's result or None"""
+
+ # if disk is None, lsm is unavailable so all calls should return None
+ if self.disk is None:
+ return None
+
+ method = getattr(self.disk, func)
+ try:
+ output = method(path)
+ except LsmError as err:
+ logger.error("LSM Error: {}".format(err._msg))
+ self.error_list.add(err._msg)
+ return None
+ else:
+ return output
+
+ @property
+ def led_status(self):
+ """Fetch LED status, store in the LSMDisk object and return current status (int)"""
+ if self.led_bits is None:
+ self.led_bits = self._query_lsm('led_status_get', self.dev_path) or 1
+ return self.led_bits
+ else:
+ return self.led_bits
+
+ @property
+ def led_ident_state(self):
+ """Query a disks IDENT LED state to discover when it is On, Off or Unknown (str)"""
+ if self.led_status == 1:
+ return "Unsupported"
+ if self.led_status & lsm_Disk.LED_STATUS_IDENT_ON == lsm_Disk.LED_STATUS_IDENT_ON:
+ return "On"
+ elif self.led_status & lsm_Disk.LED_STATUS_IDENT_OFF == lsm_Disk.LED_STATUS_IDENT_OFF:
+ return "Off"
+ elif self.led_status & lsm_Disk.LED_STATUS_IDENT_UNKNOWN == lsm_Disk.LED_STATUS_IDENT_UNKNOWN:
+ return "Unknown"
+
+ return "Unsupported"
+
+ @property
+ def led_fault_state(self):
+ """Query a disks FAULT LED state to discover when it is On, Off or Unknown (str)"""
+ if self.led_status == 1:
+ return "Unsupported"
+ if self.led_status & lsm_Disk.LED_STATUS_FAULT_ON == lsm_Disk.LED_STATUS_FAULT_ON:
+ return "On"
+ elif self.led_status & lsm_Disk.LED_STATUS_FAULT_OFF == lsm_Disk.LED_STATUS_FAULT_OFF:
+ return "Off"
+ elif self.led_status & lsm_Disk.LED_STATUS_FAULT_UNKNOWN == lsm_Disk.LED_STATUS_FAULT_UNKNOWN:
+ return "Unknown"
+
+ return "Unsupported"
+
+ @property
+ def led_ident_support(self):
+ """Query the LED state to determine IDENT support: Unknown, Supported, Unsupported (str)"""
+ if self.led_status == 1:
+ return "Unknown"
+
+ ident_states = (
+ lsm_Disk.LED_STATUS_IDENT_ON +
+ lsm_Disk.LED_STATUS_IDENT_OFF +
+ lsm_Disk.LED_STATUS_IDENT_UNKNOWN
+ )
+
+ if (self.led_status & ident_states) == 0:
+ return "Unsupported"
+
+ return "Supported"
+
+ @property
+ def led_fault_support(self):
+ """Query the LED state to determine FAULT support: Unknown, Supported, Unsupported (str)"""
+ if self.led_status == 1:
+ return "Unknown"
+
+ fail_states = (
+ lsm_Disk.LED_STATUS_FAULT_ON +
+ lsm_Disk.LED_STATUS_FAULT_OFF +
+ lsm_Disk.LED_STATUS_FAULT_UNKNOWN
+ )
+
+ if self.led_status & fail_states == 0:
+ return "Unsupported"
+
+ return "Supported"
+
+ @property
+ def health(self):
+ """Determine the health of the disk from LSM : Unknown, Fail, Warn or Good (str)"""
+ _health_int = self._query_lsm('health_status_get', self.dev_path)
+ return health_map.get(_health_int, "Unknown")
+
+ @property
+ def transport(self):
+ """Translate a disks link type to a human readable format (str)"""
+ _link_type = self._query_lsm('link_type_get', self.dev_path)
+ return transport_map.get(_link_type, "Unknown")
+
+
+ @property
+ def media_type(self):
+ """Use the rpm value to determine the type of disk media: Flash or HDD (str)"""
+ _rpm = self._query_lsm('rpm_get', self.dev_path)
+ if _rpm is not None:
+ if _rpm == 0:
+ return "Flash"
+ elif _rpm > 1:
+ return "HDD"
+
+ return "Unknown"
+
+ def json_report(self):
+ """Return the LSM related metadata for the current local disk (dict)"""
+ if self.lsm_available:
+ return {
+ "serialNum": self._query_lsm('serial_num_get', self.dev_path) or "Unknown",
+ "transport": self.transport,
+ "mediaType": self.media_type,
+ "rpm": self._query_lsm('rpm_get', self.dev_path) or "Unknown",
+ "linkSpeed": self._query_lsm('link_speed_get', self.dev_path) or "Unknown",
+ "health": self.health,
+ "ledSupport": {
+ "IDENTsupport": self.led_ident_support,
+ "IDENTstatus": self.led_ident_state,
+ "FAILsupport": self.led_fault_support,
+ "FAILstatus": self.led_fault_state,
+ },
+ "errors": list(self.error_list)
+ }
+ else:
+ return {}
diff --git a/src/ceph-volume/ceph_volume/util/prepare.py b/src/ceph-volume/ceph_volume/util/prepare.py
new file mode 100644
index 000000000..323ba1e93
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/util/prepare.py
@@ -0,0 +1,535 @@
+"""
+These utilities for prepare provide all the pieces needed to prepare a device
+but also a compounded ("single call") helper to do them in order. Some plugins
+may want to change some part of the process, while others might want to consume
+the single-call helper
+"""
+import errno
+import os
+import logging
+import json
+import time
+from ceph_volume import process, conf, __release__, terminal
+from ceph_volume.util import system, constants, str_to_int, disk
+
+logger = logging.getLogger(__name__)
+mlogger = terminal.MultiLogger(__name__)
+
+
+def create_key():
+ stdout, stderr, returncode = process.call(
+ ['ceph-authtool', '--gen-print-key'],
+ show_command=True,
+ logfile_verbose=False)
+ if returncode != 0:
+ raise RuntimeError('Unable to generate a new auth key')
+ return ' '.join(stdout).strip()
+
+
+def write_keyring(osd_id, secret, keyring_name='keyring', name=None):
+ """
+ Create a keyring file with the ``ceph-authtool`` utility. Constructs the
+ path over well-known conventions for the OSD, and allows any other custom
+ ``name`` to be set.
+
+ :param osd_id: The ID for the OSD to be used
+ :param secret: The key to be added as (as a string)
+ :param name: Defaults to 'osd.{ID}' but can be used to add other client
+ names, specifically for 'lockbox' type of keys
+ :param keyring_name: Alternative keyring name, for supporting other
+ types of keys like for lockbox
+ """
+ osd_keyring = '/var/lib/ceph/osd/%s-%s/%s' % (conf.cluster, osd_id, keyring_name)
+ name = name or 'osd.%s' % str(osd_id)
+ mlogger.info(f'Creating keyring file for {name}')
+ process.call(
+ [
+ 'ceph-authtool', osd_keyring,
+ '--create-keyring',
+ '--name', name,
+ '--add-key', secret
+ ],
+ logfile_verbose=False)
+ system.chown(osd_keyring)
+
+
+def get_journal_size(lv_format=True):
+ """
+ Helper to retrieve the size (defined in megabytes in ceph.conf) to create
+ the journal logical volume, it "translates" the string into a float value,
+ then converts that into gigabytes, and finally (optionally) it formats it
+ back as a string so that it can be used for creating the LV.
+
+ :param lv_format: Return a string to be used for ``lv_create``. A 5 GB size
+ would result in '5G', otherwise it will return a ``Size`` object.
+ """
+ conf_journal_size = conf.ceph.get_safe('osd', 'osd_journal_size', '5120')
+ logger.debug('osd_journal_size set to %s' % conf_journal_size)
+ journal_size = disk.Size(mb=str_to_int(conf_journal_size))
+
+ if journal_size < disk.Size(gb=2):
+ mlogger.error('Refusing to continue with configured size for journal')
+ raise RuntimeError('journal sizes must be larger than 2GB, detected: %s' % journal_size)
+ if lv_format:
+ return '%sG' % journal_size.gb.as_int()
+ return journal_size
+
+
+def get_block_db_size(lv_format=True):
+ """
+ Helper to retrieve the size (defined in megabytes in ceph.conf) to create
+ the block.db logical volume, it "translates" the string into a float value,
+ then converts that into gigabytes, and finally (optionally) it formats it
+ back as a string so that it can be used for creating the LV.
+
+ :param lv_format: Return a string to be used for ``lv_create``. A 5 GB size
+ would result in '5G', otherwise it will return a ``Size`` object.
+
+ .. note: Configuration values are in bytes, unlike journals which
+ are defined in gigabytes
+ """
+ conf_db_size = None
+ try:
+ conf_db_size = conf.ceph.get_safe('osd', 'bluestore_block_db_size', None)
+ except RuntimeError:
+ logger.exception("failed to load ceph configuration, will use defaults")
+
+ if not conf_db_size:
+ logger.debug(
+ 'block.db has no size configuration, will fallback to using as much as possible'
+ )
+ # TODO better to return disk.Size(b=0) here
+ return None
+ logger.debug('bluestore_block_db_size set to %s' % conf_db_size)
+ db_size = disk.Size(b=str_to_int(conf_db_size))
+
+ if db_size < disk.Size(gb=2):
+ mlogger.error('Refusing to continue with configured size for block.db')
+ raise RuntimeError('block.db sizes must be larger than 2GB, detected: %s' % db_size)
+ if lv_format:
+ return '%sG' % db_size.gb.as_int()
+ return db_size
+
+def get_block_wal_size(lv_format=True):
+ """
+ Helper to retrieve the size (defined in megabytes in ceph.conf) to create
+ the block.wal logical volume, it "translates" the string into a float value,
+ then converts that into gigabytes, and finally (optionally) it formats it
+ back as a string so that it can be used for creating the LV.
+
+ :param lv_format: Return a string to be used for ``lv_create``. A 5 GB size
+ would result in '5G', otherwise it will return a ``Size`` object.
+
+ .. note: Configuration values are in bytes, unlike journals which
+ are defined in gigabytes
+ """
+ conf_wal_size = None
+ try:
+ conf_wal_size = conf.ceph.get_safe('osd', 'bluestore_block_wal_size', None)
+ except RuntimeError:
+ logger.exception("failed to load ceph configuration, will use defaults")
+
+ if not conf_wal_size:
+ logger.debug(
+ 'block.wal has no size configuration, will fallback to using as much as possible'
+ )
+ return None
+ logger.debug('bluestore_block_wal_size set to %s' % conf_wal_size)
+ wal_size = disk.Size(b=str_to_int(conf_wal_size))
+
+ if wal_size < disk.Size(gb=2):
+ mlogger.error('Refusing to continue with configured size for block.wal')
+ raise RuntimeError('block.wal sizes must be larger than 2GB, detected: %s' % wal_size)
+ if lv_format:
+ return '%sG' % wal_size.gb.as_int()
+ return wal_size
+
+
+def create_id(fsid, json_secrets, osd_id=None):
+ """
+ :param fsid: The osd fsid to create, always required
+ :param json_secrets: a json-ready object with whatever secrets are wanted
+ to be passed to the monitor
+ :param osd_id: Reuse an existing ID from an OSD that's been destroyed, if the
+ id does not exist in the cluster a new ID will be created
+ """
+ bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
+ cmd = [
+ 'ceph',
+ '--cluster', conf.cluster,
+ '--name', 'client.bootstrap-osd',
+ '--keyring', bootstrap_keyring,
+ '-i', '-',
+ 'osd', 'new', fsid
+ ]
+ if osd_id is not None:
+ if osd_id_available(osd_id):
+ cmd.append(osd_id)
+ else:
+ raise RuntimeError("The osd ID {} is already in use or does not exist.".format(osd_id))
+ stdout, stderr, returncode = process.call(
+ cmd,
+ stdin=json_secrets,
+ show_command=True
+ )
+ if returncode != 0:
+ raise RuntimeError('Unable to create a new OSD id')
+ return ' '.join(stdout).strip()
+
+
+def osd_id_available(osd_id):
+ """
+ Checks to see if an osd ID exists and if it's available for
+ reuse. Returns True if it is, False if it isn't.
+
+ :param osd_id: The osd ID to check
+ """
+ if osd_id is None:
+ return False
+
+ bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
+ stdout, stderr, returncode = process.call(
+ [
+ 'ceph',
+ '--cluster', conf.cluster,
+ '--name', 'client.bootstrap-osd',
+ '--keyring', bootstrap_keyring,
+ 'osd',
+ 'tree',
+ '-f', 'json',
+ ],
+ show_command=True
+ )
+ if returncode != 0:
+ raise RuntimeError('Unable check if OSD id exists: %s' % osd_id)
+
+ output = json.loads(''.join(stdout).strip())
+ osds = output['nodes']
+ osd = [osd for osd in osds if str(osd['id']) == str(osd_id)]
+ if not osd or (osd and osd[0].get('status') == "destroyed"):
+ return True
+ return False
+
+
+def mount_tmpfs(path):
+ process.run([
+ 'mount',
+ '-t',
+ 'tmpfs', 'tmpfs',
+ path
+ ])
+
+ # Restore SELinux context
+ system.set_context(path)
+
+
+def create_osd_path(osd_id, tmpfs=False):
+ path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
+ system.mkdir_p('/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id))
+ if tmpfs:
+ mount_tmpfs(path)
+
+
+def format_device(device):
+ # only supports xfs
+ command = ['mkfs', '-t', 'xfs']
+
+ # get the mkfs options if any for xfs,
+ # fallback to the default options defined in constants.mkfs
+ flags = conf.ceph.get_list(
+ 'osd',
+ 'osd_mkfs_options_xfs',
+ default=constants.mkfs.get('xfs'),
+ split=' ',
+ )
+
+ # always force
+ if '-f' not in flags:
+ flags.insert(0, '-f')
+
+ command.extend(flags)
+ command.append(device)
+ process.run(command)
+
+
+def _normalize_mount_flags(flags, extras=None):
+ """
+ Mount flag options have to be a single string, separated by a comma. If the
+ flags are separated by spaces, or with commas and spaces in ceph.conf, the
+ mount options will be passed incorrectly.
+
+ This will help when parsing ceph.conf values return something like::
+
+ ["rw,", "exec,"]
+
+ Or::
+
+ [" rw ,", "exec"]
+
+ :param flags: A list of flags, or a single string of mount flags
+ :param extras: Extra set of mount flags, useful when custom devices like VDO need
+ ad-hoc mount configurations
+ """
+ # Instead of using set(), we append to this new list here, because set()
+ # will create an arbitrary order on the items that is made worst when
+ # testing with tools like tox that includes a randomizer seed. By
+ # controlling the order, it is easier to correctly assert the expectation
+ unique_flags = []
+ if isinstance(flags, list):
+ if extras:
+ flags.extend(extras)
+
+ # ensure that spaces and commas are removed so that they can join
+ # correctly, remove duplicates
+ for f in flags:
+ if f and f not in unique_flags:
+ unique_flags.append(f.strip().strip(','))
+ return ','.join(unique_flags)
+
+ # split them, clean them, and join them back again
+ flags = flags.strip().split(' ')
+ if extras:
+ flags.extend(extras)
+
+ # remove possible duplicates
+ for f in flags:
+ if f and f not in unique_flags:
+ unique_flags.append(f.strip().strip(','))
+ flags = ','.join(unique_flags)
+ # Before returning, split them again, since strings can be mashed up
+ # together, preventing removal of duplicate entries
+ return ','.join(set(flags.split(',')))
+
+
+def mount_osd(device, osd_id, **kw):
+ extras = []
+ is_vdo = kw.get('is_vdo', '0')
+ if is_vdo == '1':
+ extras = ['discard']
+ destination = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
+ command = ['mount', '-t', 'xfs', '-o']
+ flags = conf.ceph.get_list(
+ 'osd',
+ 'osd_mount_options_xfs',
+ default=constants.mount.get('xfs'),
+ split=' ',
+ )
+ command.append(
+ _normalize_mount_flags(flags, extras=extras)
+ )
+ command.append(device)
+ command.append(destination)
+ process.run(command)
+
+ # Restore SELinux context
+ system.set_context(destination)
+
+
+def _link_device(device, device_type, osd_id):
+ """
+ Allow linking any device type in an OSD directory. ``device`` must the be
+ source, with an absolute path and ``device_type`` will be the destination
+ name, like 'journal', or 'block'
+ """
+ device_path = '/var/lib/ceph/osd/%s-%s/%s' % (
+ conf.cluster,
+ osd_id,
+ device_type
+ )
+ command = ['ln', '-s', device, device_path]
+ system.chown(device)
+
+ process.run(command)
+
+def _validate_bluestore_device(device, excepted_device_type, osd_uuid):
+ """
+ Validate whether the given device is truly what it is supposed to be
+ """
+
+ out, err, ret = process.call(['ceph-bluestore-tool', 'show-label', '--dev', device])
+ if err:
+ terminal.error('ceph-bluestore-tool failed to run. %s'% err)
+ raise SystemExit(1)
+ if ret:
+ terminal.error('no label on %s'% device)
+ raise SystemExit(1)
+ oj = json.loads(''.join(out))
+ if device not in oj:
+ terminal.error('%s not in the output of ceph-bluestore-tool, buggy?'% device)
+ raise SystemExit(1)
+ current_device_type = oj[device]['description']
+ if current_device_type != excepted_device_type:
+ terminal.error('%s is not a %s device but %s'% (device, excepted_device_type, current_device_type))
+ raise SystemExit(1)
+ current_osd_uuid = oj[device]['osd_uuid']
+ if current_osd_uuid != osd_uuid:
+ terminal.error('device %s is used by another osd %s as %s, should be %s'% (device, current_osd_uuid, current_device_type, osd_uuid))
+ raise SystemExit(1)
+
+def link_journal(journal_device, osd_id):
+ _link_device(journal_device, 'journal', osd_id)
+
+
+def link_block(block_device, osd_id):
+ _link_device(block_device, 'block', osd_id)
+
+
+def link_wal(wal_device, osd_id, osd_uuid=None):
+ _validate_bluestore_device(wal_device, 'bluefs wal', osd_uuid)
+ _link_device(wal_device, 'block.wal', osd_id)
+
+
+def link_db(db_device, osd_id, osd_uuid=None):
+ _validate_bluestore_device(db_device, 'bluefs db', osd_uuid)
+ _link_device(db_device, 'block.db', osd_id)
+
+
+def get_monmap(osd_id):
+ """
+ Before creating the OSD files, a monmap needs to be retrieved so that it
+ can be used to tell the monitor(s) about the new OSD. A call will look like::
+
+ ceph --cluster ceph --name client.bootstrap-osd \
+ --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring \
+ mon getmap -o /var/lib/ceph/osd/ceph-0/activate.monmap
+ """
+ path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
+ bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
+ monmap_destination = os.path.join(path, 'activate.monmap')
+
+ process.run([
+ 'ceph',
+ '--cluster', conf.cluster,
+ '--name', 'client.bootstrap-osd',
+ '--keyring', bootstrap_keyring,
+ 'mon', 'getmap', '-o', monmap_destination
+ ])
+
+
+def get_osdspec_affinity():
+ return os.environ.get('CEPH_VOLUME_OSDSPEC_AFFINITY', '')
+
+
+def osd_mkfs_bluestore(osd_id, fsid, keyring=None, wal=False, db=False):
+ """
+ Create the files for the OSD to function. A normal call will look like:
+
+ ceph-osd --cluster ceph --mkfs --mkkey -i 0 \
+ --monmap /var/lib/ceph/osd/ceph-0/activate.monmap \
+ --osd-data /var/lib/ceph/osd/ceph-0 \
+ --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c \
+ --keyring /var/lib/ceph/osd/ceph-0/keyring \
+ --setuser ceph --setgroup ceph
+
+ In some cases it is required to use the keyring, when it is passed in as
+ a keyword argument it is used as part of the ceph-osd command
+ """
+ path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
+ monmap = os.path.join(path, 'activate.monmap')
+
+ system.chown(path)
+
+ base_command = [
+ 'ceph-osd',
+ '--cluster', conf.cluster,
+ '--osd-objectstore', 'bluestore',
+ '--mkfs',
+ '-i', osd_id,
+ '--monmap', monmap,
+ ]
+
+ supplementary_command = [
+ '--osd-data', path,
+ '--osd-uuid', fsid,
+ '--setuser', 'ceph',
+ '--setgroup', 'ceph'
+ ]
+
+ if keyring is not None:
+ base_command.extend(['--keyfile', '-'])
+
+ if wal:
+ base_command.extend(
+ ['--bluestore-block-wal-path', wal]
+ )
+ system.chown(wal)
+
+ if db:
+ base_command.extend(
+ ['--bluestore-block-db-path', db]
+ )
+ system.chown(db)
+
+ if get_osdspec_affinity():
+ base_command.extend(['--osdspec-affinity', get_osdspec_affinity()])
+
+ command = base_command + supplementary_command
+
+ """
+ When running in containers the --mkfs on raw device sometimes fails
+ to acquire a lock through flock() on the device because systemd-udevd holds one temporarily.
+ See KernelDevice.cc and _lock() to understand how ceph-osd acquires the lock.
+ Because this is really transient, we retry up to 5 times and wait for 1 sec in-between
+ """
+ for retry in range(5):
+ _, _, returncode = process.call(command, stdin=keyring, terminal_verbose=True, show_command=True)
+ if returncode == 0:
+ break
+ else:
+ if returncode == errno.EWOULDBLOCK:
+ time.sleep(1)
+ logger.info('disk is held by another process, trying to mkfs again... (%s/5 attempt)' % retry)
+ continue
+ else:
+ raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
+
+
+def osd_mkfs_filestore(osd_id, fsid, keyring):
+ """
+ Create the files for the OSD to function. A normal call will look like:
+
+ ceph-osd --cluster ceph --mkfs --mkkey -i 0 \
+ --monmap /var/lib/ceph/osd/ceph-0/activate.monmap \
+ --osd-data /var/lib/ceph/osd/ceph-0 \
+ --osd-journal /var/lib/ceph/osd/ceph-0/journal \
+ --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c \
+ --keyring /var/lib/ceph/osd/ceph-0/keyring \
+ --setuser ceph --setgroup ceph
+
+ """
+ path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
+ monmap = os.path.join(path, 'activate.monmap')
+ journal = os.path.join(path, 'journal')
+
+ system.chown(journal)
+ system.chown(path)
+
+ command = [
+ 'ceph-osd',
+ '--cluster', conf.cluster,
+ '--osd-objectstore', 'filestore',
+ '--mkfs',
+ '-i', osd_id,
+ '--monmap', monmap,
+ ]
+
+ if get_osdspec_affinity():
+ command.extend(['--osdspec-affinity', get_osdspec_affinity()])
+
+ if __release__ != 'luminous':
+ # goes through stdin
+ command.extend(['--keyfile', '-'])
+
+ command.extend([
+ '--osd-data', path,
+ '--osd-journal', journal,
+ '--osd-uuid', fsid,
+ '--setuser', 'ceph',
+ '--setgroup', 'ceph'
+ ])
+
+ _, _, returncode = process.call(
+ command, stdin=keyring, terminal_verbose=True, show_command=True
+ )
+ if returncode != 0:
+ raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
diff --git a/src/ceph-volume/ceph_volume/util/system.py b/src/ceph-volume/ceph_volume/util/system.py
new file mode 100644
index 000000000..590a0599b
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/util/system.py
@@ -0,0 +1,419 @@
+import errno
+import logging
+import os
+import pwd
+import platform
+import tempfile
+import uuid
+import subprocess
+import threading
+from ceph_volume import process, terminal
+from . import as_string
+
+# python2 has no FileNotFoundError
+try:
+ FileNotFoundError
+except NameError:
+ FileNotFoundError = OSError
+
+logger = logging.getLogger(__name__)
+mlogger = terminal.MultiLogger(__name__)
+
+# TODO: get these out of here and into a common area for others to consume
+if platform.system() == 'FreeBSD':
+ FREEBSD = True
+ DEFAULT_FS_TYPE = 'zfs'
+ PROCDIR = '/compat/linux/proc'
+ # FreeBSD does not have blockdevices any more
+ BLOCKDIR = '/dev'
+ ROOTGROUP = 'wheel'
+else:
+ FREEBSD = False
+ DEFAULT_FS_TYPE = 'xfs'
+ PROCDIR = '/proc'
+ BLOCKDIR = '/sys/block'
+ ROOTGROUP = 'root'
+
+host_rootfs = '/rootfs'
+run_host_cmd = [
+ 'nsenter',
+ '--mount={}/proc/1/ns/mnt'.format(host_rootfs),
+ '--ipc={}/proc/1/ns/ipc'.format(host_rootfs),
+ '--net={}/proc/1/ns/net'.format(host_rootfs),
+ '--uts={}/proc/1/ns/uts'.format(host_rootfs)
+]
+
+def generate_uuid():
+ return str(uuid.uuid4())
+
+def find_executable_on_host(locations=[], executable='', binary_check='/bin/ls'):
+ paths = ['{}/{}'.format(location, executable) for location in locations]
+ command = []
+ command.extend(run_host_cmd + [binary_check] + paths)
+ process = subprocess.Popen(
+ command,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ stdin=subprocess.PIPE,
+ close_fds=True
+ )
+ stdout = as_string(process.stdout.read())
+ if stdout:
+ executable_on_host = stdout.split('\n')[0]
+ logger.info('Executable {} found on the host, will use {}'.format(executable, executable_on_host))
+ return executable_on_host
+ else:
+ logger.warning('Executable {} not found on the host, will return {} as-is'.format(executable, executable))
+ return executable
+
+def which(executable, run_on_host=False):
+ """find the location of an executable"""
+ def _get_path(executable, locations):
+ for location in locations:
+ executable_path = os.path.join(location, executable)
+ if os.path.exists(executable_path) and os.path.isfile(executable_path):
+ return executable_path
+ return None
+
+ static_locations = (
+ '/usr/local/bin',
+ '/bin',
+ '/usr/bin',
+ '/usr/local/sbin',
+ '/usr/sbin',
+ '/sbin',
+ )
+
+ if not run_on_host:
+ path = os.getenv('PATH', '')
+ path_locations = path.split(':')
+ exec_in_path = _get_path(executable, path_locations)
+ if exec_in_path:
+ return exec_in_path
+ mlogger.warning('Executable {} not in PATH: {}'.format(executable, path))
+
+ exec_in_static_locations = _get_path(executable, static_locations)
+ if exec_in_static_locations:
+ mlogger.warning('Found executable under {}, please ensure $PATH is set correctly!'.format(exec_in_static_locations))
+ return exec_in_static_locations
+ else:
+ executable = find_executable_on_host(static_locations, executable)
+
+ # At this point, either `find_executable_on_host()` found an executable on the host
+ # or we fallback to just returning the argument as-is, to prevent a hard fail, and
+ # hoping that the system might have the executable somewhere custom
+ return executable
+
+def get_ceph_user_ids():
+ """
+ Return the id and gid of the ceph user
+ """
+ try:
+ user = pwd.getpwnam('ceph')
+ except KeyError:
+ # is this even possible?
+ raise RuntimeError('"ceph" user is not available in the current system')
+ return user[2], user[3]
+
+
+def get_file_contents(path, default=''):
+ contents = default
+ if not os.path.exists(path):
+ return contents
+ try:
+ with open(path, 'r') as open_file:
+ contents = open_file.read().strip()
+ except Exception:
+ logger.exception('Failed to read contents from: %s' % path)
+
+ return contents
+
+
+def mkdir_p(path, chown=True):
+ """
+ A `mkdir -p` that defaults to chown the path to the ceph user
+ """
+ try:
+ os.mkdir(path)
+ except OSError as e:
+ if e.errno == errno.EEXIST:
+ pass
+ else:
+ raise
+ if chown:
+ uid, gid = get_ceph_user_ids()
+ os.chown(path, uid, gid)
+
+
+def chown(path, recursive=True):
+ """
+ ``chown`` a path to the ceph user (uid and guid fetched at runtime)
+ """
+ uid, gid = get_ceph_user_ids()
+ if os.path.islink(path):
+ process.run(['chown', '-h', 'ceph:ceph', path])
+ path = os.path.realpath(path)
+ if recursive:
+ process.run(['chown', '-R', 'ceph:ceph', path])
+ else:
+ os.chown(path, uid, gid)
+
+
+def is_binary(path):
+ """
+ Detect if a file path is a binary or not. Will falsely report as binary
+ when utf-16 encoded. In the ceph universe there is no such risk (yet)
+ """
+ with open(path, 'rb') as fp:
+ contents = fp.read(8192)
+ if b'\x00' in contents: # a null byte may signal binary
+ return True
+ return False
+
+
+class tmp_mount(object):
+ """
+ Temporarily mount a device on a temporary directory,
+ and unmount it upon exit
+
+ When ``encrypted`` is set to ``True``, the exit method will call out to
+ close the device so that it doesn't remain open after mounting. It is
+ assumed that it will be open because otherwise it wouldn't be possible to
+ mount in the first place
+ """
+
+ def __init__(self, device, encrypted=False):
+ self.device = device
+ self.path = None
+ self.encrypted = encrypted
+
+ def __enter__(self):
+ self.path = tempfile.mkdtemp()
+ process.run([
+ 'mount',
+ '-v',
+ self.device,
+ self.path
+ ])
+ return self.path
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ process.run([
+ 'umount',
+ '-v',
+ self.path
+ ])
+ if self.encrypted:
+ # avoid a circular import from the encryption module
+ from ceph_volume.util import encryption
+ encryption.dmcrypt_close(self.device)
+
+
+def unmount_tmpfs(path):
+ """
+ Removes the mount at the given path iff the path is a tmpfs mount point.
+ Otherwise no action is taken.
+ """
+ _out, _err, rc = process.call(['findmnt', '-t', 'tmpfs', '-M', path])
+ if rc != 0:
+ logger.info('{} does not appear to be a tmpfs mount'.format(path))
+ else:
+ logger.info('Unmounting tmpfs path at {}'.format( path))
+ unmount(path)
+
+
+def unmount(path):
+ """
+ Removes mounts at the given path
+ """
+ process.run([
+ 'umount',
+ '-v',
+ path,
+ ])
+
+
+def path_is_mounted(path, destination=None):
+ """
+ Check if the given path is mounted
+ """
+ m = Mounts(paths=True)
+ mounts = m.get_mounts()
+ realpath = os.path.realpath(path)
+ mounted_locations = mounts.get(realpath, [])
+
+ if destination:
+ return destination in mounted_locations
+ return mounted_locations != []
+
+
+def device_is_mounted(dev, destination=None):
+ """
+ Check if the given device is mounted, optionally validating that a
+ destination exists
+ """
+ plain_mounts = Mounts(devices=True)
+ realpath_mounts = Mounts(devices=True, realpath=True)
+
+ realpath_dev = os.path.realpath(dev) if dev.startswith('/') else dev
+ destination = os.path.realpath(destination) if destination else None
+ # plain mounts
+ plain_dev_mounts = plain_mounts.get_mounts().get(dev, [])
+ realpath_dev_mounts = plain_mounts.get_mounts().get(realpath_dev, [])
+ # realpath mounts
+ plain_dev_real_mounts = realpath_mounts.get_mounts().get(dev, [])
+ realpath_dev_real_mounts = realpath_mounts.get_mounts().get(realpath_dev, [])
+
+ mount_locations = [
+ plain_dev_mounts,
+ realpath_dev_mounts,
+ plain_dev_real_mounts,
+ realpath_dev_real_mounts
+ ]
+
+ for mounts in mount_locations:
+ if mounts: # we have a matching mount
+ if destination:
+ if destination in mounts:
+ logger.info(
+ '%s detected as mounted, exists at destination: %s', dev, destination
+ )
+ return True
+ else:
+ logger.info('%s was found as mounted', dev)
+ return True
+ logger.info('%s was not found as mounted', dev)
+ return False
+
+class Mounts(object):
+ excluded_paths = []
+
+ def __init__(self, devices=False, paths=False, realpath=False):
+ self.devices = devices
+ self.paths = paths
+ self.realpath = realpath
+
+ def safe_realpath(self, path, timeout=0.2):
+ def _realpath(path, result):
+ p = os.path.realpath(path)
+ result.append(p)
+
+ result = []
+ t = threading.Thread(target=_realpath, args=(path, result))
+ t.setDaemon(True)
+ t.start()
+ t.join(timeout)
+ if t.is_alive():
+ return None
+ return result[0]
+
+ def get_mounts(self):
+ """
+ Create a mapping of all available system mounts so that other helpers can
+ detect nicely what path or device is mounted
+
+ It ignores (most of) non existing devices, but since some setups might need
+ some extra device information, it will make an exception for:
+
+ - tmpfs
+ - devtmpfs
+ - /dev/root
+
+ If ``devices`` is set to ``True`` the mapping will be a device-to-path(s),
+ if ``paths`` is set to ``True`` then the mapping will be
+ a path-to-device(s)
+
+ :param realpath: Resolve devices to use their realpaths. This is useful for
+ paths like LVM where more than one path can point to the same device
+ """
+ devices_mounted = {}
+ paths_mounted = {}
+ do_not_skip = ['tmpfs', 'devtmpfs', '/dev/root']
+ default_to_devices = self.devices is False and self.paths is False
+
+
+ with open(PROCDIR + '/mounts', 'rb') as mounts:
+ proc_mounts = mounts.readlines()
+
+ for line in proc_mounts:
+ fields = [as_string(f) for f in line.split()]
+ if len(fields) < 3:
+ continue
+ if fields[0] in Mounts.excluded_paths or \
+ fields[1] in Mounts.excluded_paths:
+ continue
+ if self.realpath:
+ if fields[0].startswith('/'):
+ device = self.safe_realpath(fields[0])
+ if device is None:
+ logger.warning(f"Can't get realpath on {fields[0]}, skipping.")
+ Mounts.excluded_paths.append(fields[0])
+ continue
+ else:
+ device = fields[0]
+ else:
+ device = fields[0]
+ path = self.safe_realpath(fields[1])
+ if path is None:
+ logger.warning(f"Can't get realpath on {fields[1]}, skipping.")
+ Mounts.excluded_paths.append(fields[1])
+ continue
+ # only care about actual existing devices
+ if not os.path.exists(device) or not device.startswith('/'):
+ if device not in do_not_skip:
+ continue
+ if device in devices_mounted.keys():
+ devices_mounted[device].append(path)
+ else:
+ devices_mounted[device] = [path]
+ if path in paths_mounted.keys():
+ paths_mounted[path].append(device)
+ else:
+ paths_mounted[path] = [device]
+
+ # Default to returning information for devices if
+ if self.devices is True or default_to_devices:
+ return devices_mounted
+ else:
+ return paths_mounted
+
+
+def set_context(path, recursive=False):
+ """
+ Calls ``restorecon`` to set the proper context on SELinux systems. Only if
+ the ``restorecon`` executable is found anywhere in the path it will get
+ called.
+
+ If the ``CEPH_VOLUME_SKIP_RESTORECON`` environment variable is set to
+ any of: "1", "true", "yes" the call will be skipped as well.
+
+ Finally, if SELinux is not enabled, or not available in the system,
+ ``restorecon`` will not be called. This is checked by calling out to the
+ ``selinuxenabled`` executable. If that tool is not installed or returns
+ a non-zero exit status then no further action is taken and this function
+ will return.
+ """
+ skip = os.environ.get('CEPH_VOLUME_SKIP_RESTORECON', '')
+ if skip.lower() in ['1', 'true', 'yes']:
+ logger.info(
+ 'CEPH_VOLUME_SKIP_RESTORECON environ is set, will not call restorecon'
+ )
+ return
+
+ try:
+ stdout, stderr, code = process.call(['selinuxenabled'],
+ verbose_on_failure=False)
+ except FileNotFoundError:
+ logger.info('No SELinux found, skipping call to restorecon')
+ return
+
+ if code != 0:
+ logger.info('SELinux is not enabled, will not call restorecon')
+ return
+
+ # restore selinux context to default policy values
+ if which('restorecon').startswith('/'):
+ if recursive:
+ process.run(['restorecon', '-R', path])
+ else:
+ process.run(['restorecon', path])
diff --git a/src/ceph-volume/ceph_volume/util/templates.py b/src/ceph-volume/ceph_volume/util/templates.py
new file mode 100644
index 000000000..85a366d26
--- /dev/null
+++ b/src/ceph-volume/ceph_volume/util/templates.py
@@ -0,0 +1,49 @@
+
+osd_header = """
+{:-^100}""".format('')
+
+
+osd_component_titles = """
+ Type Path LV Size % of device"""
+
+
+osd_reused_id = """
+ OSD id {id_: <55}"""
+
+
+osd_component = """
+ {_type: <15} {path: <55} {size: <15} {percent:.2%}"""
+
+
+osd_encryption = """
+ encryption: {enc: <15}"""
+
+
+total_osds = """
+Total OSDs: {total_osds}
+"""
+
+
+def filtered_devices(devices):
+ string = """
+Filtered Devices:"""
+ for device, info in devices.items():
+ string += """
+ %s""" % device
+
+ for reason in info['reasons']:
+ string += """
+ %s""" % reason
+
+ string += "\n"
+ return string
+
+
+ssd_volume_group = """
+Solid State VG:
+ Targets: {target: <25} Total size: {total_lv_size: <25}
+ Total LVs: {total_lvs: <25} Size per LV: {lv_size: <25}
+ Devices: {block_db_devices}
+"""
+
+