diff options
Diffstat (limited to '')
-rw-r--r-- | src/ceph-volume/ceph_volume/util/__init__.py | 108 | ||||
-rw-r--r-- | src/ceph-volume/ceph_volume/util/arg_validators.py | 234 | ||||
-rw-r--r-- | src/ceph-volume/ceph_volume/util/constants.py | 46 | ||||
-rw-r--r-- | src/ceph-volume/ceph_volume/util/device.py | 722 | ||||
-rw-r--r-- | src/ceph-volume/ceph_volume/util/disk.py | 943 | ||||
-rw-r--r-- | src/ceph-volume/ceph_volume/util/encryption.py | 294 | ||||
-rw-r--r-- | src/ceph-volume/ceph_volume/util/lsmdisk.py | 196 | ||||
-rw-r--r-- | src/ceph-volume/ceph_volume/util/prepare.py | 460 | ||||
-rw-r--r-- | src/ceph-volume/ceph_volume/util/system.py | 419 | ||||
-rw-r--r-- | src/ceph-volume/ceph_volume/util/templates.py | 49 |
10 files changed, 3471 insertions, 0 deletions
# diff --git a/src/ceph-volume/ceph_volume/util/__init__.py b/src/ceph-volume/ceph_volume/util/__init__.py
# new file mode 100644 index 000000000..1b5afe970 --- /dev/null +++ b/src/ceph-volume/ceph_volume/util/__init__.py @@ -0,0 +1,108 @@
import logging
from math import floor
from ceph_volume import terminal

try:
    input = raw_input  # pylint: disable=redefined-builtin
except NameError:
    pass

logger = logging.getLogger(__name__)


def as_string(string):
    """
    Ensure that whatever type of string is incoming, it is returned as an
    actual string, versus 'bytes' which Python 3 likes to use.

    :param string: a ``bytes`` or ``str`` value (anything that is not
                   ``bytes`` is returned untouched)
    :returns: the decoded text for ``bytes`` input, otherwise the input itself
    """
    if isinstance(string, bytes):
        # we really ignore here if we can't properly decode with utf-8
        return string.decode('utf-8', 'ignore')
    return string


def as_bytes(string):
    """
    Ensure that whatever type of string is incoming, it is returned as bytes,
    encoding to utf-8 otherwise

    :param string: a ``bytes`` or ``str`` value
    :returns: ``bytes``; characters that cannot be encoded are dropped
    """
    if isinstance(string, bytes):
        return string
    return string.encode('utf-8', errors='ignore')


def str_to_int(string, round_down=True):
    """
    Parses a string number into an integer, optionally converting to a float
    and rounding down.

    Some LVM values may come with a comma instead of a dot to define decimals.
    This function normalizes a comma into a dot

    :param string: the value to convert; a ``str`` such as ``'1,5'`` or
                   ``'1.5'``, or an ``int``/``float`` that is used as-is
    :param round_down: when true (default) the value is floored, otherwise it
                       is rounded with the builtin ``round()``
    :returns: the converted ``int``
    :raises RuntimeError: when the value cannot be represented as an integer,
                          including non-finite values such as ``'inf'`` and
                          ``'nan'``
    """
    error_msg = "Unable to convert to integer: '%s'" % str(string)
    try:
        integer = float(string.replace(',', '.'))
    except AttributeError:
        # this might be a integer already, so try to use it, otherwise raise
        # the original exception
        if isinstance(string, (int, float)):
            integer = string
        else:
            logger.exception(error_msg)
            raise RuntimeError(error_msg)
    except (TypeError, ValueError):
        logger.exception(error_msg)
        raise RuntimeError(error_msg)

    try:
        if round_down:
            integer = floor(integer)
        else:
            integer = round(integer)
    except (OverflowError, ValueError):
        # 'inf' and 'nan' parse fine as floats but have no integer value;
        # report them the same way as any other unparsable input instead of
        # leaking a different exception type to callers
        logger.exception(error_msg)
        raise RuntimeError(error_msg)
    return int(integer)


def str_to_bool(val):
    """
    Convert a string representation of truth to True or False

    True values are 'y', 'yes', or ''; case-insensitive
    False values are 'n', or 'no'; case-insensitive
    Raises ValueError if 'val' is anything else.
    """
    true_vals = ['yes', 'y', '']
    false_vals = ['no', 'n']
    try:
        val = val.lower()
    except AttributeError:
        # not a string (e.g. a bool or an int): compare its text form
        val = str(val).lower()
    if val in true_vals:
        return True
    elif val in false_vals:
        return False
    else:
        raise ValueError("Invalid input value: %s" % val)


def prompt_bool(question, input_=None):
    """
    Interface to prompt a boolean (or boolean-like) response from a user.
    Usually a confirmation.

    :param question: text shown to the user
    :param input_: optional callable used instead of the builtin ``input``
                   (it receives the formatted prompt and returns the answer)
    :returns: ``True`` or ``False`` according to ``str_to_bool``
    """
    input_prompt = input_ or input
    prompt_format = '--> {question} '.format(question=question)
    # keep asking until a valid answer is given; a loop (rather than
    # recursion) keeps the stack flat no matter how many invalid answers
    # are entered
    while True:
        response = input_prompt(prompt_format)
        try:
            return str_to_bool(response)
        except ValueError:
            terminal.error('Valid true responses are: y, yes, <Enter>')
            terminal.error('Valid false responses are: n, no')
            terminal.error('That response was invalid, please try again')


def merge_dict(x, y):
    """
    Return two dicts merged

    Neither input is modified; on duplicate keys the value from ``y`` wins.
    """
    z = x.copy()
    z.update(y)
    return z
# \ No newline at end of file
# diff --git a/src/ceph-volume/ceph_volume/util/arg_validators.py b/src/ceph-volume/ceph_volume/util/arg_validators.py
# new file mode 100644 index 000000000..1abb5165e --- /dev/null +++ b/src/ceph-volume/ceph_volume/util/arg_validators.py @@ -0,0 +1,234 @@
import argparse
import os
import math
from ceph_volume import terminal, decorators, process
from ceph_volume.util.device import Device
from ceph_volume.util import disk


def valid_osd_id(val):
    """
    Normalize an OSD id: the value must be convertible with ``int()`` and is
    returned as its string form. ``int()`` raises for anything non-numeric.
    """
    return str(int(val))


class ValidDevice(object):
    """
    Callable validator: builds a ``Device`` from a path, rejects it when it
    does not exist, carries GPT headers (unless ``gpt_ok``) or has
    partitions, and returns either the ``Device`` or its path string.
    """

    def __init__(self, as_string=False, gpt_ok=False):
        # as_string: return a path (or "vg/lv" for LVs) instead of the Device
        self.as_string = as_string
        # gpt_ok: do not treat GPT headers as an error
        self.gpt_ok = gpt_ok

    def __call__(self, dev_path):
        self.get_device(dev_path)
        self._validated_device = self._is_valid_device()
        return self._format_device(self._validated_device)

    def get_device(self, dev_path):
        """Create the ``Device`` for ``dev_path`` and remember both."""
        self._device = Device(dev_path)
        self.dev_path = dev_path

    def _format_device(self, device):
        """Return ``device`` itself, or its string form when ``as_string``."""
        if self.as_string:
            if device.is_lv:
                # all codepaths expect an lv path to be returned in this format
                return "{}/{}".format(device.vg_name, device.lv_name)
            return device.path
        return device

    def _is_valid_device(self):
        """
        Run the base checks on ``self._device``.

        :raises RuntimeError: if the device has partitions
        :raises argparse.ArgumentError: if the device does not exist or has
            GPT headers while ``gpt_ok`` is false
        """
        error = None
        if not self._device.exists:
            error = "Unable to proceed with non-existing device: %s" % self.dev_path
        # FIXME this is not a nice API, this validator was meant to catch any
        # non-existing devices upfront, not check for gpt headers. Now this
        # needs to optionally skip checking gpt headers which is beyond
        # verifying if the device exists. The better solution would be to
        # configure this with a list of checks that can be excluded/included on
        # __init__
        elif self._device.has_gpt_headers and not self.gpt_ok:
            error = "GPT headers found, they must be removed on: %s" % self.dev_path
        if self._device.has_partitions:
            raise RuntimeError("Device {} has partitions.".format(self.dev_path))
        if error:
            raise argparse.ArgumentError(None, error)
        return self._device


class ValidZapDevice(ValidDevice):
    """Validator for devices to zap: only the base checks are applied."""

    def __call__(self, dev_path):
        super().get_device(dev_path)
        return self._format_device(self._is_valid_device())

    def _is_valid_device(self, raise_sys_exit=True):
        # raise_sys_exit is accepted for signature parity with the sibling
        # validators; it is not used here
        super()._is_valid_device()
        return self._device


class ValidDataDevice(ValidDevice):
    """
    Validator for data devices: on top of the base checks, rejects devices
    that already hold a filesystem or a bluestore signature, and exits
    cleanly when the device is already used by ceph.
    """

    def __call__(self, dev_path):
        super().get_device(dev_path)
        return self._format_device(self._is_valid_device())

    def _is_valid_device(self, raise_sys_exit=True):
        super()._is_valid_device()
        if self._device.used_by_ceph:
            terminal.info('Device {} is already prepared'.format(self.dev_path))
            if raise_sys_exit:
                raise SystemExit(0)
        if self._device.has_fs and not self._device.used_by_ceph:
            raise RuntimeError("Device {} has a filesystem.".format(self.dev_path))
        # only absolute paths are probed for a bluestore label ("vg/lv" is not)
        if self.dev_path[0] == '/' and disk.has_bluestore_label(self.dev_path):
            raise RuntimeError("Device {} has bluestore signature.".format(self.dev_path))
        return self._device


class ValidRawDevice(ValidDevice):
    """
    Validator for raw-mode devices: exits with status 0 when
    ``ceph-bluestore-tool show-label`` succeeds on the path or when blkid
    reports ``crypto_LUKS``; otherwise applies the base checks.
    """

    def __call__(self, dev_path):
        super().get_device(dev_path)
        return self._format_device(self._is_valid_device())

    def _is_valid_device(self, raise_sys_exit=True):
        # NOTE(review): raise_sys_exit is not consulted; SystemExit is always
        # raised for already-prepared devices - confirm this is intended
        out, err, rc = process.call([
            'ceph-bluestore-tool', 'show-label',
            '--dev', self.dev_path], verbose_on_failure=False)
        if not rc:
            terminal.info("Raw device {} is already prepared.".format(self.dev_path))
            raise SystemExit(0)
        if disk.blkid(self.dev_path).get('TYPE') == 'crypto_LUKS':
            terminal.info("Raw device {} might already be in use for a dmcrypt OSD, skipping.".format(self.dev_path))
            raise SystemExit(0)
        super()._is_valid_device()
        return self._device


class ValidBatchDevice(ValidDevice):
    """Validator for batch mode: base checks plus "must not be a partition"."""

    def __call__(self, dev_path):
        super().get_device(dev_path)
        return self._format_device(self._is_valid_device())

    def _is_valid_device(self, raise_sys_exit=False):
        # NOTE(review): raise_sys_exit is not forwarded to the next class in
        # the MRO, so ValidDataDevice runs with its own default when reached
        # through ValidBatchDataDevice - confirm before relying on the flag
        super()._is_valid_device()
        if self._device.is_partition:
            raise argparse.ArgumentError(
                None,
                '{} is a partition, please pass '
                'LVs or raw block devices'.format(self.dev_path))
        return self._device


class ValidBatchDataDevice(ValidBatchDevice, ValidDataDevice):
    """
    Batch-mode validator for data devices. Devices already used by ceph (or
    holding db/wal LVs) are returned without further validation.
    """

    def __call__(self, dev_path):
        super().get_device(dev_path)
        return self._format_device(self._is_valid_device())

    def _is_valid_device(self):
        # if device is already used by ceph,
        # leave the validation to Batch.get_deployment_layout()
        # This way the idempotency isn't broken (especially when using --osds-per-device)
        for lv in self._device.lvs:
            if lv.tags.get('ceph.type') in ['db', 'wal']:
                return self._device
        if self._device.used_by_ceph:
            return self._device
        super()._is_valid_device(raise_sys_exit=False)
        return self._device


class OSDPath(object):
    """
    Validate path exists and it looks like an OSD directory.
    """

    @decorators.needs_root
    def __call__(self, string):
        if not os.path.exists(string):
            error = "Path does not exist: %s" % string
            raise argparse.ArgumentError(None, error)

        absolute_path = os.path.abspath(string)
        # partitions are accepted as-is, without inspecting their contents
        if disk.is_partition(string):
            return absolute_path
        if not os.path.isdir(absolute_path):
            error = "Argument is not a directory or device which is required to scan"
            raise argparse.ArgumentError(None, error)
        key_files = ['ceph_fsid', 'fsid', 'keyring', 'ready', 'type', 'whoami']
        dir_files = set(os.listdir(absolute_path))
        for key_file in key_files:
            if key_file not in dir_files:
                terminal.error('All following files must exist in path: %s' % ' '.join(key_files))
                error = "Required file (%s) was not found in OSD dir path: %s" % (
                    key_file,
                    absolute_path
                )
                raise argparse.ArgumentError(None, error)

        return absolute_path


def exclude_group_options(parser, groups, argv=None):
    """
    ``argparse`` has the ability to check for mutually exclusive options, but
    it only allows a basic XOR behavior: only one flag can be used from
    a defined group of options. This doesn't help when two groups of options
    need to be separated. For example, with filestore and bluestore, neither
    set can be used in conjunction with the other set.

    This helper validator will consume the parser to inspect the group flags,
    and it will group them together from ``groups``. This allows proper error
    reporting, matching each incompatible flag with its group name.

    :param parser: The argparse object, once it has configured all flags. It is
                   required to contain the group names being used to validate.
    :param groups: A list of group names (at least two), with the same used for
                  ``add_argument_group``
    :param argv: Consume the args (sys.argv) directly from this argument.
                 When ``None``, ``sys.argv`` is used.

    .. note: **Unfortunately** this will not be able to validate correctly when
    using default flags. In the case of filestore vs. bluestore, ceph-volume
    defaults to --bluestore, but we can't check that programmatically, we can
    only parse the flags seen via argv
    """
    if argv is None:
        # iterating ``None`` would raise TypeError; use the process arguments
        import sys
        argv = sys.argv

    # Reduce the parser groups to only the groups we need to intersect
    parser_groups = [g for g in parser._action_groups if g.title in groups]
    # A mapping of the group name to flags/options
    group_flags = {}
    flags_to_verify = []
    for group in parser_groups:
        # option groups may have more than one item in ``option_strings``, this
        # will loop over ``_group_actions`` which contains the
        # ``option_strings``, like ``['--filestore']``
        group_flags[group.title] = [
            option for group_action in group._group_actions
            for option in group_action.option_strings
        ]

    # Gather all the flags present in the groups so that we only check on those.
    for flags in group_flags.values():
        flags_to_verify.extend(flags)

    seen = []
    last_flag = None
    last_group = None
    for flag in argv:
        if flag not in flags_to_verify:
            continue
        for group_name, flags in group_flags.items():
            if flag in flags:
                seen.append(group_name)
                # We are mutually excluding groups, so having more than 1 group
                # in ``seen`` means we must raise an error
                if len(set(seen)) > 1:
                    terminal.warning('Incompatible flags were found, some values may get ignored')
                    msg = 'Cannot use %s (%s) with %s (%s)' % (
                        last_flag, last_group, flag, group_name
                    )
                    terminal.warning(msg)
                # remember the group that actually owns this flag so the
                # next warning pairs ``last_flag`` with the right group
                last_group = group_name
        last_flag = flag


class ValidFraction(object):
    """
    Validate fraction is in (0, 1.0]
    """

    def __call__(self, fraction):
        fraction_float = float(fraction)
        if math.isnan(fraction_float) or fraction_float <= 0.0 or fraction_float > 1.0:
            raise argparse.ArgumentError(None, 'Fraction %f not in (0,1.0]' % fraction_float)
        return fraction_float
# diff --git a/src/ceph-volume/ceph_volume/util/constants.py b/src/ceph-volume/ceph_volume/util/constants.py new file mode 100644 index 000000000..3ec819ec3 ---
/dev/null +++ b/src/ceph-volume/ceph_volume/util/constants.py @@ -0,0 +1,46 @@ + +# mount flags +mount = dict( + xfs=['rw', 'noatime' , 'inode64'] +) + + +# format flags +mkfs = dict( + xfs=[ + # force overwriting previous fs + '-f', + # set the inode size to 2kb + '-i', 'size=2048', + ], +) + +# The fantastical world of ceph-disk labels, they should give you the +# collywobbles +ceph_disk_guids = { + # luks + '45b0969e-9b03-4f30-b4c6-35865ceff106': {'type': 'journal', 'encrypted': True, 'encryption_type': 'luks'}, + 'cafecafe-9b03-4f30-b4c6-35865ceff106': {'type': 'block', 'encrypted': True, 'encryption_type': 'luks'}, + '166418da-c469-4022-adf4-b30afd37f176': {'type': 'block.db', 'encrypted': True, 'encryption_type': 'luks'}, + '86a32090-3647-40b9-bbbd-38d8c573aa86': {'type': 'block.wal', 'encrypted': True, 'encryption_type': 'luks'}, + '4fbd7e29-9d25-41b8-afd0-35865ceff05d': {'type': 'data', 'encrypted': True, 'encryption_type': 'luks'}, + # plain + '45b0969e-9b03-4f30-b4c6-5ec00ceff106': {'type': 'journal', 'encrypted': True, 'encryption_type': 'plain'}, + 'cafecafe-9b03-4f30-b4c6-5ec00ceff106': {'type': 'block', 'encrypted': True, 'encryption_type': 'plain'}, + '93b0052d-02d9-4d8a-a43b-33a3ee4dfbc3': {'type': 'block.db', 'encrypted': True, 'encryption_type': 'plain'}, + '306e8683-4fe2-4330-b7c0-00a917c16966': {'type': 'block.wal', 'encrypted': True, 'encryption_type': 'plain'}, + '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d': {'type': 'data', 'encrypted': True, 'encryption_type': 'plain'}, + # regular guids that differ from plain + 'fb3aabf9-d25f-47cc-bf5e-721d1816496b': {'type': 'lockbox', 'encrypted': False, 'encryption_type': None}, + '30cd0809-c2b2-499c-8879-2d6b78529876': {'type': 'block.db', 'encrypted': False, 'encryption_type': None}, + '5ce17fce-4087-4169-b7ff-056cc58473f9': {'type': 'block.wal', 'encrypted': False, 'encryption_type': None}, + '4fbd7e29-9d25-41b8-afd0-062c0ceff05d': {'type': 'data', 'encrypted': False, 'encryption_type': None}, + 
'cafecafe-9b03-4f30-b4c6-b4b80ceff106': {'type': 'block', 'encrypted': False, 'encryption_type': None}, + # multipath + '01b41e1b-002a-453c-9f17-88793989ff8f': {'type': 'block.wal', 'encrypted': False, 'encryption_type': None}, + 'ec6d6385-e346-45dc-be91-da2a7c8b3261': {'type': 'block.wal', 'encrypted': False, 'encryption_type': None}, + '45b0969e-8ae0-4982-bf9d-5a8d867af560': {'type': 'journal', 'encrypted': False, 'encryption_type': None}, + '4fbd7e29-8ae0-4982-bf9d-5a8d867af560': {'type': 'data', 'encrypted': False, 'encryption_type': None}, + '7f4a666a-16f3-47a2-8445-152ef4d03f6c': {'type': 'lockbox', 'encrypted': False, 'encryption_type': None}, + 'cafecafe-8ae0-4982-bf9d-5a8d867af560': {'type': 'block', 'encrypted': False, 'encryption_type': None}, +} diff --git a/src/ceph-volume/ceph_volume/util/device.py b/src/ceph-volume/ceph_volume/util/device.py new file mode 100644 index 000000000..d61222afe --- /dev/null +++ b/src/ceph-volume/ceph_volume/util/device.py @@ -0,0 +1,722 @@ +# -*- coding: utf-8 -*- + +import logging +import os +from functools import total_ordering +from ceph_volume import sys_info +from ceph_volume.api import lvm +from ceph_volume.util import disk, system +from ceph_volume.util.lsmdisk import LSMDisk +from ceph_volume.util.constants import ceph_disk_guids +from ceph_volume.util.disk import allow_loop_devices + + +logger = logging.getLogger(__name__) + + +report_template = """ +{dev:<25} {size:<12} {device_nodes:<15} {rot!s:<7} {available!s:<9} {model}""" + + +def encryption_status(abspath): + """ + Helper function to run ``encryption.status()``. It is done here to avoid + a circular import issue (encryption module imports from this module) and to + ease testing by allowing monkeypatching of this function. 
+ """ + from ceph_volume.util import encryption + return encryption.status(abspath) + + +class Devices(object): + """ + A container for Device instances with reporting + """ + + def __init__(self, + filter_for_batch=False, + with_lsm=False, + list_all=False): + lvs = lvm.get_lvs() + lsblk_all = disk.lsblk_all() + all_devices_vgs = lvm.get_all_devices_vgs() + if not sys_info.devices: + sys_info.devices = disk.get_devices() + self._devices = [Device(k, + with_lsm, + lvs=lvs, + lsblk_all=lsblk_all, + all_devices_vgs=all_devices_vgs) for k in + sys_info.devices.keys()] + self.devices = [] + for device in self._devices: + if filter_for_batch and not device.available_lvm_batch: + continue + if device.is_lv and not list_all: + continue + if device.is_partition and not list_all: + continue + self.devices.append(device) + + def pretty_report(self): + output = [ + report_template.format( + dev='Device Path', + size='Size', + rot='rotates', + model='Model name', + available='available', + device_nodes='Device nodes', + + )] + for device in sorted(self.devices): + output.append(device.report()) + return ''.join(output) + + def json_report(self): + output = [] + for device in sorted(self.devices): + output.append(device.json_report()) + return output + +@total_ordering +class Device(object): + + pretty_template = """ + {attr:<25} {value}""" + + report_fields = [ + 'ceph_device', + 'rejected_reasons', + 'available', + 'path', + 'sys_api', + 'device_id', + 'lsm_data', + ] + pretty_report_sys_fields = [ + 'actuators', + 'human_readable_size', + 'model', + 'removable', + 'ro', + 'rotational', + 'sas_address', + 'scheduler_mode', + 'vendor', + ] + + # define some class variables; mostly to enable the use of autospec in + # unittests + lvs = [] + + def __init__(self, path, with_lsm=False, lvs=None, lsblk_all=None, all_devices_vgs=None): + self.path = path + # LVs can have a vg/lv path, while disks will have /dev/sda + self.symlink = None + # check if we are a symlink + if 
os.path.islink(self.path): + self.symlink = self.path + real_path = os.path.realpath(self.path) + # check if we are not a device mapper + if "dm-" not in real_path: + self.path = real_path + if not sys_info.devices: + if self.path: + sys_info.devices = disk.get_devices(device=self.path) + else: + sys_info.devices = disk.get_devices() + if sys_info.devices.get(self.path, {}): + self.device_nodes = sys_info.devices[self.path]['device_nodes'] + self.sys_api = sys_info.devices.get(self.path, {}) + self.partitions = self._get_partitions() + self.lv_api = None + self.lvs = [] if not lvs else lvs + self.lsblk_all = lsblk_all + self.all_devices_vgs = all_devices_vgs + self.vgs = [] + self.vg_name = None + self.lv_name = None + self.disk_api = {} + self.blkid_api = None + self._exists = None + self._is_lvm_member = None + self.ceph_device = False + self._parse() + self.lsm_data = self.fetch_lsm(with_lsm) + + self.available_lvm, self.rejected_reasons_lvm = self._check_lvm_reject_reasons() + self.available_raw, self.rejected_reasons_raw = self._check_raw_reject_reasons() + self.available = self.available_lvm and self.available_raw + self.rejected_reasons = list(set(self.rejected_reasons_lvm + + self.rejected_reasons_raw)) + + self.device_id = self._get_device_id() + + def fetch_lsm(self, with_lsm): + ''' + Attempt to fetch libstoragemgmt (LSM) metadata, and return to the caller + as a dict. An empty dict is passed back to the caller if the target path + is not a block device, or lsm is unavailable on the host. Otherwise the + json returned will provide LSM attributes, and any associated errors that + lsm encountered when probing the device. + ''' + if not with_lsm or not self.exists or not self.is_device: + return {} + + lsm_disk = LSMDisk(self.path) + + return lsm_disk.json_report() + + def __lt__(self, other): + ''' + Implementing this method and __eq__ allows the @total_ordering + decorator to turn the Device class into a totally ordered type. 
+ This can slower then implementing all comparison operations. + This sorting should put available devices before unavailable devices + and sort on the path otherwise (str sorting). + ''' + if self.available == other.available: + return self.path < other.path + return self.available and not other.available + + def __eq__(self, other): + return self.path == other.path + + def __hash__(self): + return hash(self.path) + + def load_blkid_api(self): + if self.blkid_api is None: + self.blkid_api = disk.blkid(self.path) + + def _parse(self): + lv = None + if not self.sys_api: + # if no device was found check if we are a partition + partname = self.path.split('/')[-1] + for device, info in sys_info.devices.items(): + part = info['partitions'].get(partname, {}) + if part: + self.sys_api = part + break + + if self.lvs: + for _lv in self.lvs: + # if the path is not absolute, we have 'vg/lv', let's use LV name + # to get the LV. + if self.path[0] == '/': + if _lv.lv_path == self.path: + lv = _lv + break + else: + vgname, lvname = self.path.split('/') + if _lv.lv_name == lvname and _lv.vg_name == vgname: + lv = _lv + break + else: + if self.path[0] == '/': + lv = lvm.get_single_lv(filters={'lv_path': self.path}) + else: + vgname, lvname = self.path.split('/') + lv = lvm.get_single_lv(filters={'lv_name': lvname, + 'vg_name': vgname}) + + if lv: + self.lv_api = lv + self.lvs = [lv] + self.path = lv.lv_path + self.vg_name = lv.vg_name + self.lv_name = lv.name + self.ceph_device = lvm.is_ceph_device(lv) + else: + self.lvs = [] + if self.lsblk_all: + for dev in self.lsblk_all: + if dev['NAME'] == os.path.basename(self.path): + break + else: + dev = disk.lsblk(self.path) + self.disk_api = dev + device_type = dev.get('TYPE', '') + # always check is this is an lvm member + valid_types = ['part', 'disk', 'mpath'] + if allow_loop_devices(): + valid_types.append('loop') + if device_type in valid_types: + self._set_lvm_membership() + + self.ceph_disk = CephDiskDevice(self) + + def 
__repr__(self): + prefix = 'Unknown' + if self.is_lv: + prefix = 'LV' + elif self.is_partition: + prefix = 'Partition' + elif self.is_device: + prefix = 'Raw Device' + return '<%s: %s>' % (prefix, self.path) + + def pretty_report(self): + def format_value(v): + if isinstance(v, list): + return ', '.join(v) + else: + return v + def format_key(k): + return k.strip('_').replace('_', ' ') + output = ['\n====== Device report {} ======\n'.format(self.path)] + output.extend( + [self.pretty_template.format( + attr=format_key(k), + value=format_value(v)) for k, v in vars(self).items() if k in + self.report_fields and k != 'disk_api' and k != 'sys_api'] ) + output.extend( + [self.pretty_template.format( + attr=format_key(k), + value=format_value(v)) for k, v in self.sys_api.items() if k in + self.pretty_report_sys_fields]) + for lv in self.lvs: + output.append(""" + --- Logical Volume ---""") + output.extend( + [self.pretty_template.format( + attr=format_key(k), + value=format_value(v)) for k, v in lv.report().items()]) + return ''.join(output) + + def report(self): + return report_template.format( + dev=self.path, + size=self.size_human, + rot=self.rotational, + available=self.available, + model=self.model, + device_nodes=self.device_nodes + ) + + def json_report(self): + output = {k.strip('_'): v for k, v in vars(self).items() if k in + self.report_fields} + output['lvs'] = [lv.report() for lv in self.lvs] + return output + + def _get_device_id(self): + """ + Please keep this implementation in sync with get_device_id() in + src/common/blkdev.cc + """ + props = ['ID_VENDOR', 'ID_MODEL', 'ID_MODEL_ENC', 'ID_SERIAL_SHORT', 'ID_SERIAL', + 'ID_SCSI_SERIAL'] + p = disk.udevadm_property(self.path, props) + if p.get('ID_MODEL','').startswith('LVM PV '): + p['ID_MODEL'] = p.get('ID_MODEL_ENC', '').replace('\\x20', ' ').strip() + if 'ID_VENDOR' in p and 'ID_MODEL' in p and 'ID_SCSI_SERIAL' in p: + dev_id = '_'.join([p['ID_VENDOR'], p['ID_MODEL'], + p['ID_SCSI_SERIAL']]) + elif 
'ID_MODEL' in p and 'ID_SERIAL_SHORT' in p: + dev_id = '_'.join([p['ID_MODEL'], p['ID_SERIAL_SHORT']]) + elif 'ID_SERIAL' in p: + dev_id = p['ID_SERIAL'] + if dev_id.startswith('MTFD'): + # Micron NVMes hide the vendor + dev_id = 'Micron_' + dev_id + else: + # the else branch should fallback to using sysfs and ioctl to + # retrieve device_id on FreeBSD. Still figuring out if/how the + # python ioctl implementation does that on FreeBSD + dev_id = '' + dev_id = dev_id.replace(' ', '_') + while '__' in dev_id: + dev_id = dev_id.replace('__', '_') + return dev_id + + def _set_lvm_membership(self): + if self._is_lvm_member is None: + # this is contentious, if a PV is recognized by LVM but has no + # VGs, should we consider it as part of LVM? We choose not to + # here, because most likely, we need to use VGs from this PV. + self._is_lvm_member = False + device_to_check = [self.path] + device_to_check.extend(self.partitions) + + # a pv can only be in one vg, so this should be safe + # FIXME: While the above assumption holds, sda1 and sda2 + # can each host a PV and VG. I think the vg_name property is + # actually unused (not 100% sure) and can simply be removed + vgs = None + if not self.all_devices_vgs: + self.all_devices_vgs = lvm.get_all_devices_vgs() + for path in device_to_check: + for dev_vg in self.all_devices_vgs: + if dev_vg.pv_name == path: + vgs = [dev_vg] + if vgs: + self.vgs.extend(vgs) + self.vg_name = vgs[0] + self._is_lvm_member = True + self.lvs.extend(lvm.get_device_lvs(path)) + if self.lvs: + self.ceph_device = any([True if lv.tags.get('ceph.osd_id') else False for lv in self.lvs]) + + def _get_partitions(self): + """ + For block devices LVM can reside on the raw block device or on a + partition. Return a list of paths to be checked for a pv. 
+ """ + partitions = [] + path_dir = os.path.dirname(self.path) + for partition in self.sys_api.get('partitions', {}).keys(): + partitions.append(os.path.join(path_dir, partition)) + return partitions + + @property + def exists(self): + return os.path.exists(self.path) + + @property + def has_fs(self): + self.load_blkid_api() + return 'TYPE' in self.blkid_api + + @property + def has_gpt_headers(self): + self.load_blkid_api() + return self.blkid_api.get("PTTYPE") == "gpt" + + @property + def rotational(self): + rotational = self.sys_api.get('rotational') + if rotational is None: + # fall back to lsblk if not found in sys_api + # default to '1' if no value is found with lsblk either + rotational = self.disk_api.get('ROTA', '1') + return rotational == '1' + + @property + def model(self): + return self.sys_api['model'] + + @property + def size_human(self): + return self.sys_api['human_readable_size'] + + @property + def size(self): + return self.sys_api['size'] + + @property + def parent_device(self): + if 'PKNAME' in self.disk_api: + return '/dev/%s' % self.disk_api['PKNAME'] + return None + + @property + def lvm_size(self): + """ + If this device was made into a PV it would lose 1GB in total size + due to the 1GB physical extent size we set when creating volume groups + """ + size = disk.Size(b=self.size) + lvm_size = disk.Size(gb=size.gb.as_int()) - disk.Size(gb=1) + return lvm_size + + @property + def is_lvm_member(self): + if self._is_lvm_member is None: + self._set_lvm_membership() + return self._is_lvm_member + + @property + def is_ceph_disk_member(self): + def is_member(device): + return 'ceph' in device.get('PARTLABEL', '') or \ + device.get('PARTTYPE', '') in ceph_disk_guids.keys() + # If we come from Devices(), self.lsblk_all is set already. + # Otherwise, we have to grab the data. 
+ details = self.lsblk_all or disk.lsblk_all() + _is_member = False + if self.sys_api.get("partitions"): + for part in self.sys_api.get("partitions").keys(): + for dev in details: + if part.startswith(dev['NAME']): + if is_member(dev): + _is_member = True + return _is_member + else: + return is_member(self.disk_api) + raise RuntimeError(f"Couln't check if device {self.path} is a ceph-disk member.") + + @property + def has_bluestore_label(self): + return disk.has_bluestore_label(self.path) + + @property + def is_mapper(self): + return self.path.startswith(('/dev/mapper', '/dev/dm-')) + + @property + def device_type(self): + self.load_blkid_api() + if 'type' in self.sys_api: + return self.sys_api['type'] + elif self.disk_api: + return self.disk_api['TYPE'] + elif self.blkid_api: + return self.blkid_api['TYPE'] + + @property + def is_mpath(self): + return self.device_type == 'mpath' + + @property + def is_lv(self): + return self.lv_api is not None + + @property + def is_partition(self): + self.load_blkid_api() + if self.disk_api: + return self.disk_api['TYPE'] == 'part' + elif self.blkid_api: + return self.blkid_api['TYPE'] == 'part' + return False + + @property + def is_device(self): + self.load_blkid_api() + api = None + if self.disk_api: + api = self.disk_api + elif self.blkid_api: + api = self.blkid_api + if api: + valid_types = ['disk', 'device', 'mpath'] + if allow_loop_devices(): + valid_types.append('loop') + return self.device_type in valid_types + return False + + @property + def is_acceptable_device(self): + return self.is_device or self.is_partition or self.is_lv + + @property + def is_encrypted(self): + """ + Only correct for LVs, device mappers, and partitions. Will report a ``None`` + for raw devices. 
+ """ + self.load_blkid_api() + crypt_reports = [self.blkid_api.get('TYPE', ''), self.disk_api.get('FSTYPE', '')] + if self.is_lv: + # if disk APIs are reporting this is encrypted use that: + if 'crypto_LUKS' in crypt_reports: + return True + # if ceph-volume created this, then a tag would let us know + elif self.lv_api.encrypted: + return True + return False + elif self.is_partition: + return 'crypto_LUKS' in crypt_reports + elif self.is_mapper: + active_mapper = encryption_status(self.path) + if active_mapper: + # normalize a bit to ensure same values regardless of source + encryption_type = active_mapper['type'].lower().strip('12') # turn LUKS1 or LUKS2 into luks + return True if encryption_type in ['plain', 'luks'] else False + else: + return False + else: + return None + + @property + def used_by_ceph(self): + # only filter out data devices as journals could potentially be reused + osd_ids = [lv.tags.get("ceph.osd_id") is not None for lv in self.lvs + if lv.tags.get("ceph.type") in ["data", "block"]] + return any(osd_ids) + + @property + def journal_used_by_ceph(self): + # similar to used_by_ceph() above. This is for 'journal' devices (db/wal/..) + # needed by get_lvm_fast_allocs() in devices/lvm/batch.py + # see https://tracker.ceph.com/issues/59640 + osd_ids = [lv.tags.get("ceph.osd_id") is not None for lv in self.lvs + if lv.tags.get("ceph.type") in ["db", "wal"]] + return any(osd_ids) + + @property + def vg_free_percent(self): + if self.vgs: + return [vg.free_percent for vg in self.vgs] + else: + return [1] + + @property + def vg_size(self): + if self.vgs: + return [vg.size for vg in self.vgs] + else: + # TODO fix this...we can probably get rid of vg_free + return self.vg_free + + @property + def vg_free(self): + ''' + Returns the free space in all VGs on this device. If no VGs are + present, returns the disk size. 
+ ''' + if self.vgs: + return [vg.free for vg in self.vgs] + else: + # We could also query 'lvmconfig + # --typeconfig full' and use allocations -> physical_extent_size + # value to project the space for a vg + # assuming 4M extents here + extent_size = 4194304 + vg_free = int(self.size / extent_size) * extent_size + if self.size % extent_size == 0: + # If the extent size divides size exactly, deduct on extent for + # LVM metadata + vg_free -= extent_size + return [vg_free] + + @property + def has_partitions(self): + ''' + Boolean to determine if a given device has partitions. + ''' + if self.sys_api.get('partitions'): + return True + return False + + def _check_generic_reject_reasons(self): + reasons = [ + ('removable', 1, 'removable'), + ('ro', 1, 'read-only'), + ] + rejected = [reason for (k, v, reason) in reasons if + self.sys_api.get(k, '') == v] + if self.is_acceptable_device: + # reject disks smaller than 5GB + if int(self.sys_api.get('size', 0)) < 5368709120: + rejected.append('Insufficient space (<5GB)') + else: + rejected.append("Device type is not acceptable. It should be raw device or partition") + if self.is_ceph_disk_member: + rejected.append("Used by ceph-disk") + + try: + if self.has_bluestore_label: + rejected.append('Has BlueStore device label') + except OSError as e: + # likely failed to open the device. assuming it is BlueStore is the safest option + # so that a possibly-already-existing OSD doesn't get overwritten + logger.error('failed to determine if device {} is BlueStore. device should not be used to avoid false negatives. err: {}'.format(self.path, e)) + rejected.append('Failed to determine if device is BlueStore') + + if self.is_partition: + try: + if disk.has_bluestore_label(self.parent_device): + rejected.append('Parent has BlueStore device label') + except OSError as e: + # likely failed to open the device. 
class CephDiskDevice(object):
    """
    Detect devices that have been created by ceph-disk, report their type
    (journal, data, etc..). Requires a ``Device`` object as input.

    The wrapped object only needs to expose two dictionaries: ``disk_api``
    (the ``lsblk`` report) and ``blkid_api`` (the ``blkid`` report).
    """

    def __init__(self, device):
        self.device = device
        # Cache for ``is_member``; ``None`` means "not computed yet".
        self._is_ceph_disk_member = None

    @property
    def partlabel(self):
        """
        In containers, the 'PARTLABEL' attribute might not be detected
        correctly via ``lsblk``, so we poke at the value with ``lsblk`` first,
        falling back to ``blkid`` (which works correctly in containers).
        """
        lsblk_partlabel = self.device.disk_api.get('PARTLABEL')
        if lsblk_partlabel:
            return lsblk_partlabel
        return self.device.blkid_api.get('PARTLABEL', '')

    @property
    def parttype(self):
        """
        Seems like older versions do not detect PARTTYPE correctly (assuming
        the info in util/disk.py#lsblk is still valid).
        Simply resolve to using blkid since lsblk will throw an error if asked
        for an unknown column.
        """
        return self.device.blkid_api.get('PARTTYPE', '')

    @property
    def is_member(self):
        """
        ``True`` when the partition label mentions ``ceph`` or the partition
        type GUID is one of the known ceph-disk GUIDs.

        The answer is computed once and cached, whatever the outcome.
        Previously only the partlabel hit was cached, so the GUID lookup and
        the negative result were recomputed on every access.
        """
        if self._is_ceph_disk_member is None:
            # ``or`` short-circuits: the GUID table is only consulted when
            # the label alone is not conclusive.
            self._is_ceph_disk_member = (
                'ceph' in self.partlabel
                or self.parttype in ceph_disk_guids
            )
        return self._is_ceph_disk_member

    @property
    def type(self):
        """
        Return the ceph-disk role of the partition.

        The partition label is searched for a known role name first; if none
        matches, the role is derived from the PARTTYPE GUID table, and
        ``'unknown'`` is returned when the GUID is not listed either.
        """
        types = [
            'data', 'wal', 'db', 'lockbox', 'journal',
            # ceph-disk uses 'ceph block' when placing data in bluestore, but
            # keeps the regular OSD files in 'ceph data' :( :( :( :(
            'block',
        ]
        for t in types:
            if t in self.partlabel:
                return t
        label = ceph_disk_guids.get(self.parttype, {})
        # GUID table types look dotted; only the last component is kept.
        return label.get('type', 'unknown').split('.')[-1]
+ + +def get_partuuid(device): + """ + If a device is a partition, it will probably have a PARTUUID on it that + will persist and can be queried against `blkid` later to detect the actual + device + """ + out, err, rc = process.call( + ['blkid', '-c', '/dev/null', '-s', 'PARTUUID', '-o', 'value', device] + ) + return ' '.join(out).strip() + + +def _blkid_parser(output): + """ + Parses the output from a system ``blkid`` call, requires output to be + produced using the ``-p`` flag which bypasses the cache, mangling the + names. These names are corrected to what it would look like without the + ``-p`` flag. + + Normal output:: + + /dev/sdb1: UUID="62416664-cbaf-40bd-9689-10bd337379c3" TYPE="xfs" [...] + """ + # first spaced separated item is garbage, gets tossed: + output = ' '.join(output.split()[1:]) + # split again, respecting possible whitespace in quoted values + pairs = output.split('" ') + raw = {} + processed = {} + mapping = { + 'UUID': 'UUID', + 'TYPE': 'TYPE', + 'PART_ENTRY_NAME': 'PARTLABEL', + 'PART_ENTRY_UUID': 'PARTUUID', + 'PART_ENTRY_TYPE': 'PARTTYPE', + 'PTTYPE': 'PTTYPE', + } + + for pair in pairs: + try: + column, value = pair.split('=') + except ValueError: + continue + raw[column] = value.strip().strip().strip('"') + + for key, value in raw.items(): + new_key = mapping.get(key) + if not new_key: + continue + processed[new_key] = value + + return processed + + +def blkid(device): + """ + The blkid interface to its CLI, creating an output similar to what is + expected from ``lsblk``. In most cases, ``lsblk()`` should be the preferred + method for extracting information about a device. There are some corner + cases where it might provide information that is otherwise unavailable. + + The system call uses the ``-p`` flag which bypasses the cache, the caveat + being that the keys produced are named completely different to expected + names. + + For example, instead of ``PARTLABEL`` it provides a ``PART_ENTRY_NAME``. 
def get_part_entry_type(device):
    """
    Return the ``ID_PART_ENTRY_TYPE`` value reported for ``device``.

    ``blkid`` is asked for its low level (cache bypassing) report in the
    ``udev`` output format. That format is meant for udev rules, but it is the
    only consistent way to retrieve the GUID used by ceph-disk to identify
    devices. An empty string is returned when no line carries the property.
    """
    marker = 'ID_PART_ENTRY_TYPE='
    command = ['blkid', '-c', '/dev/null', '-p', '-o', 'udev', device]
    out, err, rc = process.call(command)
    # first matching line wins; the value is whatever follows the last '='
    values = (line.split('=')[-1].strip() for line in out if marker in line)
    return next(values, '')
+ for i in range(10): + udev_info = udevadm_property(device.path) + partition_number = udev_info.get('ID_PART_ENTRY_NUMBER') + if partition_number: + break + time.sleep(0.2) + if not partition_number: + raise RuntimeError('Unable to detect the partition number for device: %s' % device.path) + + process.run( + ['parted', device.parent_device, '--script', '--', 'rm', partition_number] + ) + + +def _stat_is_device(stat_obj): + """ + Helper function that will interpret ``os.stat`` output directly, so that other + functions can call ``os.stat`` once and interpret that result several times + """ + return stat.S_ISBLK(stat_obj) + + +def _lsblk_parser(line): + """ + Parses lines in lsblk output. Requires output to be in pair mode (``-P`` flag). Lines + need to be whole strings, the line gets split when processed. + + :param line: A string, with the full line from lsblk output + """ + # parse the COLUMN="value" output to construct the dictionary + pairs = line.split('" ') + parsed = {} + for pair in pairs: + try: + column, value = pair.split('=') + except ValueError: + continue + parsed[column] = value.strip().strip().strip('"') + return parsed + + +def device_family(device): + """ + Returns a list of associated devices. It assumes that ``device`` is + a parent device. It is up to the caller to ensure that the device being + used is a parent, not a partition. + """ + labels = ['NAME', 'PARTLABEL', 'TYPE'] + command = ['lsblk', '-P', '-p', '-o', ','.join(labels), device] + out, err, rc = process.call(command) + devices = [] + for line in out: + devices.append(_lsblk_parser(line)) + + return devices + + +def udevadm_property(device, properties=[]): + """ + Query udevadm for information about device properties. + Optionally pass a list of properties to return. A requested property might + not be returned if not present. 
def lsblk(device, columns=None, abspath=False):
    """
    Return the ``lsblk`` report of a single device as a dictionary.

    Thin wrapper around ``lsblk_all()`` that keeps the first reported entry
    only. Directories are never handed to ``lsblk``.

    :param device: Path of the device to query
    :param columns: Optional list of ``lsblk`` columns, forwarded as is
    :param abspath: Forwarded as is to ``lsblk_all()``
    :returns: The parsed report, or an empty dictionary when ``device`` is a
              directory or nothing was reported for it
    """
    result = []
    if not os.path.isdir(device):
        result = lsblk_all(device=device,
                           columns=columns,
                           abspath=abspath)
    if not result:
        logger.debug(f"{device} not found in lsblk report")
        return {}

    return result[0]
+ + These are a subset of lsblk columns which are known to work on both CentOS 7 and Xenial: + + NAME device name + KNAME internal kernel device name + PKNAME internal kernel parent device name + MAJ:MIN major:minor device number + FSTYPE filesystem type + MOUNTPOINT where the device is mounted + LABEL filesystem LABEL + UUID filesystem UUID + RO read-only device + RM removable device + MODEL device identifier + SIZE size of the device + STATE state of the device + OWNER user name + GROUP group name + MODE device node permissions + ALIGNMENT alignment offset + MIN-IO minimum I/O size + OPT-IO optimal I/O size + PHY-SEC physical sector size + LOG-SEC logical sector size + ROTA rotational device + SCHED I/O scheduler name + RQ-SIZE request queue size + TYPE device type + PKNAME internal parent kernel device name + DISC-ALN discard alignment offset + DISC-GRAN discard granularity + DISC-MAX discard max bytes + DISC-ZERO discard zeroes data + + There is a bug in ``lsblk`` where using all the available (supported) + columns will result in no output (!), in order to workaround this the + following columns have been removed from the default reporting columns: + + * RQ-SIZE (request queue size) + * MIN-IO minimum I/O size + * OPT-IO optimal I/O size + + These should be available however when using `columns`. For example:: + + >>> lsblk('/dev/sda1', columns=['OPT-IO']) + {'OPT-IO': '0'} + + Normal CLI output, as filtered by the flags in this function will look like :: + + $ lsblk -P -o NAME,KNAME,PKNAME,MAJ:MIN,FSTYPE,MOUNTPOINT + NAME="sda1" KNAME="sda1" MAJ:MIN="8:1" FSTYPE="ext4" MOUNTPOINT="/" + + :param columns: A list of columns to report as keys in its original form. 
+ :param abspath: Set the flag for absolute paths on the report + """ + default_columns = [ + 'NAME', 'KNAME', 'PKNAME', 'MAJ:MIN', 'FSTYPE', 'MOUNTPOINT', 'LABEL', + 'UUID', 'RO', 'RM', 'MODEL', 'SIZE', 'STATE', 'OWNER', 'GROUP', 'MODE', + 'ALIGNMENT', 'PHY-SEC', 'LOG-SEC', 'ROTA', 'SCHED', 'TYPE', 'DISC-ALN', + 'DISC-GRAN', 'DISC-MAX', 'DISC-ZERO', 'PKNAME', 'PARTLABEL' + ] + columns = columns or default_columns + # -P -> Produce pairs of COLUMN="value" + # -p -> Return full paths to devices, not just the names, when ``abspath`` is set + # -o -> Use the columns specified or default ones provided by this function + base_command = ['lsblk', '-P'] + if abspath: + base_command.append('-p') + base_command.append('-o') + base_command.append(','.join(columns)) + if device: + base_command.append('--nodeps') + base_command.append(device) + + out, err, rc = process.call(base_command) + + if rc != 0: + raise RuntimeError(f"Error: {err}") + + result = [] + + for line in out: + result.append(_lsblk_parser(line)) + + return result + + +def is_device(dev): + """ + Boolean to determine if a given device is a block device (**not** + a partition!) 
class BaseFloatUnit(float):
    """
    Base class to support float representations of size values. Suffix is
    computed on child classes by inspecting the class name
    """

    def __repr__(self):
        return "<%s(%s)>" % (self.__class__.__name__, self.__float__())

    def __str__(self):
        # 'FloatGB' -> 'GB': the unit is whatever follows 'Float' in the name
        return "{size:.2f} {suffix}".format(
            size=self.__float__(),
            suffix=self.__class__.__name__.split('Float')[-1]
        )

    def as_int(self):
        return int(self.real)

    def as_float(self):
        return self.real


class FloatB(BaseFloatUnit):
    pass


class FloatMB(BaseFloatUnit):
    pass


class FloatGB(BaseFloatUnit):
    pass


class FloatKB(BaseFloatUnit):
    pass


class FloatTB(BaseFloatUnit):
    pass


class FloatPB(BaseFloatUnit):
    pass


class Size(object):
    """
    Helper to provide an interface for different sizes given a single initial
    input. Allows for comparison between different size objects, which avoids
    the need to convert sizes before comparison (e.g. comparing megabytes
    against gigabytes).

    Common comparison operators are supported::

        >>> hd1 = Size(gb=400)
        >>> hd2 = Size(gb=500)
        >>> hd1 > hd2
        False
        >>> hd1 < hd2
        True
        >>> hd1 == hd2
        False
        >>> hd1 == Size(gb=400)
        True

    The Size object can also be multiplied or divided. A new object is
    returned, the operands are left untouched::

        >>> hd1
        <Size(400.00 GB)>
        >>> hd1 * 2
        <Size(800.00 GB)>
        >>> hd1
        <Size(400.00 GB)>

    Dividing a Size by another Size yields a plain ratio (a float).

    Additions and subtractions are only supported between Size objects::

        >>> Size(gb=224) - Size(gb=100)
        <Size(124.00 GB)>
        >>> Size(gb=1) + Size(mb=300)
        <Size(1.29 GB)>

    Can also display a human-readable representation, with automatic detection
    on best suited unit, or alternatively, specific unit representation::

        >>> s = Size(mb=2211)
        >>> s
        <Size(2.16 GB)>
        >>> s.mb
        <FloatMB(2211.0)>
        >>> print("Total size: %s" % s.mb)
        Total size: 2211.00 MB
        >>> print("Total size: %s" % s)
        Total size: 2.16 GB
    """

    @classmethod
    def parse(cls, size):
        """
        Build a Size from a string such as ``"10GB"``, ``"10g"`` or ``"512"``
        (no suffix means bytes). The suffix is case insensitive.
        """
        if (len(size) > 2 and
                size[-2].lower() in ['k', 'm', 'g', 't', 'p'] and
                size[-1].lower() == 'b'):
            return cls(**{size[-2:].lower(): float(size[0:-2])})
        elif size[-1].lower() in ['b', 'k', 'm', 'g', 't', 'p']:
            return cls(**{size[-1].lower(): float(size[0:-1])})
        else:
            return cls(b=float(size))

    def __init__(self, multiplier=1024, **kw):
        """
        :param multiplier: Ratio between two consecutive units
        :param kw: A single ``unit=value`` pair (``b``, ``kb``, ``gigabytes``,
                   ...). When several are given only the first one is used.
        """
        self._multiplier = multiplier
        # create a mapping of units-to-multiplier, skip bytes as that is
        # calculated initially always and does not need to convert
        aliases = [
            [('k', 'kb', 'kilobytes'), self._multiplier],
            [('m', 'mb', 'megabytes'), self._multiplier ** 2],
            [('g', 'gb', 'gigabytes'), self._multiplier ** 3],
            [('t', 'tb', 'terabytes'), self._multiplier ** 4],
            [('p', 'pb', 'petabytes'), self._multiplier ** 5]
        ]
        # and mappings for units-to-formatters, including bytes and aliases for
        # each
        format_aliases = [
            [('b', 'bytes'), FloatB],
            [('kb', 'kilobytes'), FloatKB],
            [('mb', 'megabytes'), FloatMB],
            [('gb', 'gigabytes'), FloatGB],
            [('tb', 'terabytes'), FloatTB],
            [('pb', 'petabytes'), FloatPB],
        ]
        self._formatters = {}
        for key, value in format_aliases:
            for alias in key:
                self._formatters[alias] = value
        self._factors = {}
        for key, value in aliases:
            for alias in key:
                self._factors[alias] = value

        for k, v in kw.items():
            self._convert(v, k)
            # only pursue the first occurrence
            break

    def _convert(self, size, unit):
        """
        Convert any size down to bytes so that other methods can rely on bytes
        being available always, regardless of what they pass in, avoiding the
        need for a mapping of every permutation.
        """
        if unit in ['b', 'bytes']:
            self._b = size
            return
        factor = self._factors[unit]
        self._b = float(size * factor)

    def _get_best_format(self):
        """
        Go through all the supported units, and use the first one that is not
        greater than 1024. This allows to represent size in the most readable
        format available. Sizes beyond 1024 PB are reported in PB, the
        largest supported unit, instead of yielding ``None``.
        """
        for unit in ['b', 'kb', 'mb', 'gb', 'tb', 'pb']:
            value = getattr(self, unit)
            if value > 1024:
                continue
            return value
        return self.pb

    def __repr__(self):
        return "<Size(%s)>" % self._get_best_format()

    def __str__(self):
        return "%s" % self._get_best_format()

    def __format__(self, spec):
        return str(self._get_best_format()).__format__(spec)

    def __int__(self):
        return int(self._b)

    def __float__(self):
        # bytes may have been stored as an int (``Size(b=10)``), and
        # ``__float__`` is required to return a real float
        return float(self._b)

    def __lt__(self, other):
        if isinstance(other, Size):
            return self._b < other._b
        else:
            return self.b < other

    def __le__(self, other):
        if isinstance(other, Size):
            return self._b <= other._b
        else:
            return self.b <= other

    def __eq__(self, other):
        if isinstance(other, Size):
            return self._b == other._b
        else:
            return self.b == other

    def __ne__(self, other):
        if isinstance(other, Size):
            return self._b != other._b
        else:
            return self.b != other

    def __ge__(self, other):
        if isinstance(other, Size):
            return self._b >= other._b
        else:
            return self.b >= other

    def __gt__(self, other):
        if isinstance(other, Size):
            return self._b > other._b
        else:
            return self.b > other

    def __add__(self, other):
        if isinstance(other, Size):
            _b = self._b + other._b
            return Size(b=_b)
        raise TypeError('Cannot add "Size" object with int')

    def __sub__(self, other):
        if isinstance(other, Size):
            _b = self._b - other._b
            return Size(b=_b)
        raise TypeError('Cannot subtract "Size" object from int')

    def __mul__(self, other):
        if isinstance(other, Size):
            raise TypeError('Cannot multiply with "Size" object')
        _b = self._b * other
        return Size(b=_b)

    def __truediv__(self, other):
        # Size / Size is a ratio, Size / number is a (smaller) Size
        if isinstance(other, Size):
            return self._b / other._b
        _b = self._b / other
        return Size(b=_b)

    def __div__(self, other):
        # Python 2 spelling of ``__truediv__``
        if isinstance(other, Size):
            return self._b / other._b
        _b = self._b / other
        return Size(b=_b)

    def __bool__(self):
        return self.b != 0

    def __nonzero__(self):
        # Python 2 spelling of ``__bool__``
        return self.__bool__()

    def __getattr__(self, unit):
        """
        Calculate units on the fly, relies on the fact that ``bytes`` has been
        converted at instantiation. Units that don't exist will trigger an
        ``AttributeError``
        """
        # Private and dunder names are never units. Failing right away keeps
        # a not-yet-initialized instance (as created while copying or
        # unpickling) from recursing endlessly through the lookups below.
        if unit.startswith('_'):
            raise AttributeError('Size object has not attribute "%s"' % unit)
        try:
            formatter = self._formatters[unit]
        except KeyError:
            raise AttributeError('Size object has not attribute "%s"' % unit)
        if unit in ['b', 'bytes']:
            return formatter(self._b)
        try:
            factor = self._factors[unit]
        except KeyError:
            raise AttributeError('Size object has not attribute "%s"' % unit)
        return formatter(float(self._b) / factor)
def size_from_human_readable(s):
    """
    Takes a human readable string and converts into a Size. If no unit is
    passed, bytes is assumed.

    Spaces are ignored and units are case insensitive. Both the short form
    (``"10G"``) and the long form (``"10GB"``) are understood, as well as an
    explicit bytes suffix (``"512B"``).

    :param s: A non-empty string such as ``"10 GB"``, ``"1.5t"`` or ``"4096"``
    :returns: A ``Size`` object, or ``None`` when the unit is not recognized
    :raises ValueError: when the numeric part cannot be parsed as a float
    :raises IndexError: when ``s`` is empty
    """
    s = s.replace(' ', '')
    if s[-1].isdigit():
        return Size(b=float(s))
    unit = s[-1].lower()
    number = s[:-1]
    # "10GB": the trailing 'b' only spells out "bytes", the real unit is the
    # letter right before it. Previously this form failed in float().
    if unit == 'b' and number and number[-1].isalpha():
        unit = number[-1].lower()
        number = number[:-1]
    n = float(number)
    if unit == 'b':
        return Size(b=n)
    if unit in ('k', 'm', 'g', 't', 'p'):
        return Size(**{unit + 'b': n})
    return None
+ val = os.environ.get("CEPH_VOLUME_ALLOW_LOOP_DEVICES", "false").lower() + if val not in ("false", 'no', '0'): + cls.allow = True + if not cls.warned: + logger.warning( + "CEPH_VOLUME_ALLOW_LOOP_DEVICES is set in your " + "environment, so we will allow the use of unattached loop" + " devices as disks. This feature is intended for " + "development purposes only and will never be supported in" + " production. Issues filed based on this behavior will " + "likely be ignored." + ) + cls.warned = True + return cls.allow + + +allow_loop_devices = AllowLoopDevices() + + +def get_block_devs_sysfs(_sys_block_path='/sys/block', _sys_dev_block_path='/sys/dev/block', device=''): + def holder_inner_loop(): + for holder in holders: + # /sys/block/sdy/holders/dm-8/dm/uuid + holder_dm_type = get_file_contents(os.path.join(_sys_block_path, dev, f'holders/{holder}/dm/uuid')).split('-')[0].lower() + if holder_dm_type == 'mpath': + return True + + # First, get devices that are _not_ partitions + result = list() + if not device: + dev_names = os.listdir(_sys_block_path) + else: + dev_names = [device] + for dev in dev_names: + name = kname = os.path.join("/dev", dev) + if not os.path.exists(name): + continue + type_ = 'disk' + holders = os.listdir(os.path.join(_sys_block_path, dev, 'holders')) + if get_file_contents(os.path.join(_sys_block_path, dev, 'removable')) == "1": + continue + if holder_inner_loop(): + continue + dm_dir_path = os.path.join(_sys_block_path, dev, 'dm') + if os.path.isdir(dm_dir_path): + dm_type = get_file_contents(os.path.join(dm_dir_path, 'uuid')) + type_ = dm_type.split('-')[0].lower() + basename = get_file_contents(os.path.join(dm_dir_path, 'name')) + name = os.path.join("/dev/mapper", basename) + if dev.startswith('loop'): + if not allow_loop_devices(): + continue + # Skip loop devices that are not attached + if not os.path.exists(os.path.join(_sys_block_path, dev, 'loop')): + continue + type_ = 'loop' + result.append([kname, name, type_]) + # Next, look for 
def get_devices(_sys_block_path='/sys/block', device=''):
    """
    Captures all available block devices as found by walking sysfs (see
    ``get_block_devs_sysfs()``). Additional interesting metadata like sectors,
    size, vendor, solid/rotational, etc. is collected from
    /sys/block/<device>

    Returns a dictionary, where keys are the full paths to devices.

    :param _sys_block_path: Root of the sysfs block tree to inspect
    :param device: Accepted for interface compatibility; it is not used to
                   narrow down the scan.

    ..note:: loop devices, removable media, and logical volumes are never included.
    """

    device_facts = {}

    block_devs = get_block_devs_sysfs(_sys_block_path)
    partitions = get_partitions()

    block_types = ['disk', 'mpath', 'lvm', 'part']
    if allow_loop_devices():
        block_types.append('loop')

    for block in block_devs:
        if block[2] == 'lvm':
            block[1] = lvm.get_lv_path_from_mapper(block[1])
        devname = os.path.basename(block[0])
        diskname = block[1]
        if block[2] not in block_types:
            continue
        sysdir = os.path.join(_sys_block_path, devname)
        if block[2] == 'part':
            # partitions live below their parent: /sys/block/<parent>/<part>
            sysdir = os.path.join(_sys_block_path, partitions[devname], devname)
        metadata = {}

        # If the device is ceph rbd it gets excluded
        if is_ceph_rbd(diskname):
            continue

        # If the mapper device is a logical volume it gets excluded
        if is_mapper_device(diskname):
            if lvm.get_device_lvs(diskname):
                continue

        # all facts that have no defaults
        # (<name>, <path relative to _sys_block_path>)
        facts = [('removable', 'removable'),
                 ('ro', 'ro'),
                 ('vendor', 'device/vendor'),
                 ('model', 'device/model'),
                 ('rev', 'device/rev'),
                 ('sas_address', 'device/sas_address'),
                 ('sas_device_handle', 'device/sas_device_handle'),
                 ('support_discard', 'queue/discard_granularity'),
                 ('rotational', 'queue/rotational'),
                 ('nr_requests', 'queue/nr_requests'),
                ]
        for key, file_ in facts:
            metadata[key] = get_file_contents(os.path.join(sysdir, file_))

        # Start from a clean slate on every iteration. Partitions have no
        # 'slaves' directory to list, so without this reset they would pick
        # up the value left behind by the previously processed device (or
        # fail with a NameError when a partition comes first).
        device_slaves = []
        if block[2] != 'part':
            device_slaves = os.listdir(os.path.join(sysdir, 'slaves'))

        if device_slaves:
            metadata['device_nodes'] = ','.join(device_slaves)
        elif block[2] == 'part':
            metadata['device_nodes'] = partitions[devname]
        else:
            metadata['device_nodes'] = devname

        metadata['actuators'] = None
        if os.path.isdir(sysdir + "/queue/independent_access_ranges/"):
            # ranges are numbered 0..n-1, count how many are exposed
            actuators = 0
            while os.path.isdir(sysdir + "/queue/independent_access_ranges/" + str(actuators)):
                actuators += 1
            metadata['actuators'] = actuators

        metadata['scheduler_mode'] = ""
        scheduler = get_file_contents(sysdir + "/queue/scheduler")
        if scheduler is not None:
            # the active scheduler is the one between square brackets
            m = re.match(r".*?(\[(.*)\])", scheduler)
            if m:
                metadata['scheduler_mode'] = m.group(2)

        # collected once for every block type (this used to be computed
        # twice for anything that is not a partition)
        metadata['partitions'] = get_partitions_facts(sysdir)

        size = get_file_contents(os.path.join(sysdir, 'size'), 0)

        metadata['sectors'] = get_file_contents(os.path.join(sysdir, 'sectors'), 0)
        fallback_sectorsize = get_file_contents(sysdir + "/queue/hw_sector_size", 512)
        metadata['sectorsize'] = get_file_contents(sysdir +
                                                   "/queue/logical_block_size",
                                                   fallback_sectorsize)
        # the sysfs 'size' value is multiplied by 512 to get bytes
        metadata['size'] = float(size) * 512
        metadata['human_readable_size'] = human_readable_size(metadata['size'])
        metadata['path'] = diskname
        metadata['type'] = block[2]

        device_facts[diskname] = metadata
    return device_facts
def luks_format(key, device):
    """
    Format a device with ``cryptsetup luksFormat``, using the key size
    returned by ``get_key_size_from_conf()``

    :param key: dmcrypt secret key, handed to cryptsetup through stdin
    :param device: Absolute path to device
    """
    key_size = get_key_size_from_conf()
    # --batch-mode: do not prompt
    command = ['cryptsetup', '--batch-mode']
    command += ['--key-size', key_size]
    # '--key-file' is a misnomer here: '-' tells cryptsetup to read the key
    # itself from stdin
    command += ['--key-file', '-']
    command += ['luksFormat', device]
    process.call(command, stdin=key, terminal_verbose=True, show_command=True)
def dmcrypt_close(mapping, skip_path_check=False):
    """
    Close a device mapping through ``cryptsetup remove``.

    :param mapping: mapping name or path used to correlate device.
    :param skip_path_check: when True, ``cryptsetup remove`` is attempted
                            without first checking that ``mapping`` exists
                            as a path.
    """
    if skip_path_check or os.path.exists(mapping):
        # don't be strict about the remove call, but still warn on the
        # terminal if it fails
        process.run(['cryptsetup', 'remove', mapping], stop_on_error=False)
        return

    # the path check was requested and the mapping is not there
    logger.debug('device mapper path does not exist %s' % mapping)
    logger.debug('will skip cryptsetup removal')
def write_lockbox_keyring(osd_id, osd_fsid, secret):
    """
    Write the ``lockbox.keyring`` file of an OSD unless it already exists.

    This is needed because the bluestore OSD will not persist the keyring:
    its directory is a tmpfs mount, so files written there are ephemeral and
    the keyring has to be re-created every time the OSD is activated.

    :param osd_id: ID of the OSD, used to build the OSD directory path
    :param osd_fsid: fsid of the OSD, used to build the keyring entity name
    :param secret: the key to store in the keyring
    """
    keyring_path = '/var/lib/ceph/osd/%s-%s/lockbox.keyring' % (conf.cluster, osd_id)
    if not os.path.exists(keyring_path):
        write_keyring(
            osd_id,
            secret,
            keyring_name='lockbox.keyring',
            name='client.osd-lockbox.%s' % osd_fsid,
        )
def legacy_encrypted(device):
    """
    Detect if a device was encrypted with ceph-disk or not. In the case of
    encrypted devices, include the type of encryption (LUKS, or PLAIN), and
    infer what the lockbox partition is.

    This function assumes that ``device`` will be a partition. A directory is
    also accepted, in which case the device mounted on it is looked up first.

    :param device: path to a partition, or to a directory acting as a mount
                   point
    :returns: dict with the keys ``encrypted`` (bool), ``type``
              (``'luks'``, ``'plain'`` or None), ``lockbox`` (path of the
              lockbox device, ``''`` when none was found) and ``device``
    :raises RuntimeError: when ``device`` is a directory and no device can be
                          found mounted on it
    """
    disk_meta = {}
    if os.path.isdir(device):
        # keep the original path around: ``device`` is rebound below, and the
        # error message must report the mount point, not the failed lookup
        mount_point = device
        mounts = system.Mounts(paths=True).get_mounts()
        # a directory isn't going to help with parsing, so from here on
        # ``device`` is whatever is mounted on it; ``or [None]`` also covers
        # an empty list of devices for the mount point
        device = (mounts.get(mount_point) or [None])[0]
        if not device:
            raise RuntimeError('unable to determine the device mounted at %s' % mount_point)
    metadata = {'encrypted': False, 'type': None, 'lockbox': '', 'device': device}
    # check if the device is online/decrypted first
    active_mapper = status(device)
    if active_mapper:
        # normalize a bit to ensure same values regardless of source
        metadata['type'] = active_mapper['type'].lower().strip('12')  # turn LUKS1 or LUKS2 into luks
        metadata['encrypted'] = metadata['type'] in ['plain', 'luks']
        # The true device is now available to this function, so it gets
        # re-assigned here for the lockbox checks to succeed (it is not
        # possible to guess partitions from a device mapper device otherwise
        device = active_mapper.get('device', device)
        metadata['device'] = device
    else:
        uuid = get_part_entry_type(device)
        guid_match = constants.ceph_disk_guids.get(uuid, {})
        encrypted_guid = guid_match.get('encrypted', False)
        if encrypted_guid:
            metadata['encrypted'] = True
            metadata['type'] = guid_match['encryption_type']

    # Lets find the lockbox location now, to do this, we need to find out the
    # parent device name for the device so that we can query all of its
    # associated devices and *then* look for one that has the 'lockbox' label
    # on it. Thanks for being awesome ceph-disk
    if not device == 'tmpfs':
        disk_meta = lsblk(device, abspath=True)
    if not disk_meta:
        return metadata
    parent_device = disk_meta['PKNAME']
    # With the parent device set, we can now look for the lockbox listing associated devices
    devices = [Device(i['NAME']) for i in device_family(parent_device)]
    for d in devices:
        if d.ceph_disk.type == 'lockbox':
            metadata['lockbox'] = d.path
            break
    return metadata
class LSMDisk:
    """
    Soft-failing wrapper around libstoragemgmt's ``LocalDisk`` for one device.

    When the ``lsm`` module could not be imported every query returns None
    and the properties fall back to their "Unknown"/"Unsupported" values.
    Errors raised by libstoragemgmt are logged and collected in
    ``error_list`` instead of being propagated.
    """

    def __init__(self, dev_path):
        """
        :param dev_path: path of the local disk device to query
        """
        self.dev_path = dev_path
        self.error_list = set()

        if lsm_available:
            self.lsm_available = True
            self.disk = LocalDisk()
        else:
            self.lsm_available = False
            self.error_list.add("libstoragemgmt (lsm module) is unavailable")
            logger.info("LSM information is unavailable: libstoragemgmt is not installed")
            self.disk = None

        # cached result of the LED status query, filled in lazily
        self.led_bits = None

    @property
    def errors(self):
        """show any errors that the LSM interaction has encountered (str)"""
        return ", ".join(self.error_list)

    def _query_lsm(self, func, path):
        """Common method used to call the LSM functions, returning the function's result or None"""

        # if disk is None, lsm is unavailable so all calls should return None
        if self.disk is None:
            return None

        method = getattr(self.disk, func)
        try:
            output = method(path)
        except LsmError as err:
            logger.error("LSM Error: {}".format(err._msg))
            self.error_list.add(err._msg)
            return None
        else:
            return output

    @property
    def led_status(self):
        """Fetch LED status, store in the LSMDisk object and return current status (int)"""
        if self.led_bits is None:
            # a failed (None) or zero answer is stored as 1, the value the
            # other LED properties treat as "no usable LED information"
            self.led_bits = self._query_lsm('led_status_get', self.dev_path) or 1
        return self.led_bits

    def _led_flags(self, kind):
        """Return the (on, off, unknown) lsm bit flags for LED ``kind`` ('IDENT' or 'FAULT')"""
        return tuple(
            getattr(lsm_Disk, 'LED_STATUS_{}_{}'.format(kind, suffix))
            for suffix in ('ON', 'OFF', 'UNKNOWN')
        )

    def _led_state(self, kind):
        """Translate the status bits of LED ``kind`` into On, Off, Unknown or Unsupported (str)"""
        status = self.led_status
        if status == 1:
            return "Unsupported"
        # first matching flag wins, checked in on/off/unknown order
        for flag, label in zip(self._led_flags(kind), ("On", "Off", "Unknown")):
            if status & flag == flag:
                return label
        return "Unsupported"

    def _led_support(self, kind):
        """Report whether LED ``kind`` is Unknown, Supported or Unsupported (str)"""
        status = self.led_status
        if status == 1:
            return "Unknown"
        # supported as soon as any of the LED's three state bits is set
        if status & sum(self._led_flags(kind)) == 0:
            return "Unsupported"
        return "Supported"

    @property
    def led_ident_state(self):
        """Query a disks IDENT LED state to discover when it is On, Off or Unknown (str)"""
        return self._led_state('IDENT')

    @property
    def led_fault_state(self):
        """Query a disks FAULT LED state to discover when it is On, Off or Unknown (str)"""
        return self._led_state('FAULT')

    @property
    def led_ident_support(self):
        """Query the LED state to determine IDENT support: Unknown, Supported, Unsupported (str)"""
        return self._led_support('IDENT')

    @property
    def led_fault_support(self):
        """Query the LED state to determine FAULT support: Unknown, Supported, Unsupported (str)"""
        return self._led_support('FAULT')

    @property
    def health(self):
        """Determine the health of the disk from LSM : Unknown, Fail, Warn or Good (str)"""
        _health_int = self._query_lsm('health_status_get', self.dev_path)
        return health_map.get(_health_int, "Unknown")

    @property
    def transport(self):
        """Translate a disks link type to a human readable format (str)"""
        _link_type = self._query_lsm('link_type_get', self.dev_path)
        return transport_map.get(_link_type, "Unknown")

    @property
    def media_type(self):
        """Use the rpm value to determine the type of disk media: Flash or HDD (str)"""
        _rpm = self._query_lsm('rpm_get', self.dev_path)
        if _rpm is not None:
            if _rpm == 0:
                return "Flash"
            # libstoragemgmt reports 1 for rotating media whose speed is not
            # known; that is still a spinning disk, so it counts as HDD
            elif _rpm >= 1:
                return "HDD"

        # negative values (unknown / not supported) and failed queries
        return "Unknown"

    def json_report(self):
        """Return the LSM related metadata for the current local disk (dict)"""
        if self.lsm_available:
            return {
                "serialNum": self._query_lsm('serial_num_get', self.dev_path) or "Unknown",
                "transport": self.transport,
                "mediaType": self.media_type,
                "rpm": self._query_lsm('rpm_get', self.dev_path) or "Unknown",
                "linkSpeed": self._query_lsm('link_speed_get', self.dev_path) or "Unknown",
                "health": self.health,
                "ledSupport": {
                    "IDENTsupport": self.led_ident_support,
                    "IDENTstatus": self.led_ident_state,
                    "FAILsupport": self.led_fault_support,
                    "FAILstatus": self.led_fault_state,
                },
                "errors": list(self.error_list)
            }
        else:
            return {}
def get_block_db_size(lv_format=True):
    """
    Helper to retrieve the configured size for the block.db logical volume.

    The ``bluestore_block_db_size`` value of the ``osd`` section is parsed
    with ``str_to_int`` and interpreted as a number of bytes. Sizes below 2GB
    are refused.

    :param lv_format: Return a string to be used for ``lv_create``. A 5 GB size
                      would result in '5G', otherwise it will return a ``Size`` object.
    :returns: None when no size is configured (or the configuration could
              not be loaded), otherwise the size as described for
              ``lv_format``
    :raises RuntimeError: when the configured size is smaller than 2GB
    """
    try:
        configured = conf.ceph.get_safe('osd', 'bluestore_block_db_size', None)
    except RuntimeError:
        configured = None
        logger.exception("failed to load ceph configuration, will use defaults")

    if not configured:
        logger.debug(
            'block.db has no size configuration, will fallback to using as much as possible'
        )
        # TODO better to return disk.Size(b=0) here
        return None

    logger.debug('bluestore_block_db_size set to %s' % configured)
    requested = disk.Size(b=str_to_int(configured))

    if requested < disk.Size(gb=2):
        mlogger.error('Refusing to continue with configured size for block.db')
        raise RuntimeError('block.db sizes must be larger than 2GB, detected: %s' % requested)

    if not lv_format:
        return requested
    return '%sG' % requested.gb.as_int()
def create_id(fsid, json_secrets, osd_id=None):
    """
    Register a new OSD with ``ceph osd new`` and return the output of that
    command.

    :param fsid: The osd fsid to create, always required
    :param json_secrets: a json-ready object with whatever secrets are wanted
                         to be passed to the monitor; it is sent on stdin
    :param osd_id: an ID to request for the new OSD; it is only passed on
                   when ``osd_id_available`` accepts it
    :returns: the stdout of the command, joined and stripped
    :raises RuntimeError: when ``osd_id`` is given but is not available, or
                          when the command exits with a non-zero status
    """
    if osd_id is not None and not osd_id_available(osd_id):
        raise RuntimeError("The osd ID {} is already in use or does not exist.".format(osd_id))

    cmd = ['ceph', '--cluster', conf.cluster]
    cmd += ['--name', 'client.bootstrap-osd']
    cmd += ['--keyring', '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster]
    cmd += ['-i', '-']
    cmd += ['osd', 'new', fsid]
    if osd_id is not None:
        cmd.append(osd_id)

    stdout, stderr, returncode = process.call(
        cmd,
        stdin=json_secrets,
        show_command=True
    )
    if returncode == 0:
        return ' '.join(stdout).strip()
    raise RuntimeError('Unable to create a new OSD id')
+ + :param osd_id: The osd ID to check + """ + if osd_id is None: + return False + + bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster + stdout, stderr, returncode = process.call( + [ + 'ceph', + '--cluster', conf.cluster, + '--name', 'client.bootstrap-osd', + '--keyring', bootstrap_keyring, + 'osd', + 'tree', + '-f', 'json', + ], + show_command=True + ) + if returncode != 0: + raise RuntimeError('Unable check if OSD id exists: %s' % osd_id) + + output = json.loads(''.join(stdout).strip()) + osds = output['nodes'] + osd = [osd for osd in osds if str(osd['id']) == str(osd_id)] + if not osd or (osd and osd[0].get('status') == "destroyed"): + return True + return False + + +def mount_tmpfs(path): + process.run([ + 'mount', + '-t', + 'tmpfs', 'tmpfs', + path + ]) + + # Restore SELinux context + system.set_context(path) + + +def create_osd_path(osd_id, tmpfs=False): + path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id) + system.mkdir_p('/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)) + if tmpfs: + mount_tmpfs(path) + + +def format_device(device): + # only supports xfs + command = ['mkfs', '-t', 'xfs'] + + # get the mkfs options if any for xfs, + # fallback to the default options defined in constants.mkfs + flags = conf.ceph.get_list( + 'osd', + 'osd_mkfs_options_xfs', + default=constants.mkfs.get('xfs'), + split=' ', + ) + + # always force + if '-f' not in flags: + flags.insert(0, '-f') + + command.extend(flags) + command.append(device) + process.run(command) + + +def _normalize_mount_flags(flags, extras=None): + """ + Mount flag options have to be a single string, separated by a comma. If the + flags are separated by spaces, or with commas and spaces in ceph.conf, the + mount options will be passed incorrectly. 
+ + This will help when parsing ceph.conf values return something like:: + + ["rw,", "exec,"] + + Or:: + + [" rw ,", "exec"] + + :param flags: A list of flags, or a single string of mount flags + :param extras: Extra set of mount flags, useful when custom devices like VDO need + ad-hoc mount configurations + """ + # Instead of using set(), we append to this new list here, because set() + # will create an arbitrary order on the items that is made worst when + # testing with tools like tox that includes a randomizer seed. By + # controlling the order, it is easier to correctly assert the expectation + unique_flags = [] + if isinstance(flags, list): + if extras: + flags.extend(extras) + + # ensure that spaces and commas are removed so that they can join + # correctly, remove duplicates + for f in flags: + if f and f not in unique_flags: + unique_flags.append(f.strip().strip(',')) + return ','.join(unique_flags) + + # split them, clean them, and join them back again + flags = flags.strip().split(' ') + if extras: + flags.extend(extras) + + # remove possible duplicates + for f in flags: + if f and f not in unique_flags: + unique_flags.append(f.strip().strip(',')) + flags = ','.join(unique_flags) + # Before returning, split them again, since strings can be mashed up + # together, preventing removal of duplicate entries + return ','.join(set(flags.split(','))) + + +def mount_osd(device, osd_id, **kw): + extras = [] + is_vdo = kw.get('is_vdo', '0') + if is_vdo == '1': + extras = ['discard'] + destination = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id) + command = ['mount', '-t', 'xfs', '-o'] + flags = conf.ceph.get_list( + 'osd', + 'osd_mount_options_xfs', + default=constants.mount.get('xfs'), + split=' ', + ) + command.append( + _normalize_mount_flags(flags, extras=extras) + ) + command.append(device) + command.append(destination) + process.run(command) + + # Restore SELinux context + system.set_context(destination) + + +def _link_device(device, device_type, osd_id): 
def _validate_bluestore_device(device, excepted_device_type, osd_uuid):
    """
    Validate whether the given device is truly what it is supposed to be,
    based on the label reported by ``ceph-bluestore-tool show-label``.

    Any failed check is reported through ``terminal.error`` and ends with
    ``SystemExit(1)``.

    :param device: path of the device whose label is inspected
    :param excepted_device_type: the ``description`` the label must carry
    :param osd_uuid: the ``osd_uuid`` the label must carry
    """
    def bail(message):
        # report on the terminal and stop right away
        terminal.error(message)
        raise SystemExit(1)

    out, err, ret = process.call(['ceph-bluestore-tool', 'show-label', '--dev', device])
    if err:
        bail('ceph-bluestore-tool failed to run. %s'% err)
    if ret:
        bail('no label on %s'% device)

    labels = json.loads(''.join(out))
    if device not in labels:
        bail('%s not in the output of ceph-bluestore-tool, buggy?'% device)

    label = labels[device]
    current_device_type = label['description']
    if current_device_type != excepted_device_type:
        bail('%s is not a %s device but %s'% (device, excepted_device_type, current_device_type))

    current_osd_uuid = label['osd_uuid']
    if current_osd_uuid != osd_uuid:
        bail('device %s is used by another osd %s as %s, should be %s'% (device, current_osd_uuid, current_device_type, osd_uuid))
def osd_mkfs_bluestore(osd_id, fsid, keyring=None, wal=False, db=False):
    """
    Create the files for the OSD to function. A normal call will look like:

          ceph-osd --cluster ceph --mkfs --mkkey -i 0 \
                   --monmap /var/lib/ceph/osd/ceph-0/activate.monmap \
                   --osd-data /var/lib/ceph/osd/ceph-0 \
                   --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c \
                   --keyring /var/lib/ceph/osd/ceph-0/keyring \
                   --setuser ceph --setgroup ceph

    In some cases it is required to use the keyring, when it is passed in as
    a keyword argument it is used as part of the ceph-osd command

    :param osd_id: ID of the OSD, used for ``-i`` and to build the OSD path
    :param fsid: value passed to ``--osd-uuid``
    :param keyring: when not None it is sent on stdin and ``--keyfile -`` is
                    added to the command
    :param wal: path of the block.wal device, if any
    :param db: path of the block.db device, if any
    :raises RuntimeError: when ``ceph-osd --mkfs`` fails, including when the
                          device is still locked after the last attempt
    """
    path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    monmap = os.path.join(path, 'activate.monmap')

    system.chown(path)

    base_command = [
        'ceph-osd',
        '--cluster', conf.cluster,
        '--osd-objectstore', 'bluestore',
        '--mkfs',
        '-i', osd_id,
        '--monmap', monmap,
    ]

    supplementary_command = [
        '--osd-data', path,
        '--osd-uuid', fsid,
        '--setuser', 'ceph',
        '--setgroup', 'ceph'
    ]

    if keyring is not None:
        base_command.extend(['--keyfile', '-'])

    if wal:
        base_command.extend(
            ['--bluestore-block-wal-path', wal]
        )
        system.chown(wal)

    if db:
        base_command.extend(
            ['--bluestore-block-db-path', db]
        )
        system.chown(db)

    osdspec_affinity = get_osdspec_affinity()
    if osdspec_affinity:
        base_command.extend(['--osdspec-affinity', osdspec_affinity])

    command = base_command + supplementary_command

    # When running in containers the --mkfs on raw device sometimes fails
    # to acquire a lock through flock() on the device because systemd-udevd
    # holds one temporarily. See KernelDevice.cc and _lock() to understand how
    # ceph-osd acquires the lock. Because this is really transient, we try up
    # to 5 times and wait for 1 sec in-between
    max_attempts = 5
    for attempt in range(1, max_attempts + 1):
        _, _, returncode = process.call(command, stdin=keyring, terminal_verbose=True, show_command=True)
        if returncode == 0:
            return
        if returncode != errno.EWOULDBLOCK:
            # not the transient lock problem, retrying will not help
            break
        if attempt < max_attempts:
            logger.info(
                'disk is held by another process, trying to mkfs again... (%s/%s attempt)' % (
                    attempt, max_attempts)
            )
            time.sleep(1)

    # reached on a hard failure and also when every attempt found the device
    # locked: mkfs did not happen, so this must never pass silently
    raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
import as_string + +# python2 has no FileNotFoundError +try: + FileNotFoundError +except NameError: + FileNotFoundError = OSError + +logger = logging.getLogger(__name__) +mlogger = terminal.MultiLogger(__name__) + +# TODO: get these out of here and into a common area for others to consume +if platform.system() == 'FreeBSD': + FREEBSD = True + DEFAULT_FS_TYPE = 'zfs' + PROCDIR = '/compat/linux/proc' + # FreeBSD does not have blockdevices any more + BLOCKDIR = '/dev' + ROOTGROUP = 'wheel' +else: + FREEBSD = False + DEFAULT_FS_TYPE = 'xfs' + PROCDIR = '/proc' + BLOCKDIR = '/sys/block' + ROOTGROUP = 'root' + +host_rootfs = '/rootfs' +run_host_cmd = [ + 'nsenter', + '--mount={}/proc/1/ns/mnt'.format(host_rootfs), + '--ipc={}/proc/1/ns/ipc'.format(host_rootfs), + '--net={}/proc/1/ns/net'.format(host_rootfs), + '--uts={}/proc/1/ns/uts'.format(host_rootfs) +] + +def generate_uuid(): + return str(uuid.uuid4()) + +def find_executable_on_host(locations=[], executable='', binary_check='/bin/ls'): + paths = ['{}/{}'.format(location, executable) for location in locations] + command = [] + command.extend(run_host_cmd + [binary_check] + paths) + process = subprocess.Popen( + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + stdin=subprocess.PIPE, + close_fds=True + ) + stdout = as_string(process.stdout.read()) + if stdout: + executable_on_host = stdout.split('\n')[0] + logger.info('Executable {} found on the host, will use {}'.format(executable, executable_on_host)) + return executable_on_host + else: + logger.warning('Executable {} not found on the host, will return {} as-is'.format(executable, executable)) + return executable + +def which(executable, run_on_host=False): + """find the location of an executable""" + def _get_path(executable, locations): + for location in locations: + executable_path = os.path.join(location, executable) + if os.path.exists(executable_path) and os.path.isfile(executable_path): + return executable_path + return None + + static_locations 
= ( + '/usr/local/bin', + '/bin', + '/usr/bin', + '/usr/local/sbin', + '/usr/sbin', + '/sbin', + ) + + if not run_on_host: + path = os.getenv('PATH', '') + path_locations = path.split(':') + exec_in_path = _get_path(executable, path_locations) + if exec_in_path: + return exec_in_path + mlogger.warning('Executable {} not in PATH: {}'.format(executable, path)) + + exec_in_static_locations = _get_path(executable, static_locations) + if exec_in_static_locations: + mlogger.warning('Found executable under {}, please ensure $PATH is set correctly!'.format(exec_in_static_locations)) + return exec_in_static_locations + else: + executable = find_executable_on_host(static_locations, executable) + + # At this point, either `find_executable_on_host()` found an executable on the host + # or we fallback to just returning the argument as-is, to prevent a hard fail, and + # hoping that the system might have the executable somewhere custom + return executable + +def get_ceph_user_ids(): + """ + Return the id and gid of the ceph user + """ + try: + user = pwd.getpwnam('ceph') + except KeyError: + # is this even possible? 
+ raise RuntimeError('"ceph" user is not available in the current system') + return user[2], user[3] + + +def get_file_contents(path, default=''): + contents = default + if not os.path.exists(path): + return contents + try: + with open(path, 'r') as open_file: + contents = open_file.read().strip() + except Exception: + logger.exception('Failed to read contents from: %s' % path) + + return contents + + +def mkdir_p(path, chown=True): + """ + A `mkdir -p` that defaults to chown the path to the ceph user + """ + try: + os.mkdir(path) + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + raise + if chown: + uid, gid = get_ceph_user_ids() + os.chown(path, uid, gid) + + +def chown(path, recursive=True): + """ + ``chown`` a path to the ceph user (uid and guid fetched at runtime) + """ + uid, gid = get_ceph_user_ids() + if os.path.islink(path): + process.run(['chown', '-h', 'ceph:ceph', path]) + path = os.path.realpath(path) + if recursive: + process.run(['chown', '-R', 'ceph:ceph', path]) + else: + os.chown(path, uid, gid) + + +def is_binary(path): + """ + Detect if a file path is a binary or not. Will falsely report as binary + when utf-16 encoded. In the ceph universe there is no such risk (yet) + """ + with open(path, 'rb') as fp: + contents = fp.read(8192) + if b'\x00' in contents: # a null byte may signal binary + return True + return False + + +class tmp_mount(object): + """ + Temporarily mount a device on a temporary directory, + and unmount it upon exit + + When ``encrypted`` is set to ``True``, the exit method will call out to + close the device so that it doesn't remain open after mounting. 
It is + assumed that it will be open because otherwise it wouldn't be possible to + mount in the first place + """ + + def __init__(self, device, encrypted=False): + self.device = device + self.path = None + self.encrypted = encrypted + + def __enter__(self): + self.path = tempfile.mkdtemp() + process.run([ + 'mount', + '-v', + self.device, + self.path + ]) + return self.path + + def __exit__(self, exc_type, exc_val, exc_tb): + process.run([ + 'umount', + '-v', + self.path + ]) + if self.encrypted: + # avoid a circular import from the encryption module + from ceph_volume.util import encryption + encryption.dmcrypt_close(self.device) + + +def unmount_tmpfs(path): + """ + Removes the mount at the given path iff the path is a tmpfs mount point. + Otherwise no action is taken. + """ + _out, _err, rc = process.call(['findmnt', '-t', 'tmpfs', '-M', path]) + if rc != 0: + logger.info('{} does not appear to be a tmpfs mount'.format(path)) + else: + logger.info('Unmounting tmpfs path at {}'.format( path)) + unmount(path) + + +def unmount(path): + """ + Removes mounts at the given path + """ + process.run([ + 'umount', + '-v', + path, + ]) + + +def path_is_mounted(path, destination=None): + """ + Check if the given path is mounted + """ + m = Mounts(paths=True) + mounts = m.get_mounts() + realpath = os.path.realpath(path) + mounted_locations = mounts.get(realpath, []) + + if destination: + return destination in mounted_locations + return mounted_locations != [] + + +def device_is_mounted(dev, destination=None): + """ + Check if the given device is mounted, optionally validating that a + destination exists + """ + plain_mounts = Mounts(devices=True) + realpath_mounts = Mounts(devices=True, realpath=True) + + realpath_dev = os.path.realpath(dev) if dev.startswith('/') else dev + destination = os.path.realpath(destination) if destination else None + # plain mounts + plain_dev_mounts = plain_mounts.get_mounts().get(dev, []) + realpath_dev_mounts = 
plain_mounts.get_mounts().get(realpath_dev, []) + # realpath mounts + plain_dev_real_mounts = realpath_mounts.get_mounts().get(dev, []) + realpath_dev_real_mounts = realpath_mounts.get_mounts().get(realpath_dev, []) + + mount_locations = [ + plain_dev_mounts, + realpath_dev_mounts, + plain_dev_real_mounts, + realpath_dev_real_mounts + ] + + for mounts in mount_locations: + if mounts: # we have a matching mount + if destination: + if destination in mounts: + logger.info( + '%s detected as mounted, exists at destination: %s', dev, destination + ) + return True + else: + logger.info('%s was found as mounted', dev) + return True + logger.info('%s was not found as mounted', dev) + return False + +class Mounts(object): + excluded_paths = [] + + def __init__(self, devices=False, paths=False, realpath=False): + self.devices = devices + self.paths = paths + self.realpath = realpath + + def safe_realpath(self, path, timeout=0.2): + def _realpath(path, result): + p = os.path.realpath(path) + result.append(p) + + result = [] + t = threading.Thread(target=_realpath, args=(path, result)) + t.setDaemon(True) + t.start() + t.join(timeout) + if t.is_alive(): + return None + return result[0] + + def get_mounts(self): + """ + Create a mapping of all available system mounts so that other helpers can + detect nicely what path or device is mounted + + It ignores (most of) non existing devices, but since some setups might need + some extra device information, it will make an exception for: + + - tmpfs + - devtmpfs + - /dev/root + + If ``devices`` is set to ``True`` the mapping will be a device-to-path(s), + if ``paths`` is set to ``True`` then the mapping will be + a path-to-device(s) + + :param realpath: Resolve devices to use their realpaths. 
This is useful for + paths like LVM where more than one path can point to the same device + """ + devices_mounted = {} + paths_mounted = {} + do_not_skip = ['tmpfs', 'devtmpfs', '/dev/root'] + default_to_devices = self.devices is False and self.paths is False + + + with open(PROCDIR + '/mounts', 'rb') as mounts: + proc_mounts = mounts.readlines() + + for line in proc_mounts: + fields = [as_string(f) for f in line.split()] + if len(fields) < 3: + continue + if fields[0] in Mounts.excluded_paths or \ + fields[1] in Mounts.excluded_paths: + continue + if self.realpath: + if fields[0].startswith('/'): + device = self.safe_realpath(fields[0]) + if device is None: + logger.warning(f"Can't get realpath on {fields[0]}, skipping.") + Mounts.excluded_paths.append(fields[0]) + continue + else: + device = fields[0] + else: + device = fields[0] + path = self.safe_realpath(fields[1]) + if path is None: + logger.warning(f"Can't get realpath on {fields[1]}, skipping.") + Mounts.excluded_paths.append(fields[1]) + continue + # only care about actual existing devices + if not os.path.exists(device) or not device.startswith('/'): + if device not in do_not_skip: + continue + if device in devices_mounted.keys(): + devices_mounted[device].append(path) + else: + devices_mounted[device] = [path] + if path in paths_mounted.keys(): + paths_mounted[path].append(device) + else: + paths_mounted[path] = [device] + + # Default to returning information for devices if + if self.devices is True or default_to_devices: + return devices_mounted + else: + return paths_mounted + + +def set_context(path, recursive=False): + """ + Calls ``restorecon`` to set the proper context on SELinux systems. Only if + the ``restorecon`` executable is found anywhere in the path it will get + called. + + If the ``CEPH_VOLUME_SKIP_RESTORECON`` environment variable is set to + any of: "1", "true", "yes" the call will be skipped as well. 
+ + Finally, if SELinux is not enabled, or not available in the system, + ``restorecon`` will not be called. This is checked by calling out to the + ``selinuxenabled`` executable. If that tool is not installed or returns + a non-zero exit status then no further action is taken and this function + will return. + """ + skip = os.environ.get('CEPH_VOLUME_SKIP_RESTORECON', '') + if skip.lower() in ['1', 'true', 'yes']: + logger.info( + 'CEPH_VOLUME_SKIP_RESTORECON environ is set, will not call restorecon' + ) + return + + try: + stdout, stderr, code = process.call(['selinuxenabled'], + verbose_on_failure=False) + except FileNotFoundError: + logger.info('No SELinux found, skipping call to restorecon') + return + + if code != 0: + logger.info('SELinux is not enabled, will not call restorecon') + return + + # restore selinux context to default policy values + if which('restorecon').startswith('/'): + if recursive: + process.run(['restorecon', '-R', path]) + else: + process.run(['restorecon', path]) diff --git a/src/ceph-volume/ceph_volume/util/templates.py b/src/ceph-volume/ceph_volume/util/templates.py new file mode 100644 index 000000000..85a366d26 --- /dev/null +++ b/src/ceph-volume/ceph_volume/util/templates.py @@ -0,0 +1,49 @@ + +osd_header = """ +{:-^100}""".format('') + + +osd_component_titles = """ + Type Path LV Size % of device""" + + +osd_reused_id = """ + OSD id {id_: <55}""" + + +osd_component = """ + {_type: <15} {path: <55} {size: <15} {percent:.2%}""" + + +osd_encryption = """ + encryption: {enc: <15}""" + + +total_osds = """ +Total OSDs: {total_osds} +""" + + +def filtered_devices(devices): + string = """ +Filtered Devices:""" + for device, info in devices.items(): + string += """ + %s""" % device + + for reason in info['reasons']: + string += """ + %s""" % reason + + string += "\n" + return string + + +ssd_volume_group = """ +Solid State VG: + Targets: {target: <25} Total size: {total_lv_size: <25} + Total LVs: {total_lvs: <25} Size per LV: {lv_size: 
<25} + Devices: {block_db_devices} +""" + + |