diff options
Diffstat (limited to 'src/ceph-volume/ceph_volume/util')
-rw-r--r-- | src/ceph-volume/ceph_volume/util/__init__.py | 108 | ||||
-rw-r--r-- | src/ceph-volume/ceph_volume/util/arg_validators.py | 150 | ||||
-rw-r--r-- | src/ceph-volume/ceph_volume/util/constants.py | 46 | ||||
-rw-r--r-- | src/ceph-volume/ceph_volume/util/device.py | 549 | ||||
-rw-r--r-- | src/ceph-volume/ceph_volume/util/disk.py | 804 | ||||
-rw-r--r-- | src/ceph-volume/ceph_volume/util/encryption.py | 263 | ||||
-rw-r--r-- | src/ceph-volume/ceph_volume/util/prepare.py | 531 | ||||
-rw-r--r-- | src/ceph-volume/ceph_volume/util/system.py | 346 | ||||
-rw-r--r-- | src/ceph-volume/ceph_volume/util/templates.py | 49 |
9 files changed, 2846 insertions, 0 deletions
# src/ceph-volume/ceph_volume/util/__init__.py
import logging
from math import floor
from ceph_volume import terminal

try:
    # Python 2: make ``input`` behave like Python 3 (return the raw string)
    input = raw_input  # pylint: disable=redefined-builtin
except NameError:
    # Python 3: ``raw_input`` does not exist, the builtin ``input`` is fine
    pass

logger = logging.getLogger(__name__)


def as_string(string):
    """
    Ensure that whatever type of string is incoming, it is returned as an
    actual string, versus 'bytes' which Python 3 likes to use.

    :param string: A ``bytes`` or text object
    :returns: ``string`` decoded as utf-8 if it was ``bytes``, otherwise the
              same object, untouched
    """
    if isinstance(string, bytes):
        # undecodable bytes are dropped on purpose instead of raising
        return string.decode('utf-8', 'ignore')
    return string


def as_bytes(string):
    """
    Ensure that whatever type of string is incoming, it is returned as bytes,
    encoding to utf-8 otherwise.

    :param string: A ``bytes`` or text object
    :returns: ``string`` itself if it already was ``bytes``, otherwise its
              utf-8 encoding (characters that cannot be encoded are dropped)
    """
    if isinstance(string, bytes):
        return string
    return string.encode('utf-8', errors='ignore')


def str_to_int(string, round_down=True):
    """
    Parses a string number into an integer, optionally converting to a float
    and rounding down.

    Some LVM values may come with a comma instead of a dot to define decimals.
    This function normalizes a comma into a dot

    :param string: The value to convert. Usually a string, but ``int`` and
                   ``float`` values are accepted and used as-is.
    :param round_down: When ``True`` (default) the value is floored, otherwise
                       it is rounded with the builtin ``round``
    :returns: The converted ``int``
    :raises RuntimeError: If the value cannot be represented as an integer,
                          including not-a-number and infinite values
    """
    error_msg = "Unable to convert to integer: '%s'" % str(string)
    try:
        number = float(string.replace(',', '.'))
    except AttributeError:
        # no ``replace`` method means this is not a string. It might be
        # a number already, so use it; anything else is an error
        if not isinstance(string, (int, float)):
            logger.exception(error_msg)
            raise RuntimeError(error_msg)
        number = string
    except (TypeError, ValueError):
        logger.exception(error_msg)
        raise RuntimeError(error_msg)

    try:
        if round_down:
            return int(floor(number))
        return int(round(number))
    except (ValueError, OverflowError):
        # 'nan' and 'inf' are accepted by ``float()`` but have no integer
        # value; report them the same way as any other unparseable input
        logger.exception(error_msg)
        raise RuntimeError(error_msg)


def str_to_bool(val):
    """
    Convert a string representation of truth to True or False

    True values are 'y', 'yes', or ''; case-insensitive
    False values are 'n', or 'no'; case-insensitive
    Raises ValueError if 'val' is anything else.

    Values that are not strings are converted with ``str()`` before being
    compared.
    """
    true_vals = ['yes', 'y', '']
    false_vals = ['no', 'n']
    try:
        val = val.lower()
    except AttributeError:
        # not a string, compare against its string representation
        val = str(val).lower()
    if val in true_vals:
        return True
    elif val in false_vals:
        return False
    else:
        raise ValueError("Invalid input value: %s" % val)


def prompt_bool(question, input_=None):
    """
    Interface to prompt a boolean (or boolean-like) response from a user.
    Usually a confirmation.

    The question is asked again until a valid answer is given.

    :param question: The text to show to the user
    :param input_: Optional callable used in place of the builtin ``input``.
                   It receives the formatted prompt and must return the
                   response.
    :returns: ``True`` or ``False``, as interpreted by ``str_to_bool``
    """
    input_prompt = input_ or input
    prompt_format = '--> {question} '.format(question=question)
    # loop rather than recurse, so that any number of invalid answers can be
    # retried without growing the call stack
    while True:
        response = input_prompt(prompt_format)
        try:
            return str_to_bool(response)
        except ValueError:
            terminal.error('Valid true responses are: y, yes, <Enter>')
            terminal.error('Valid false responses are: n, no')
            terminal.error('That response was invalid, please try again')


def merge_dict(x, y):
    """
    Return two dicts merged

    The result is a new dict (a shallow copy of ``x`` updated with ``y``), so
    neither input is modified. Keys present in both take the value from ``y``.
    """
    z = x.copy()
    z.update(y)
    return z
\ No newline at end of file diff --git a/src/ceph-volume/ceph_volume/util/arg_validators.py b/src/ceph-volume/ceph_volume/util/arg_validators.py new file mode 100644 index 00000000..94cb4f69 --- /dev/null +++ b/src/ceph-volume/ceph_volume/util/arg_validators.py @@ -0,0 +1,150 @@ +import argparse +import os +from ceph_volume import terminal +from ceph_volume import decorators +from ceph_volume.util import disk +from ceph_volume.util.device import Device + + +class ValidDevice(object): + + def __init__(self, as_string=False, gpt_ok=False): + self.as_string = as_string + self.gpt_ok = gpt_ok + + def __call__(self, dev_path): + device = self._is_valid_device(dev_path) + return self._format_device(device) + + def _format_device(self, device): + if self.as_string: + if device.is_lv: + # all codepaths expect an lv path to be returned in this format + return "{}/{}".format(device.vg_name, device.lv_name) + return device.path + return device + + def _is_valid_device(self, dev_path): + device = Device(dev_path) + error = None + if not device.exists: + error = "Unable to proceed with non-existing device: %s" % dev_path + # FIXME this is not a nice API, this validator was meant to catch any + # non-existing devices upfront, not check for gpt headers. Now this + # needs to optionally skip checking gpt headers which is beyond + # verifying if the device exists. 
The better solution would be to + # configure this with a list of checks that can be excluded/included on + # __init__ + elif device.has_gpt_headers and not self.gpt_ok: + error = "GPT headers found, they must be removed on: %s" % dev_path + + if error: + raise argparse.ArgumentError(None, error) + + return device + + +class ValidBatchDevice(ValidDevice): + + def __call__(self, dev_path): + dev = self._is_valid_device(dev_path) + if dev.is_partition: + raise argparse.ArgumentError( + None, + '{} is a partition, please pass ' + 'LVs or raw block devices'.format(dev_path)) + return self._format_device(dev) + + +class OSDPath(object): + """ + Validate path exists and it looks like an OSD directory. + """ + + @decorators.needs_root + def __call__(self, string): + if not os.path.exists(string): + error = "Path does not exist: %s" % string + raise argparse.ArgumentError(None, error) + + arg_is_partition = disk.is_partition(string) + if arg_is_partition: + return os.path.abspath(string) + absolute_path = os.path.abspath(string) + if not os.path.isdir(absolute_path): + error = "Argument is not a directory or device which is required to scan" + raise argparse.ArgumentError(None, error) + key_files = ['ceph_fsid', 'fsid', 'keyring', 'ready', 'type', 'whoami'] + dir_files = os.listdir(absolute_path) + for key_file in key_files: + if key_file not in dir_files: + terminal.error('All following files must exist in path: %s' % ' '.join(key_files)) + error = "Required file (%s) was not found in OSD dir path: %s" % ( + key_file, + absolute_path + ) + raise argparse.ArgumentError(None, error) + + return os.path.abspath(string) + + +def exclude_group_options(parser, groups, argv=None): + """ + ``argparse`` has the ability to check for mutually exclusive options, but + it only allows a basic XOR behavior: only one flag can be used from + a defined group of options. This doesn't help when two groups of options + need to be separated. 
For example, with filestore and bluestore, neither + set can be used in conjunction with the other set. + + This helper validator will consume the parser to inspect the group flags, + and it will group them together from ``groups``. This allows proper error + reporting, matching each incompatible flag with its group name. + + :param parser: The argparse object, once it has configured all flags. It is + required to contain the group names being used to validate. + :param groups: A list of group names (at least two), with the same used for + ``add_argument_group`` + :param argv: Consume the args (sys.argv) directly from this argument + + .. note: **Unfortunately** this will not be able to validate correctly when + using default flags. In the case of filestore vs. bluestore, ceph-volume + defaults to --bluestore, but we can't check that programmatically, we can + only parse the flags seen via argv + """ + # Reduce the parser groups to only the groups we need to intersect + parser_groups = [g for g in parser._action_groups if g.title in groups] + # A mapping of the group name to flags/options + group_flags = {} + flags_to_verify = [] + for group in parser_groups: + # option groups may have more than one item in ``option_strings``, this + # will loop over ``_group_actions`` which contains the + # ``option_strings``, like ``['--filestore']`` + group_flags[group.title] = [ + option for group_action in group._group_actions + for option in group_action.option_strings + ] + + # Gather all the flags present in the groups so that we only check on those. 
+ for flags in group_flags.values(): + flags_to_verify.extend(flags) + + seen = [] + last_flag = None + last_group = None + for flag in argv: + if flag not in flags_to_verify: + continue + for group_name, flags in group_flags.items(): + if flag in flags: + seen.append(group_name) + # We are mutually excluding groups, so having more than 1 group + # in ``seen`` means we must raise an error + if len(set(seen)) == len(groups): + terminal.warning('Incompatible flags were found, some values may get ignored') + msg = 'Cannot use %s (%s) with %s (%s)' % ( + last_flag, last_group, flag, group_name + ) + terminal.warning(msg) + last_group = group_name + last_flag = flag diff --git a/src/ceph-volume/ceph_volume/util/constants.py b/src/ceph-volume/ceph_volume/util/constants.py new file mode 100644 index 00000000..3ec819ec --- /dev/null +++ b/src/ceph-volume/ceph_volume/util/constants.py @@ -0,0 +1,46 @@ + +# mount flags +mount = dict( + xfs=['rw', 'noatime' , 'inode64'] +) + + +# format flags +mkfs = dict( + xfs=[ + # force overwriting previous fs + '-f', + # set the inode size to 2kb + '-i', 'size=2048', + ], +) + +# The fantastical world of ceph-disk labels, they should give you the +# collywobbles +ceph_disk_guids = { + # luks + '45b0969e-9b03-4f30-b4c6-35865ceff106': {'type': 'journal', 'encrypted': True, 'encryption_type': 'luks'}, + 'cafecafe-9b03-4f30-b4c6-35865ceff106': {'type': 'block', 'encrypted': True, 'encryption_type': 'luks'}, + '166418da-c469-4022-adf4-b30afd37f176': {'type': 'block.db', 'encrypted': True, 'encryption_type': 'luks'}, + '86a32090-3647-40b9-bbbd-38d8c573aa86': {'type': 'block.wal', 'encrypted': True, 'encryption_type': 'luks'}, + '4fbd7e29-9d25-41b8-afd0-35865ceff05d': {'type': 'data', 'encrypted': True, 'encryption_type': 'luks'}, + # plain + '45b0969e-9b03-4f30-b4c6-5ec00ceff106': {'type': 'journal', 'encrypted': True, 'encryption_type': 'plain'}, + 'cafecafe-9b03-4f30-b4c6-5ec00ceff106': {'type': 'block', 'encrypted': True, 'encryption_type': 
'plain'}, + '93b0052d-02d9-4d8a-a43b-33a3ee4dfbc3': {'type': 'block.db', 'encrypted': True, 'encryption_type': 'plain'}, + '306e8683-4fe2-4330-b7c0-00a917c16966': {'type': 'block.wal', 'encrypted': True, 'encryption_type': 'plain'}, + '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d': {'type': 'data', 'encrypted': True, 'encryption_type': 'plain'}, + # regular guids that differ from plain + 'fb3aabf9-d25f-47cc-bf5e-721d1816496b': {'type': 'lockbox', 'encrypted': False, 'encryption_type': None}, + '30cd0809-c2b2-499c-8879-2d6b78529876': {'type': 'block.db', 'encrypted': False, 'encryption_type': None}, + '5ce17fce-4087-4169-b7ff-056cc58473f9': {'type': 'block.wal', 'encrypted': False, 'encryption_type': None}, + '4fbd7e29-9d25-41b8-afd0-062c0ceff05d': {'type': 'data', 'encrypted': False, 'encryption_type': None}, + 'cafecafe-9b03-4f30-b4c6-b4b80ceff106': {'type': 'block', 'encrypted': False, 'encryption_type': None}, + # multipath + '01b41e1b-002a-453c-9f17-88793989ff8f': {'type': 'block.wal', 'encrypted': False, 'encryption_type': None}, + 'ec6d6385-e346-45dc-be91-da2a7c8b3261': {'type': 'block.wal', 'encrypted': False, 'encryption_type': None}, + '45b0969e-8ae0-4982-bf9d-5a8d867af560': {'type': 'journal', 'encrypted': False, 'encryption_type': None}, + '4fbd7e29-8ae0-4982-bf9d-5a8d867af560': {'type': 'data', 'encrypted': False, 'encryption_type': None}, + '7f4a666a-16f3-47a2-8445-152ef4d03f6c': {'type': 'lockbox', 'encrypted': False, 'encryption_type': None}, + 'cafecafe-8ae0-4982-bf9d-5a8d867af560': {'type': 'block', 'encrypted': False, 'encryption_type': None}, +} diff --git a/src/ceph-volume/ceph_volume/util/device.py b/src/ceph-volume/ceph_volume/util/device.py new file mode 100644 index 00000000..c06244dc --- /dev/null +++ b/src/ceph-volume/ceph_volume/util/device.py @@ -0,0 +1,549 @@ +# -*- coding: utf-8 -*- + +import os +from functools import total_ordering +from ceph_volume import sys_info, process +from ceph_volume.api import lvm +from ceph_volume.util import disk, 
system +from ceph_volume.util.constants import ceph_disk_guids + +report_template = """ +{dev:<25} {size:<12} {rot!s:<7} {available!s:<9} {model}""" + + +def encryption_status(abspath): + """ + Helper function to run ``encryption.status()``. It is done here to avoid + a circular import issue (encryption module imports from this module) and to + ease testing by allowing monkeypatching of this function. + """ + from ceph_volume.util import encryption + return encryption.status(abspath) + + +class Devices(object): + """ + A container for Device instances with reporting + """ + + def __init__(self, filter_for_batch=False): + if not sys_info.devices: + sys_info.devices = disk.get_devices() + self.devices = [Device(k) for k in + sys_info.devices.keys()] + if filter_for_batch: + self.devices = [d for d in self.devices if d.available_lvm_batch] + + def pretty_report(self): + output = [ + report_template.format( + dev='Device Path', + size='Size', + rot='rotates', + model='Model name', + available='available', + )] + for device in sorted(self.devices): + output.append(device.report()) + return ''.join(output) + + def json_report(self): + output = [] + for device in sorted(self.devices): + output.append(device.json_report()) + return output + +@total_ordering +class Device(object): + + pretty_template = """ + {attr:<25} {value}""" + + report_fields = [ + 'rejected_reasons', + 'available', + 'path', + 'sys_api', + 'device_id', + ] + pretty_report_sys_fields = [ + 'human_readable_size', + 'model', + 'removable', + 'ro', + 'rotational', + 'sas_address', + 'scheduler_mode', + 'vendor', + ] + + # define some class variables; mostly to enable the use of autospec in + # unittests + lvs = [] + + def __init__(self, path): + self.path = path + # LVs can have a vg/lv path, while disks will have /dev/sda + self.abspath = path + self.lv_api = None + self.lvs = [] + self.vgs = [] + self.vg_name = None + self.lv_name = None + self.disk_api = {} + self.blkid_api = {} + self.sys_api = {} + 
self._exists = None + self._is_lvm_member = None + self._parse() + + self.available_lvm, self.rejected_reasons_lvm = self._check_lvm_reject_reasons() + self.available_raw, self.rejected_reasons_raw = self._check_raw_reject_reasons() + self.available = self.available_lvm and self.available_raw + self.rejected_reasons = list(set(self.rejected_reasons_lvm + + self.rejected_reasons_raw)) + + self.device_id = self._get_device_id() + + def __lt__(self, other): + ''' + Implementing this method and __eq__ allows the @total_ordering + decorator to turn the Device class into a totally ordered type. + This can slower then implementing all comparison operations. + This sorting should put available devices before unavailable devices + and sort on the path otherwise (str sorting). + ''' + if self.available == other.available: + return self.path < other.path + return self.available and not other.available + + def __eq__(self, other): + return self.path == other.path + + def __hash__(self): + return hash(self.path) + + def _parse(self): + if not sys_info.devices: + sys_info.devices = disk.get_devices() + self.sys_api = sys_info.devices.get(self.abspath, {}) + if not self.sys_api: + # if no device was found check if we are a partition + partname = self.abspath.split('/')[-1] + for device, info in sys_info.devices.items(): + part = info['partitions'].get(partname, {}) + if part: + self.sys_api = part + break + + # if the path is not absolute, we have 'vg/lv', let's use LV name + # to get the LV. 
+ if self.path[0] == '/': + lv = lvm.get_first_lv(filters={'lv_path': self.path}) + else: + vgname, lvname = self.path.split('/') + lv = lvm.get_first_lv(filters={'lv_name': lvname, + 'vg_name': vgname}) + if lv: + self.lv_api = lv + self.lvs = [lv] + self.abspath = lv.lv_path + self.vg_name = lv.vg_name + self.lv_name = lv.name + else: + dev = disk.lsblk(self.path) + self.blkid_api = disk.blkid(self.path) + self.disk_api = dev + device_type = dev.get('TYPE', '') + # always check is this is an lvm member + if device_type in ['part', 'disk']: + self._set_lvm_membership() + + self.ceph_disk = CephDiskDevice(self) + + def __repr__(self): + prefix = 'Unknown' + if self.is_lv: + prefix = 'LV' + elif self.is_partition: + prefix = 'Partition' + elif self.is_device: + prefix = 'Raw Device' + return '<%s: %s>' % (prefix, self.abspath) + + def pretty_report(self): + def format_value(v): + if isinstance(v, list): + return ', '.join(v) + else: + return v + def format_key(k): + return k.strip('_').replace('_', ' ') + output = ['\n====== Device report {} ======\n'.format(self.path)] + output.extend( + [self.pretty_template.format( + attr=format_key(k), + value=format_value(v)) for k, v in vars(self).items() if k in + self.report_fields and k != 'disk_api' and k != 'sys_api'] ) + output.extend( + [self.pretty_template.format( + attr=format_key(k), + value=format_value(v)) for k, v in self.sys_api.items() if k in + self.pretty_report_sys_fields]) + for lv in self.lvs: + output.append(""" + --- Logical Volume ---""") + output.extend( + [self.pretty_template.format( + attr=format_key(k), + value=format_value(v)) for k, v in lv.report().items()]) + return ''.join(output) + + def report(self): + return report_template.format( + dev=self.abspath, + size=self.size_human, + rot=self.rotational, + available=self.available, + model=self.model, + ) + + def json_report(self): + output = {k.strip('_'): v for k, v in vars(self).items() if k in + self.report_fields} + output['lvs'] = 
[lv.report() for lv in self.lvs] + return output + + def _get_device_id(self): + """ + Please keep this implementation in sync with get_device_id() in + src/common/blkdev.cc + """ + props = ['ID_VENDOR', 'ID_MODEL', 'ID_MODEL_ENC', 'ID_SERIAL_SHORT', 'ID_SERIAL', + 'ID_SCSI_SERIAL'] + p = disk.udevadm_property(self.abspath, props) + if p.get('ID_MODEL','').startswith('LVM PV '): + p['ID_MODEL'] = p.get('ID_MODEL_ENC', '').replace('\\x20', ' ').strip() + if 'ID_VENDOR' in p and 'ID_MODEL' in p and 'ID_SCSI_SERIAL' in p: + dev_id = '_'.join([p['ID_VENDOR'], p['ID_MODEL'], + p['ID_SCSI_SERIAL']]) + elif 'ID_MODEL' in p and 'ID_SERIAL_SHORT' in p: + dev_id = '_'.join([p['ID_MODEL'], p['ID_SERIAL_SHORT']]) + elif 'ID_SERIAL' in p: + dev_id = p['ID_SERIAL'] + if dev_id.startswith('MTFD'): + # Micron NVMes hide the vendor + dev_id = 'Micron_' + dev_id + else: + # the else branch should fallback to using sysfs and ioctl to + # retrieve device_id on FreeBSD. Still figuring out if/how the + # python ioctl implementation does that on FreeBSD + dev_id = '' + dev_id.replace(' ', '_') + return dev_id + + def _set_lvm_membership(self): + if self._is_lvm_member is None: + # this is contentious, if a PV is recognized by LVM but has no + # VGs, should we consider it as part of LVM? We choose not to + # here, because most likely, we need to use VGs from this PV. + self._is_lvm_member = False + for path in self._get_pv_paths(): + vgs = lvm.get_device_vgs(path) + if vgs: + self.vgs.extend(vgs) + # a pv can only be in one vg, so this should be safe + # FIXME: While the above assumption holds, sda1 and sda2 + # can each host a PV and VG. I think the vg_name property is + # actually unused (not 100% sure) and can simply be removed + self.vg_name = vgs[0] + self._is_lvm_member = True + self.lvs.extend(lvm.get_device_lvs(path)) + return self._is_lvm_member + + def _get_pv_paths(self): + """ + For block devices LVM can reside on the raw block device or on a + partition. 
Return a list of paths to be checked for a pv. + """ + paths = [self.abspath] + path_dir = os.path.dirname(self.abspath) + for part in self.sys_api.get('partitions', {}).keys(): + paths.append(os.path.join(path_dir, part)) + return paths + + @property + def exists(self): + return os.path.exists(self.abspath) + + @property + def has_gpt_headers(self): + return self.blkid_api.get("PTTYPE") == "gpt" + + @property + def rotational(self): + rotational = self.sys_api.get('rotational') + if rotational is None: + # fall back to lsblk if not found in sys_api + # default to '1' if no value is found with lsblk either + rotational = self.disk_api.get('ROTA', '1') + return rotational == '1' + + @property + def model(self): + return self.sys_api['model'] + + @property + def size_human(self): + return self.sys_api['human_readable_size'] + + @property + def size(self): + return self.sys_api['size'] + + @property + def lvm_size(self): + """ + If this device was made into a PV it would lose 1GB in total size + due to the 1GB physical extent size we set when creating volume groups + """ + size = disk.Size(b=self.size) + lvm_size = disk.Size(gb=size.gb.as_int()) - disk.Size(gb=1) + return lvm_size + + @property + def is_lvm_member(self): + if self._is_lvm_member is None: + self._set_lvm_membership() + return self._is_lvm_member + + @property + def is_ceph_disk_member(self): + is_member = self.ceph_disk.is_member + if self.sys_api.get("partitions"): + for part in self.sys_api.get("partitions").keys(): + part = Device("/dev/%s" % part) + if part.is_ceph_disk_member: + is_member = True + break + return is_member + + @property + def has_bluestore_label(self): + out, err, ret = process.call([ + 'ceph-bluestore-tool', 'show-label', + '--dev', self.abspath], verbose_on_failure=False) + if ret: + return False + return True + + @property + def is_mapper(self): + return self.path.startswith(('/dev/mapper', '/dev/dm-')) + + @property + def is_lv(self): + return self.lv_api is not None + + 
@property + def is_partition(self): + if self.disk_api: + return self.disk_api['TYPE'] == 'part' + elif self.blkid_api: + return self.blkid_api['TYPE'] == 'part' + return False + + @property + def is_device(self): + api = None + if self.disk_api: + api = self.disk_api + elif self.blkid_api: + api = self.blkid_api + if api: + is_device = api['TYPE'] == 'device' + is_disk = api['TYPE'] == 'disk' + if is_device or is_disk: + return True + return False + + @property + def is_acceptable_device(self): + return self.is_device or self.is_partition + + @property + def is_encrypted(self): + """ + Only correct for LVs, device mappers, and partitions. Will report a ``None`` + for raw devices. + """ + crypt_reports = [self.blkid_api.get('TYPE', ''), self.disk_api.get('FSTYPE', '')] + if self.is_lv: + # if disk APIs are reporting this is encrypted use that: + if 'crypto_LUKS' in crypt_reports: + return True + # if ceph-volume created this, then a tag would let us know + elif self.lv_api.encrypted: + return True + return False + elif self.is_partition: + return 'crypto_LUKS' in crypt_reports + elif self.is_mapper: + active_mapper = encryption_status(self.abspath) + if active_mapper: + # normalize a bit to ensure same values regardless of source + encryption_type = active_mapper['type'].lower().strip('12') # turn LUKS1 or LUKS2 into luks + return True if encryption_type in ['plain', 'luks'] else False + else: + return False + else: + return None + + @property + def used_by_ceph(self): + # only filter out data devices as journals could potentially be reused + osd_ids = [lv.tags.get("ceph.osd_id") is not None for lv in self.lvs + if lv.tags.get("ceph.type") in ["data", "block"]] + return any(osd_ids) + + @property + def vg_free_percent(self): + if self.vgs: + return [vg.free_percent for vg in self.vgs] + else: + return [1] + + @property + def vg_size(self): + if self.vgs: + return [vg.size for vg in self.vgs] + else: + # TODO fix this...we can probably get rid of vg_free + return 
self.vg_free + + @property + def vg_free(self): + ''' + Returns the free space in all VGs on this device. If no VGs are + present, returns the disk size. + ''' + if self.vgs: + return [vg.free for vg in self.vgs] + else: + # We could also query 'lvmconfig + # --typeconfig full' and use allocations -> physical_extent_size + # value to project the space for a vg + # assuming 4M extents here + extent_size = 4194304 + vg_free = int(self.size / extent_size) * extent_size + if self.size % extent_size == 0: + # If the extent size divides size exactly, deduct on extent for + # LVM metadata + vg_free -= extent_size + return [vg_free] + + def _check_generic_reject_reasons(self): + reasons = [ + ('removable', 1, 'removable'), + ('ro', 1, 'read-only'), + ('locked', 1, 'locked'), + ] + rejected = [reason for (k, v, reason) in reasons if + self.sys_api.get(k, '') == v] + if self.is_acceptable_device: + # reject disks smaller than 5GB + if int(self.sys_api.get('size', 0)) < 5368709120: + rejected.append('Insufficient space (<5GB)') + else: + rejected.append("Device type is not acceptable. It should be raw device or partition") + if self.is_ceph_disk_member: + rejected.append("Used by ceph-disk") + if self.has_bluestore_label: + rejected.append('Has BlueStore device label') + return rejected + + def _check_lvm_reject_reasons(self): + rejected = [] + if self.vgs: + available_vgs = [vg for vg in self.vgs if int(vg.vg_free_count) > 10] + if not available_vgs: + rejected.append('Insufficient space (<10 extents) on vgs') + else: + # only check generic if no vgs are present. 
Vgs might hold lvs and + # that might cause 'locked' to trigger + rejected.extend(self._check_generic_reject_reasons()) + + return len(rejected) == 0, rejected + + def _check_raw_reject_reasons(self): + rejected = self._check_generic_reject_reasons() + if len(self.vgs) > 0: + rejected.append('LVM detected') + + return len(rejected) == 0, rejected + + @property + def available_lvm_batch(self): + if self.sys_api.get("partitions"): + return False + if system.device_is_mounted(self.path): + return False + return self.is_device or self.is_lv + + +class CephDiskDevice(object): + """ + Detect devices that have been created by ceph-disk, report their type + (journal, data, etc..). Requires a ``Device`` object as input. + """ + + def __init__(self, device): + self.device = device + self._is_ceph_disk_member = None + + @property + def partlabel(self): + """ + In containers, the 'PARTLABEL' attribute might not be detected + correctly via ``lsblk``, so we poke at the value with ``lsblk`` first, + falling back to ``blkid`` (which works correclty in containers). + """ + lsblk_partlabel = self.device.disk_api.get('PARTLABEL') + if lsblk_partlabel: + return lsblk_partlabel + return self.device.blkid_api.get('PARTLABEL', '') + + @property + def parttype(self): + """ + Seems like older version do not detect PARTTYPE correctly (assuming the + info in util/disk.py#lsblk is still valid). 
+ SImply resolve to using blkid since lsblk will throw an error if asked + for an unknown columns + """ + return self.device.blkid_api.get('PARTTYPE', '') + + @property + def is_member(self): + if self._is_ceph_disk_member is None: + if 'ceph' in self.partlabel: + self._is_ceph_disk_member = True + return True + elif self.parttype in ceph_disk_guids.keys(): + return True + return False + return self._is_ceph_disk_member + + @property + def type(self): + types = [ + 'data', 'wal', 'db', 'lockbox', 'journal', + # ceph-disk uses 'ceph block' when placing data in bluestore, but + # keeps the regular OSD files in 'ceph data' :( :( :( :( + 'block', + ] + for t in types: + if t in self.partlabel: + return t + label = ceph_disk_guids.get(self.parttype, {}) + return label.get('type', 'unknown').split('.')[-1] diff --git a/src/ceph-volume/ceph_volume/util/disk.py b/src/ceph-volume/ceph_volume/util/disk.py new file mode 100644 index 00000000..2cf18cb5 --- /dev/null +++ b/src/ceph-volume/ceph_volume/util/disk.py @@ -0,0 +1,804 @@ +import logging +import os +import re +import stat +from ceph_volume import process +from ceph_volume.api import lvm +from ceph_volume.util.system import get_file_contents + + +logger = logging.getLogger(__name__) + + +# The blkid CLI tool has some oddities which prevents having one common call +# to extract the information instead of having separate utilities. The `udev` +# type of output is needed in older versions of blkid (v 2.23) that will not +# work correctly with just the ``-p`` flag to bypass the cache for example. +# Xenial doesn't have this problem as it uses a newer blkid version. 
+ + +def get_partuuid(device): + """ + If a device is a partition, it will probably have a PARTUUID on it that + will persist and can be queried against `blkid` later to detect the actual + device + """ + out, err, rc = process.call( + ['blkid', '-s', 'PARTUUID', '-o', 'value', device] + ) + return ' '.join(out).strip() + + +def _blkid_parser(output): + """ + Parses the output from a system ``blkid`` call, requires output to be + produced using the ``-p`` flag which bypasses the cache, mangling the + names. These names are corrected to what it would look like without the + ``-p`` flag. + + Normal output:: + + /dev/sdb1: UUID="62416664-cbaf-40bd-9689-10bd337379c3" TYPE="xfs" [...] + """ + # first spaced separated item is garbage, gets tossed: + output = ' '.join(output.split()[1:]) + # split again, respecting possible whitespace in quoted values + pairs = output.split('" ') + raw = {} + processed = {} + mapping = { + 'UUID': 'UUID', + 'TYPE': 'TYPE', + 'PART_ENTRY_NAME': 'PARTLABEL', + 'PART_ENTRY_UUID': 'PARTUUID', + 'PART_ENTRY_TYPE': 'PARTTYPE', + 'PTTYPE': 'PTTYPE', + } + + for pair in pairs: + try: + column, value = pair.split('=') + except ValueError: + continue + raw[column] = value.strip().strip().strip('"') + + for key, value in raw.items(): + new_key = mapping.get(key) + if not new_key: + continue + processed[new_key] = value + + return processed + + +def blkid(device): + """ + The blkid interface to its CLI, creating an output similar to what is + expected from ``lsblk``. In most cases, ``lsblk()`` should be the preferred + method for extracting information about a device. There are some corner + cases where it might provide information that is otherwise unavailable. + + The system call uses the ``-p`` flag which bypasses the cache, the caveat + being that the keys produced are named completely different to expected + names. + + For example, instead of ``PARTLABEL`` it provides a ``PART_ENTRY_NAME``. 
+ A bit of translation between these known keys is done, which is why + ``lsblk`` should always be preferred: the output provided here is not as + rich, given that a translation of keys is required for a uniform interface + with the ``-p`` flag. + + Label name to expected output chart: + + cache bypass name expected name + + UUID UUID + TYPE TYPE + PART_ENTRY_NAME PARTLABEL + PART_ENTRY_UUID PARTUUID + """ + out, err, rc = process.call( + ['blkid', '-p', device] + ) + return _blkid_parser(' '.join(out)) + + +def get_part_entry_type(device): + """ + Parses the ``ID_PART_ENTRY_TYPE`` from the "low level" (bypasses the cache) + output that uses the ``udev`` type of output. This output is intended to be + used for udev rules, but it is useful in this case as it is the only + consistent way to retrieve the GUID used by ceph-disk to identify devices. + """ + out, err, rc = process.call(['blkid', '-p', '-o', 'udev', device]) + for line in out: + if 'ID_PART_ENTRY_TYPE=' in line: + return line.split('=')[-1].strip() + return '' + + +def get_device_from_partuuid(partuuid): + """ + If a device has a partuuid, query blkid so that it can tell us what that + device is + """ + out, err, rc = process.call( + ['blkid', '-t', 'PARTUUID="%s"' % partuuid, '-o', 'device'] + ) + return ' '.join(out).strip() + + +def remove_partition(device): + """ + Removes a partition using parted + + :param device: A ``Device()`` object + """ + parent_device = '/dev/%s' % device.disk_api['PKNAME'] + udev_info = udevadm_property(device.abspath) + partition_number = udev_info.get('ID_PART_ENTRY_NUMBER') + if not partition_number: + raise RuntimeError('Unable to detect the partition number for device: %s' % device.abspath) + + process.run( + ['parted', parent_device, '--script', '--', 'rm', partition_number] + ) + + +def _stat_is_device(stat_obj): + """ + Helper function that will interpret ``os.stat`` output directly, so that other + functions can call ``os.stat`` once and interpret that result several 
times + """ + return stat.S_ISBLK(stat_obj) + + +def _lsblk_parser(line): + """ + Parses lines in lsblk output. Requires output to be in pair mode (``-P`` flag). Lines + need to be whole strings, the line gets split when processed. + + :param line: A string, with the full line from lsblk output + """ + # parse the COLUMN="value" output to construct the dictionary + pairs = line.split('" ') + parsed = {} + for pair in pairs: + try: + column, value = pair.split('=') + except ValueError: + continue + parsed[column] = value.strip().strip().strip('"') + return parsed + + +def device_family(device): + """ + Returns a list of associated devices. It assumes that ``device`` is + a parent device. It is up to the caller to ensure that the device being + used is a parent, not a partition. + """ + labels = ['NAME', 'PARTLABEL', 'TYPE'] + command = ['lsblk', '-P', '-p', '-o', ','.join(labels), device] + out, err, rc = process.call(command) + devices = [] + for line in out: + devices.append(_lsblk_parser(line)) + + return devices + + +def udevadm_property(device, properties=[]): + """ + Query udevadm for information about device properties. + Optionally pass a list of properties to return. A requested property might + not be returned if not present. + + Expected output format:: + # udevadm info --query=property --name=/dev/sda :( + DEVNAME=/dev/sda + DEVTYPE=disk + ID_ATA=1 + ID_BUS=ata + ID_MODEL=SK_hynix_SC311_SATA_512GB + ID_PART_TABLE_TYPE=gpt + ID_PART_TABLE_UUID=c8f91d57-b26c-4de1-8884-0c9541da288c + ID_PATH=pci-0000:00:17.0-ata-3 + ID_PATH_TAG=pci-0000_00_17_0-ata-3 + ID_REVISION=70000P10 + ID_SERIAL=SK_hynix_SC311_SATA_512GB_MS83N71801150416A + TAGS=:systemd: + USEC_INITIALIZED=16117769 + ... 
+ """ + out = _udevadm_info(device) + ret = {} + for line in out: + p, v = line.split('=', 1) + if not properties or p in properties: + ret[p] = v + return ret + + +def _udevadm_info(device): + """ + Call udevadm and return the output + """ + cmd = ['udevadm', 'info', '--query=property', device] + out, _err, _rc = process.call(cmd) + return out + + +def lsblk(device, columns=None, abspath=False): + """ + Create a dictionary of identifying values for a device using ``lsblk``. + Each supported column is a key, in its *raw* format (all uppercase + usually). ``lsblk`` has support for certain "columns" (in blkid these + would be labels), and these columns vary between distributions and + ``lsblk`` versions. The newer versions support a richer set of columns, + while older ones were a bit limited. + + These are a subset of lsblk columns which are known to work on both CentOS 7 and Xenial: + + NAME device name + KNAME internal kernel device name + MAJ:MIN major:minor device number + FSTYPE filesystem type + MOUNTPOINT where the device is mounted + LABEL filesystem LABEL + UUID filesystem UUID + RO read-only device + RM removable device + MODEL device identifier + SIZE size of the device + STATE state of the device + OWNER user name + GROUP group name + MODE device node permissions + ALIGNMENT alignment offset + MIN-IO minimum I/O size + OPT-IO optimal I/O size + PHY-SEC physical sector size + LOG-SEC logical sector size + ROTA rotational device + SCHED I/O scheduler name + RQ-SIZE request queue size + TYPE device type + PKNAME internal parent kernel device name + DISC-ALN discard alignment offset + DISC-GRAN discard granularity + DISC-MAX discard max bytes + DISC-ZERO discard zeroes data + + There is a bug in ``lsblk`` where using all the available (supported) + columns will result in no output (!), in order to workaround this the + following columns have been removed from the default reporting columns: + + * RQ-SIZE (request queue size) + * MIN-IO minimum I/O size + * 
def is_device(dev):
    """
    Boolean to determine if a given device is a block device (**not**
    a partition!)

    For example: /dev/sda would return True, but not /dev/sdc1

    :param dev: Path of the device to inspect
    :returns: ``False`` when the path does not exist; otherwise whether
              ``lsblk`` reports the ``disk`` type, or, when ``lsblk`` gives no
              type at all, whether ``lstat`` describes a block device
    """
    if not os.path.exists(dev):
        return False
    # use lsblk first, fall back to using stat
    device_type = lsblk(dev).get('TYPE')
    if device_type:
        return device_type == 'disk'

    # fallback to stat: lsblk could not tell us the type
    return _stat_is_device(os.lstat(dev).st_mode)
+ + Common comparison operators are supported:: + + >>> hd1 = Size(gb=400) + >>> hd2 = Size(gb=500) + >>> hd1 > hd2 + False + >>> hd1 < hd2 + True + >>> hd1 == hd2 + False + >>> hd1 == Size(gb=400) + True + + The Size object can also be multiplied or divided:: + + >>> hd1 + <Size(400.00 GB)> + >>> hd1 * 2 + <Size(800.00 GB)> + >>> hd1 + <Size(800.00 GB)> + + Additions and subtractions are only supported between Size objects:: + + >>> Size(gb=224) - Size(gb=100) + <Size(124.00 GB)> + >>> Size(gb=1) + Size(mb=300) + <Size(1.29 GB)> + + Can also display a human-readable representation, with automatic detection + on best suited unit, or alternatively, specific unit representation:: + + >>> s = Size(mb=2211) + >>> s + <Size(2.16 GB)> + >>> s.mb + <FloatMB(2211.0)> + >>> print "Total size: %s" % s.mb + Total size: 2211.00 MB + >>> print "Total size: %s" % s + Total size: 2.16 GB + """ + + @classmethod + def parse(cls, size): + if (len(size) > 2 and + size[-2].lower() in ['k', 'm', 'g', 't'] and + size[-1].lower() == 'b'): + return cls(**{size[-2:].lower(): float(size[0:-2])}) + elif size[-1].lower() in ['b', 'k', 'm', 'g', 't']: + return cls(**{size[-1].lower(): float(size[0:-1])}) + else: + return cls(b=float(size)) + + + def __init__(self, multiplier=1024, **kw): + self._multiplier = multiplier + # create a mapping of units-to-multiplier, skip bytes as that is + # calculated initially always and does not need to convert + aliases = [ + [('k', 'kb', 'kilobytes'), self._multiplier], + [('m', 'mb', 'megabytes'), self._multiplier ** 2], + [('g', 'gb', 'gigabytes'), self._multiplier ** 3], + [('t', 'tb', 'terabytes'), self._multiplier ** 4], + ] + # and mappings for units-to-formatters, including bytes and aliases for + # each + format_aliases = [ + [('b', 'bytes'), FloatB], + [('kb', 'kilobytes'), FloatKB], + [('mb', 'megabytes'), FloatMB], + [('gb', 'gigabytes'), FloatGB], + [('tb', 'terabytes'), FloatTB], + ] + self._formatters = {} + for key, value in format_aliases: + 
for alias in key: + self._formatters[alias] = value + self._factors = {} + for key, value in aliases: + for alias in key: + self._factors[alias] = value + + for k, v in kw.items(): + self._convert(v, k) + # only pursue the first occurrence + break + + def _convert(self, size, unit): + """ + Convert any size down to bytes so that other methods can rely on bytes + being available always, regardless of what they pass in, avoiding the + need for a mapping of every permutation. + """ + if unit in ['b', 'bytes']: + self._b = size + return + factor = self._factors[unit] + self._b = float(size * factor) + + def _get_best_format(self): + """ + Go through all the supported units, and use the first one that is less + than 1024. This allows to represent size in the most readable format + available + """ + for unit in ['b', 'kb', 'mb', 'gb', 'tb']: + if getattr(self, unit) > 1024: + continue + return getattr(self, unit) + + def __repr__(self): + return "<Size(%s)>" % self._get_best_format() + + def __str__(self): + return "%s" % self._get_best_format() + + def __format__(self, spec): + return str(self._get_best_format()).__format__(spec) + + def __int__(self): + return int(self._b) + + def __float__(self): + return self._b + + def __lt__(self, other): + if isinstance(other, Size): + return self._b < other._b + else: + return self.b < other + + def __le__(self, other): + if isinstance(other, Size): + return self._b <= other._b + else: + return self.b <= other + + def __eq__(self, other): + if isinstance(other, Size): + return self._b == other._b + else: + return self.b == other + + def __ne__(self, other): + if isinstance(other, Size): + return self._b != other._b + else: + return self.b != other + + def __ge__(self, other): + if isinstance(other, Size): + return self._b >= other._b + else: + return self.b >= other + + def __gt__(self, other): + if isinstance(other, Size): + return self._b > other._b + else: + return self.b > other + + def __add__(self, other): + if 
def human_readable_size(size):
    """
    Take a size in bytes, and transform it into a human readable size with up
    to two decimals of precision.

    Sizes too large for the biggest known suffix are reported in that suffix
    (``TB``) instead of failing.

    :param size: A number, the size in bytes
    :returns: A string like ``'2.00 KB'``
    """
    suffixes = ['B', 'KB', 'MB', 'GB', 'TB']
    last_index = len(suffixes) - 1
    suffix_index = 0
    # never step past the last suffix, otherwise huge values raise IndexError
    while size > 1024 and suffix_index < last_index:
        suffix_index += 1
        size = size / 1024.0
    return "{size:.2f} {suffix}".format(
        size=size,
        suffix=suffixes[suffix_index])
+ """ + s = s.replace(' ', '') + if s[-1].isdigit(): + return Size(b=float(s)) + n = float(s[:-1]) + if s[-1].lower() == 't': + return Size(tb=n) + if s[-1].lower() == 'g': + return Size(gb=n) + if s[-1].lower() == 'm': + return Size(mb=n) + if s[-1].lower() == 'k': + return Size(kb=n) + return None + + +def get_partitions_facts(sys_block_path): + partition_metadata = {} + for folder in os.listdir(sys_block_path): + folder_path = os.path.join(sys_block_path, folder) + if os.path.exists(os.path.join(folder_path, 'partition')): + contents = get_file_contents(os.path.join(folder_path, 'partition')) + if contents: + part = {} + partname = folder + part_sys_block_path = os.path.join(sys_block_path, partname) + + part['start'] = get_file_contents(part_sys_block_path + "/start", 0) + part['sectors'] = get_file_contents(part_sys_block_path + "/size", 0) + + part['sectorsize'] = get_file_contents( + part_sys_block_path + "/queue/logical_block_size") + if not part['sectorsize']: + part['sectorsize'] = get_file_contents( + part_sys_block_path + "/queue/hw_sector_size", 512) + part['size'] = float(part['sectors']) * 512 + part['human_readable_size'] = human_readable_size(float(part['sectors']) * 512) + part['holders'] = [] + for holder in os.listdir(part_sys_block_path + '/holders'): + part['holders'].append(holder) + + partition_metadata[partname] = part + return partition_metadata + + +def is_mapper_device(device_name): + return device_name.startswith(('/dev/mapper', '/dev/dm-')) + + +def is_locked_raw_device(disk_path): + """ + A device can be locked by a third party software like a database. 
+ To detect that case, the device is opened in Read/Write and exclusive mode + """ + open_flags = (os.O_RDWR | os.O_EXCL) + open_mode = 0 + fd = None + + try: + fd = os.open(disk_path, open_flags, open_mode) + except OSError: + return 1 + + try: + os.close(fd) + except OSError: + return 1 + + return 0 + + +def get_block_devs_lsblk(): + ''' + This returns a list of lists with 3 items per inner list. + KNAME - reflects the kernel device name , for example /dev/sda or /dev/dm-0 + NAME - the device name, for example /dev/sda or + /dev/mapper/<vg_name>-<lv_name> + TYPE - the block device type: disk, partition, lvm and such + + ''' + cmd = ['lsblk', '-plno', 'KNAME,NAME,TYPE'] + stdout, stderr, rc = process.call(cmd) + # lsblk returns 1 on failure + if rc == 1: + raise OSError('lsblk returned failure, stderr: {}'.format(stderr)) + return [re.split(r'\s+', line) for line in stdout] + + +def get_devices(_sys_block_path='/sys/block'): + """ + Captures all available block devices as reported by lsblk. + Additional interesting metadata like sectors, size, vendor, + solid/rotational, etc. is collected from /sys/block/<device> + + Returns a dictionary, where keys are the full paths to devices. + + ..note:: loop devices, removable media, and logical volumes are never included. 
+ """ + + device_facts = {} + + block_devs = get_block_devs_lsblk() + + for block in block_devs: + devname = os.path.basename(block[0]) + diskname = block[1] + if block[2] != 'disk': + continue + sysdir = os.path.join(_sys_block_path, devname) + metadata = {} + + # If the mapper device is a logical volume it gets excluded + if is_mapper_device(diskname): + if lvm.get_device_lvs(diskname): + continue + + # all facts that have no defaults + # (<name>, <path relative to _sys_block_path>) + facts = [('removable', 'removable'), + ('ro', 'ro'), + ('vendor', 'device/vendor'), + ('model', 'device/model'), + ('rev', 'device/rev'), + ('sas_address', 'device/sas_address'), + ('sas_device_handle', 'device/sas_device_handle'), + ('support_discard', 'queue/discard_granularity'), + ('rotational', 'queue/rotational'), + ('nr_requests', 'queue/nr_requests'), + ] + for key, file_ in facts: + metadata[key] = get_file_contents(os.path.join(sysdir, file_)) + + metadata['scheduler_mode'] = "" + scheduler = get_file_contents(sysdir + "/queue/scheduler") + if scheduler is not None: + m = re.match(r".*?(\[(.*)\])", scheduler) + if m: + metadata['scheduler_mode'] = m.group(2) + + metadata['partitions'] = get_partitions_facts(sysdir) + + size = get_file_contents(os.path.join(sysdir, 'size'), 0) + + metadata['sectors'] = get_file_contents(os.path.join(sysdir, 'sectors'), 0) + fallback_sectorsize = get_file_contents(sysdir + "/queue/hw_sector_size", 512) + metadata['sectorsize'] = get_file_contents(sysdir + + "/queue/logical_block_size", + fallback_sectorsize) + metadata['size'] = float(size) * 512 + metadata['human_readable_size'] = human_readable_size(metadata['size']) + metadata['path'] = diskname + metadata['locked'] = is_locked_raw_device(metadata['path']) + + device_facts[diskname] = metadata + return device_facts diff --git a/src/ceph-volume/ceph_volume/util/encryption.py b/src/ceph-volume/ceph_volume/util/encryption.py new file mode 100644 index 00000000..72a0ccf1 --- /dev/null +++ 
def luks_format(key, device):
    """
    Run ``cryptsetup luksFormat`` against a device, feeding the key through
    stdin

    :param key: dmcrypt secret key, passed to cryptsetup on stdin
    :param device: Absolute path to device
    """
    cryptsetup_options = [
        '--batch-mode',  # do not prompt
        '--key-file',  # misnomer, should be key
        '-',  # because we indicate stdin for the key here
    ]
    command = ['cryptsetup'] + cryptsetup_options + ['luksFormat', device]
    process.call(command, stdin=key, terminal_verbose=True, show_command=True)
Usually a UUID + """ + command = [ + 'cryptsetup', + '--key-file', + '-', + '--allow-discards', # allow discards (aka TRIM) requests for device + 'open', + device, + mapping, + '--type', 'plain', + '--key-size', '256', + ] + + process.call(command, stdin=key, terminal_verbose=True, show_command=True) + + +def luks_open(key, device, mapping): + """ + Decrypt (open) an encrypted device, previously prepared with cryptsetup + + .. note: ceph-disk will require an additional b64decode call for this to work + + :param key: dmcrypt secret key + :param device: absolute path to device + :param mapping: mapping name used to correlate device. Usually a UUID + """ + command = [ + 'cryptsetup', + '--key-file', + '-', + '--allow-discards', # allow discards (aka TRIM) requests for device + 'luksOpen', + device, + mapping, + ] + process.call(command, stdin=key, terminal_verbose=True, show_command=True) + + +def dmcrypt_close(mapping): + """ + Encrypt (close) a device, previously decrypted with cryptsetup + + :param mapping: + """ + if not os.path.exists(mapping): + logger.debug('device mapper path does not exist %s' % mapping) + logger.debug('will skip cryptsetup removal') + return + # don't be strict about the remove call, but still warn on the terminal if it fails + process.run(['cryptsetup', 'remove', mapping], stop_on_error=False) + + +def get_dmcrypt_key(osd_id, osd_fsid, lockbox_keyring=None): + """ + Retrieve the dmcrypt (secret) key stored initially on the monitor. The key + is sent initially with JSON, and the Monitor then mangles the name to + ``dm-crypt/osd/<fsid>/luks`` + + The ``lockbox.keyring`` file is required for this operation, and it is + assumed it will exist on the path for the same OSD that is being activated. + To support scanning, it is optionally configurable to a custom location + (e.g. 
inside a lockbox partition mounted in a temporary location) + """ + if lockbox_keyring is None: + lockbox_keyring = '/var/lib/ceph/osd/%s-%s/lockbox.keyring' % (conf.cluster, osd_id) + name = 'client.osd-lockbox.%s' % osd_fsid + config_key = 'dm-crypt/osd/%s/luks' % osd_fsid + + stdout, stderr, returncode = process.call( + [ + 'ceph', + '--cluster', conf.cluster, + '--name', name, + '--keyring', lockbox_keyring, + 'config-key', + 'get', + config_key + ], + show_command=True + ) + if returncode != 0: + raise RuntimeError('Unable to retrieve dmcrypt secret') + return ' '.join(stdout).strip() + + +def write_lockbox_keyring(osd_id, osd_fsid, secret): + """ + Helper to write the lockbox keyring. This is needed because the bluestore OSD will + not persist the keyring, and it can't be stored in the data device for filestore because + at the time this is needed, the device is encrypted. + + For bluestore: A tmpfs filesystem is mounted, so the path can get written + to, but the files are ephemeral, which requires this file to be created + every time it is activated. + For filestore: The path for the OSD would exist at this point even if no + OSD data device is mounted, so the keyring is written to fetch the key, and + then the data device is mounted on that directory, making the keyring + "disappear". + """ + if os.path.exists('/var/lib/ceph/osd/%s-%s/lockbox.keyring' % (conf.cluster, osd_id)): + return + + name = 'client.osd-lockbox.%s' % osd_fsid + write_keyring( + osd_id, + secret, + keyring_name='lockbox.keyring', + name=name + ) + + +def status(device): + """ + Capture the metadata information of a possibly encrypted device, returning + a dictionary with all the values found (if any). + + An encrypted device will contain information about a device. Example + successful output looks like:: + + $ cryptsetup status /dev/mapper/ed6b5a26-eafe-4cd4-87e3-422ff61e26c4 + /dev/mapper/ed6b5a26-eafe-4cd4-87e3-422ff61e26c4 is active and is in use. 
def legacy_encrypted(device):
    """
    Detect if a device was encrypted with ceph-disk or not. In the case of
    encrypted devices, include the type of encryption (LUKS, or PLAIN), and
    infer what the lockbox partition is.

    This function assumes that ``device`` will be a partition.

    :param device: Path to a partition, or a directory where one is mounted
    :returns: A dictionary with the ``encrypted``, ``type``, ``lockbox`` and
              ``device`` keys
    :raises RuntimeError: When ``device`` is a directory and no mounted device
                          can be found for it
    """
    if os.path.isdir(device):
        mounts = system.get_mounts(paths=True)
        # remember the directory: ``device`` is rebound below and the error
        # message has to report the path that was looked up, not ``None``
        mount_path = device
        # yes, rebind the device variable here because a directory isn't going
        # to help with parsing
        device = mounts.get(mount_path, [None])[0]
        if not device:
            raise RuntimeError('unable to determine the device mounted at %s' % mount_path)
    metadata = {'encrypted': False, 'type': None, 'lockbox': '', 'device': device}
    # check if the device is online/decrypted first
    active_mapper = status(device)
    if active_mapper:
        # normalize a bit to ensure same values regardless of source
        metadata['type'] = active_mapper['type'].lower().strip('12')  # turn LUKS1 or LUKS2 into luks
        metadata['encrypted'] = metadata['type'] in ['plain', 'luks']
        # The true device is now available to this function, so it gets
        # re-assigned here for the lockbox checks to succeed (it is not
        # possible to guess partitions from a device mapper device otherwise
        device = active_mapper.get('device', device)
        metadata['device'] = device
    else:
        uuid = get_part_entry_type(device)
        guid_match = constants.ceph_disk_guids.get(uuid, {})
        encrypted_guid = guid_match.get('encrypted', False)
        if encrypted_guid:
            metadata['encrypted'] = True
            metadata['type'] = guid_match['encryption_type']

    # Lets find the lockbox location now, to do this, we need to find out the
    # parent device name for the device so that we can query all of its
    # associated devices and *then* look for one that has the 'lockbox' label
    # on it. Thanks for being awesome ceph-disk
    disk_meta = lsblk(device, abspath=True)
    if not disk_meta:
        return metadata
    parent_device = disk_meta['PKNAME']
    # With the parent device set, we can now look for the lockbox listing associated devices
    devices = [Device(i['NAME']) for i in device_family(parent_device)]
    for d in devices:
        if d.ceph_disk.type == 'lockbox':
            metadata['lockbox'] = d.abspath
            break
    return metadata
def get_block_db_size(lv_format=True):
    """
    Helper to retrieve the size (defined in bytes in ceph.conf) to create
    the block.db logical volume. The configured value is parsed into an
    integer number of bytes, wrapped in a ``Size`` object, and finally
    (optionally) formatted as a string of whole gigabytes so that it can be
    used for creating the LV.

    :param lv_format: Return a string to be used for ``lv_create``. A 5 GB size
                      would result in '5G', otherwise it will return a ``Size`` object.
    :returns: ``None`` when no size is configured, otherwise the formatted
              string or the ``Size`` object
    :raises RuntimeError: When the configured size is below 2GB

    .. note: Configuration values are in bytes, unlike journals which
             are defined in megabytes
    """
    try:
        conf_db_size = conf.ceph.get_safe('osd', 'bluestore_block_db_size', None)
    except RuntimeError:
        logger.exception("failed to load ceph configuration, will use defaults")
        conf_db_size = None

    if not conf_db_size:
        logger.debug(
            'block.db has no size configuration, will fallback to using as much as possible'
        )
        # TODO better to return disk.Size(b=0) here
        return None

    logger.debug('bluestore_block_db_size set to %s' % conf_db_size)
    db_size = disk.Size(b=str_to_int(conf_db_size))
    minimum_size = disk.Size(gb=2)

    if db_size < minimum_size:
        mlogger.error('Refusing to continue with configured size for block.db')
        raise RuntimeError('block.db sizes must be larger than 2GB, detected: %s' % db_size)
    if not lv_format:
        return db_size
    return '%sG' % db_size.gb.as_int()
note: Configuration values are in bytes, unlike journals which + are defined in gigabytes + """ + conf_wal_size = None + try: + conf_wal_size = conf.ceph.get_safe('osd', 'bluestore_block_wal_size', None) + except RuntimeError: + logger.exception("failed to load ceph configuration, will use defaults") + + if not conf_wal_size: + logger.debug( + 'block.wal has no size configuration, will fallback to using as much as possible' + ) + return None + logger.debug('bluestore_block_wal_size set to %s' % conf_wal_size) + wal_size = disk.Size(b=str_to_int(conf_wal_size)) + + if wal_size < disk.Size(gb=2): + mlogger.error('Refusing to continue with configured size for block.wal') + raise RuntimeError('block.wal sizes must be larger than 2GB, detected: %s' % wal_size) + if lv_format: + return '%sG' % wal_size.gb.as_int() + return wal_size + + +def create_id(fsid, json_secrets, osd_id=None): + """ + :param fsid: The osd fsid to create, always required + :param json_secrets: a json-ready object with whatever secrets are wanted + to be passed to the monitor + :param osd_id: Reuse an existing ID from an OSD that's been destroyed, if the + id does not exist in the cluster a new ID will be created + """ + bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster + cmd = [ + 'ceph', + '--cluster', conf.cluster, + '--name', 'client.bootstrap-osd', + '--keyring', bootstrap_keyring, + '-i', '-', + 'osd', 'new', fsid + ] + if osd_id is not None: + if osd_id_available(osd_id): + cmd.append(osd_id) + else: + raise RuntimeError("The osd ID {} is already in use or does not exist.".format(osd_id)) + stdout, stderr, returncode = process.call( + cmd, + stdin=json_secrets, + show_command=True + ) + if returncode != 0: + raise RuntimeError('Unable to create a new OSD id') + return ' '.join(stdout).strip() + + +def osd_id_available(osd_id): + """ + Checks to see if an osd ID exists and if it's available for + reuse. Returns True if it is, False if it isn't. 
+ + :param osd_id: The osd ID to check + """ + if osd_id is None: + return False + bootstrap_keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster + stdout, stderr, returncode = process.call( + [ + 'ceph', + '--cluster', conf.cluster, + '--name', 'client.bootstrap-osd', + '--keyring', bootstrap_keyring, + 'osd', + 'tree', + '-f', 'json', + ], + show_command=True + ) + if returncode != 0: + raise RuntimeError('Unable check if OSD id exists: %s' % osd_id) + + output = json.loads(''.join(stdout).strip()) + osds = output['nodes'] + osd = [osd for osd in osds if str(osd['id']) == str(osd_id)] + if osd and osd[0].get('status') == "destroyed": + return True + return False + + +def mount_tmpfs(path): + process.run([ + 'mount', + '-t', + 'tmpfs', 'tmpfs', + path + ]) + + # Restore SELinux context + system.set_context(path) + + +def create_osd_path(osd_id, tmpfs=False): + path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id) + system.mkdir_p('/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)) + if tmpfs: + mount_tmpfs(path) + + +def format_device(device): + # only supports xfs + command = ['mkfs', '-t', 'xfs'] + + # get the mkfs options if any for xfs, + # fallback to the default options defined in constants.mkfs + flags = conf.ceph.get_list( + 'osd', + 'osd_mkfs_options_xfs', + default=constants.mkfs.get('xfs'), + split=' ', + ) + + # always force + if '-f' not in flags: + flags.insert(0, '-f') + + command.extend(flags) + command.append(device) + process.run(command) + + +def _normalize_mount_flags(flags, extras=None): + """ + Mount flag options have to be a single string, separated by a comma. If the + flags are separated by spaces, or with commas and spaces in ceph.conf, the + mount options will be passed incorrectly. 
+ + This will help when parsing ceph.conf values return something like:: + + ["rw,", "exec,"] + + Or:: + + [" rw ,", "exec"] + + :param flags: A list of flags, or a single string of mount flags + :param extras: Extra set of mount flags, useful when custom devices like VDO need + ad-hoc mount configurations + """ + # Instead of using set(), we append to this new list here, because set() + # will create an arbitrary order on the items that is made worst when + # testing with tools like tox that includes a randomizer seed. By + # controlling the order, it is easier to correctly assert the expectation + unique_flags = [] + if isinstance(flags, list): + if extras: + flags.extend(extras) + + # ensure that spaces and commas are removed so that they can join + # correctly, remove duplicates + for f in flags: + if f and f not in unique_flags: + unique_flags.append(f.strip().strip(',')) + return ','.join(unique_flags) + + # split them, clean them, and join them back again + flags = flags.strip().split(' ') + if extras: + flags.extend(extras) + + # remove possible duplicates + for f in flags: + if f and f not in unique_flags: + unique_flags.append(f.strip().strip(',')) + flags = ','.join(unique_flags) + # Before returning, split them again, since strings can be mashed up + # together, preventing removal of duplicate entries + return ','.join(set(flags.split(','))) + + +def mount_osd(device, osd_id, **kw): + extras = [] + is_vdo = kw.get('is_vdo', '0') + if is_vdo == '1': + extras = ['discard'] + destination = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id) + command = ['mount', '-t', 'xfs', '-o'] + flags = conf.ceph.get_list( + 'osd', + 'osd_mount_options_xfs', + default=constants.mount.get('xfs'), + split=' ', + ) + command.append( + _normalize_mount_flags(flags, extras=extras) + ) + command.append(device) + command.append(destination) + process.run(command) + + # Restore SELinux context + system.set_context(destination) + + +def _link_device(device, device_type, osd_id): 
def _validate_bluestore_device(device, excepted_device_type, osd_uuid):
    """
    Check the bluestore label of ``device`` (as printed by
    ``ceph-bluestore-tool show-label``) against what the caller expects.

    :param device: device path handed to ``--dev`` and used as the key into
                   the tool's JSON output
    :param excepted_device_type: value the label's ``description`` must have
    :param osd_uuid: value the label's ``osd_uuid`` must have
    :raises SystemExit: with status 1, after reporting through
                        ``terminal.error``, when the tool wrote to stderr,
                        exited non-zero, did not report the device, or when
                        the description or the osd uuid do not match
    """
    def abort(message):
        # every failed check is reported the same way and ends the program
        terminal.error(message)
        raise SystemExit(1)

    stdout, stderr, exit_code = process.call(
        ['ceph-bluestore-tool', 'show-label', '--dev', device]
    )
    if stderr:
        abort('ceph-bluestore-tool failed to run. %s'% stderr)
    if exit_code:
        abort('no label on %s'% device)

    labels = json.loads(''.join(stdout))
    if device not in labels:
        abort('%s not in the output of ceph-bluestore-tool, buggy?'% device)

    current_device_type = labels[device]['description']
    if current_device_type != excepted_device_type:
        abort('%s is not a %s device but %s'% (device, excepted_device_type, current_device_type))

    current_osd_uuid = labels[device]['osd_uuid']
    if current_osd_uuid != osd_uuid:
        abort('device %s is used by another osd %s as %s, should be %s'% (device, current_osd_uuid, current_device_type, osd_uuid))
def osd_mkfs_bluestore(osd_id, fsid, keyring=None, wal=False, db=False):
    """
    Create the files for the OSD to function. A normal call will look like:

        ceph-osd --cluster ceph --mkfs --mkkey -i 0
                 --monmap /var/lib/ceph/osd/ceph-0/activate.monmap
                 --osd-data /var/lib/ceph/osd/ceph-0
                 --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c
                 --keyring /var/lib/ceph/osd/ceph-0/keyring
                 --setuser ceph --setgroup ceph

    In some cases it is required to use the keyring, when it is passed in as
    a keyword argument it is used as part of the ceph-osd command

    :param osd_id: id of the OSD, used for ``-i`` and to build the data path
    :param fsid: value passed as ``--osd-uuid``
    :param keyring: when not ``None``, ``--keyfile -`` is added and the value
                    is sent to the command through stdin
    :param wal: optional path for ``--bluestore-block-wal-path``; the path is
                chowned to the ceph user
    :param db: optional path for ``--bluestore-block-db-path``; the path is
               chowned to the ceph user
    :raises RuntimeError: when ``ceph-osd`` exits with an error other than
                          ``EWOULDBLOCK``, or keeps returning ``EWOULDBLOCK``
                          on every attempt
    """
    path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
    monmap = os.path.join(path, 'activate.monmap')

    system.chown(path)

    base_command = [
        'ceph-osd',
        '--cluster', conf.cluster,
        '--osd-objectstore', 'bluestore',
        '--mkfs',
        '-i', osd_id,
        '--monmap', monmap,
    ]

    supplementary_command = [
        '--osd-data', path,
        '--osd-uuid', fsid,
        '--setuser', 'ceph',
        '--setgroup', 'ceph'
    ]

    if keyring is not None:
        base_command.extend(['--keyfile', '-'])

    if wal:
        base_command.extend(
            ['--bluestore-block-wal-path', wal]
        )
        system.chown(wal)

    if db:
        base_command.extend(
            ['--bluestore-block-db-path', db]
        )
        system.chown(db)

    osdspec_affinity = get_osdspec_affinity()
    if osdspec_affinity:
        base_command.extend(['--osdspec-affinity', osdspec_affinity])

    command = base_command + supplementary_command

    # When running in containers the --mkfs on raw device sometimes fails
    # to acquire a lock through flock() on the device because systemd-udevd
    # holds one temporarily. See KernelDevice.cc and _lock() to understand how
    # ceph-osd acquires the lock. Because this is really transient, we try up
    # to 5 times and wait for 1 sec in-between
    max_attempts = 5
    for attempt in range(1, max_attempts + 1):
        _, _, returncode = process.call(command, stdin=keyring, terminal_verbose=True, show_command=True)
        if returncode == 0:
            return
        # give up on any other error, and also when the device is still
        # locked on the last attempt: falling out of the loop silently would
        # make a failed mkfs look like a success
        if returncode != errno.EWOULDBLOCK or attempt == max_attempts:
            raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
        logger.info(
            'disk is held by another process, trying to mkfs again... (%s/%s attempt)' % (attempt, max_attempts)
        )
        time.sleep(1)
A normal call will look like: + + ceph-osd --cluster ceph --mkfs --mkkey -i 0 \ + --monmap /var/lib/ceph/osd/ceph-0/activate.monmap \ + --osd-data /var/lib/ceph/osd/ceph-0 \ + --osd-journal /var/lib/ceph/osd/ceph-0/journal \ + --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c \ + --keyring /var/lib/ceph/osd/ceph-0/keyring \ + --setuser ceph --setgroup ceph + + """ + path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id) + monmap = os.path.join(path, 'activate.monmap') + journal = os.path.join(path, 'journal') + + system.chown(journal) + system.chown(path) + + command = [ + 'ceph-osd', + '--cluster', conf.cluster, + '--osd-objectstore', 'filestore', + '--mkfs', + '-i', osd_id, + '--monmap', monmap, + ] + + if get_osdspec_affinity(): + command.extend(['--osdspec-affinity', get_osdspec_affinity()]) + + if __release__ != 'luminous': + # goes through stdin + command.extend(['--keyfile', '-']) + + command.extend([ + '--osd-data', path, + '--osd-journal', journal, + '--osd-uuid', fsid, + '--setuser', 'ceph', + '--setgroup', 'ceph' + ]) + + _, _, returncode = process.call( + command, stdin=keyring, terminal_verbose=True, show_command=True + ) + if returncode != 0: + raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command))) diff --git a/src/ceph-volume/ceph_volume/util/system.py b/src/ceph-volume/ceph_volume/util/system.py new file mode 100644 index 00000000..49986233 --- /dev/null +++ b/src/ceph-volume/ceph_volume/util/system.py @@ -0,0 +1,346 @@ +import errno +import logging +import os +import pwd +import platform +import tempfile +import uuid +from ceph_volume import process, terminal +from . 
def which(executable):
    """
    Locate ``executable`` and return its full path.

    The directories listed in ``$PATH`` are searched first. When nothing is
    found there, a warning is emitted and a fixed list of well known
    ``bin``/``sbin`` directories is tried. If that fails as well the name is
    returned untouched, so that callers do not fail hard and the system still
    gets a chance to resolve the executable from some custom location.
    """
    def first_existing_file(directories):
        # first "<directory>/<executable>" that is a regular file, else None
        candidates = (os.path.join(directory, executable) for directory in directories)
        return next((found for found in candidates if os.path.isfile(found)), None)

    path = os.getenv('PATH', '')
    from_path = first_existing_file(path.split(':'))
    if from_path:
        return from_path
    mlogger.warning('Executable {} not in PATH: {}'.format(executable, path))

    well_known = first_existing_file((
        '/usr/local/bin',
        '/bin',
        '/usr/bin',
        '/usr/local/sbin',
        '/usr/sbin',
        '/sbin',
    ))
    if not well_known:
        return executable
    mlogger.warning('Found executable under {}, please ensure $PATH is set correctly!'.format(well_known))
    return well_known
def mkdir_p(path, chown=True):
    """
    A `mkdir -p` that defaults to chown the path to the ceph user

    Missing parent directories are created as needed and an already existing
    directory is not an error. Only ``path`` itself is chowned, any parent
    created along the way keeps its default ownership.

    :param path: directory to create
    :param chown: when true, change the owner of ``path`` to the uid and gid
                  returned by ``get_ceph_user_ids()``
    :raises OSError: when the directory cannot be created, including the case
                     where ``path`` already exists but is not a directory
    """
    try:
        os.makedirs(path)
    except OSError as e:
        # like `mkdir -p`: tolerate "already exists" only for directories
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise
    if chown:
        uid, gid = get_ceph_user_ids()
        os.chown(path, uid, gid)
def path_is_mounted(path, destination=None):
    """
    Tell whether the given path shows up as mounted.

    The path is resolved with ``os.path.realpath`` and looked up in the
    path based mapping returned by ``get_mounts(paths=True)``.

    :param path: the path to look up
    :param destination: when given, the result is ``True`` only if this value
                        is among the entries recorded for the path
    :returns: ``True`` or ``False``
    """
    mounts = get_mounts(paths=True)
    entries = mounts.get(os.path.realpath(path), [])
    if not destination:
        # no specific entry requested: any entry at all means "mounted"
        return entries != []
    return destination in entries
def get_mounts(devices=False, paths=False, realpath=False):
    """
    Read ``PROCDIR + '/mounts'`` and build a mapping of the system mounts so
    that other helpers can tell which path or device is mounted.

    Entries whose device does not exist, or is not an absolute path, are
    ignored. Since some setups need the extra information, an exception is
    made for these device names:

    - tmpfs
    - devtmpfs

    :param devices: return the device-to-path(s) mapping. This is also what
                    is returned when neither ``devices`` nor ``paths`` is set
    :param paths: return the path-to-device(s) mapping
    :param realpath: Resolve devices to use their realpaths. This is useful
                     for paths like LVM where more than one path can point to
                     the same device
    :returns: a dict whose values are lists
    """
    always_keep = ['tmpfs', 'devtmpfs']
    by_device = {}
    by_path = {}
    want_devices = devices is True or (devices is False and paths is False)

    with open(PROCDIR + '/mounts', 'rb') as mounts:
        proc_mounts = mounts.readlines()

    for line in proc_mounts:
        fields = [as_string(f) for f in line.split()]
        if len(fields) < 3:
            continue
        device = fields[0]
        if realpath and device.startswith('/'):
            device = os.path.realpath(device)
        path = os.path.realpath(fields[1])
        # only care about actual existing devices
        is_real_device = os.path.exists(device) and device.startswith('/')
        if not is_real_device and device not in always_keep:
            continue
        by_device.setdefault(device, []).append(path)
        by_path.setdefault(path, []).append(device)

    # Default to returning information for devices if nothing was requested
    return by_device if want_devices else by_path
+ """ + skip = os.environ.get('CEPH_VOLUME_SKIP_RESTORECON', '') + if skip.lower() in ['1', 'true', 'yes']: + logger.info( + 'CEPH_VOLUME_SKIP_RESTORECON environ is set, will not call restorecon' + ) + return + + try: + stdout, stderr, code = process.call(['selinuxenabled'], + verbose_on_failure=False) + except FileNotFoundError: + logger.info('No SELinux found, skipping call to restorecon') + return + + if code != 0: + logger.info('SELinux is not enabled, will not call restorecon') + return + + # restore selinux context to default policy values + if which('restorecon').startswith('/'): + if recursive: + process.run(['restorecon', '-R', path]) + else: + process.run(['restorecon', path]) diff --git a/src/ceph-volume/ceph_volume/util/templates.py b/src/ceph-volume/ceph_volume/util/templates.py new file mode 100644 index 00000000..85a366d2 --- /dev/null +++ b/src/ceph-volume/ceph_volume/util/templates.py @@ -0,0 +1,49 @@ + +osd_header = """ +{:-^100}""".format('') + + +osd_component_titles = """ + Type Path LV Size % of device""" + + +osd_reused_id = """ + OSD id {id_: <55}""" + + +osd_component = """ + {_type: <15} {path: <55} {size: <15} {percent:.2%}""" + + +osd_encryption = """ + encryption: {enc: <15}""" + + +total_osds = """ +Total OSDs: {total_osds} +""" + + +def filtered_devices(devices): + string = """ +Filtered Devices:""" + for device, info in devices.items(): + string += """ + %s""" % device + + for reason in info['reasons']: + string += """ + %s""" % reason + + string += "\n" + return string + + +ssd_volume_group = """ +Solid State VG: + Targets: {target: <25} Total size: {total_lv_size: <25} + Total LVs: {total_lvs: <25} Size per LV: {lv_size: <25} + Devices: {block_db_devices} +""" + + |