diff options
Diffstat (limited to 'src/ceph-volume/ceph_volume/util/disk.py')
-rw-r--r-- | src/ceph-volume/ceph_volume/util/disk.py | 804 |
1 files changed, 804 insertions, 0 deletions
diff --git a/src/ceph-volume/ceph_volume/util/disk.py b/src/ceph-volume/ceph_volume/util/disk.py new file mode 100644 index 00000000..2cf18cb5 --- /dev/null +++ b/src/ceph-volume/ceph_volume/util/disk.py @@ -0,0 +1,804 @@ +import logging +import os +import re +import stat +from ceph_volume import process +from ceph_volume.api import lvm +from ceph_volume.util.system import get_file_contents + + +logger = logging.getLogger(__name__) + + +# The blkid CLI tool has some oddities which prevents having one common call +# to extract the information instead of having separate utilities. The `udev` +# type of output is needed in older versions of blkid (v 2.23) that will not +# work correctly with just the ``-p`` flag to bypass the cache for example. +# Xenial doesn't have this problem as it uses a newer blkid version. + + +def get_partuuid(device): + """ + If a device is a partition, it will probably have a PARTUUID on it that + will persist and can be queried against `blkid` later to detect the actual + device + """ + out, err, rc = process.call( + ['blkid', '-s', 'PARTUUID', '-o', 'value', device] + ) + return ' '.join(out).strip() + + +def _blkid_parser(output): + """ + Parses the output from a system ``blkid`` call, requires output to be + produced using the ``-p`` flag which bypasses the cache, mangling the + names. These names are corrected to what it would look like without the + ``-p`` flag. + + Normal output:: + + /dev/sdb1: UUID="62416664-cbaf-40bd-9689-10bd337379c3" TYPE="xfs" [...] + """ + # first spaced separated item is garbage, gets tossed: + output = ' '.join(output.split()[1:]) + # split again, respecting possible whitespace in quoted values + pairs = output.split('" ') + raw = {} + processed = {} + mapping = { + 'UUID': 'UUID', + 'TYPE': 'TYPE', + 'PART_ENTRY_NAME': 'PARTLABEL', + 'PART_ENTRY_UUID': 'PARTUUID', + 'PART_ENTRY_TYPE': 'PARTTYPE', + 'PTTYPE': 'PTTYPE', + } + + for pair in pairs: + try: + column, value = pair.split('=') + except ValueError: + continue + raw[column] = value.strip().strip().strip('"') + + for key, value in raw.items(): + new_key = mapping.get(key) + if not new_key: + continue + processed[new_key] = value + + return processed + + +def blkid(device): + """ + The blkid interface to its CLI, creating an output similar to what is + expected from ``lsblk``. In most cases, ``lsblk()`` should be the preferred + method for extracting information about a device. There are some corner + cases where it might provide information that is otherwise unavailable. + + The system call uses the ``-p`` flag which bypasses the cache, the caveat + being that the keys produced are named completely different to expected + names. + + For example, instead of ``PARTLABEL`` it provides a ``PART_ENTRY_NAME``. + A bit of translation between these known keys is done, which is why + ``lsblk`` should always be preferred: the output provided here is not as + rich, given that a translation of keys is required for a uniform interface + with the ``-p`` flag. + + Label name to expected output chart: + + cache bypass name expected name + + UUID UUID + TYPE TYPE + PART_ENTRY_NAME PARTLABEL + PART_ENTRY_UUID PARTUUID + """ + out, err, rc = process.call( + ['blkid', '-p', device] + ) + return _blkid_parser(' '.join(out)) + + +def get_part_entry_type(device): + """ + Parses the ``ID_PART_ENTRY_TYPE`` from the "low level" (bypasses the cache) + output that uses the ``udev`` type of output. This output is intended to be + used for udev rules, but it is useful in this case as it is the only + consistent way to retrieve the GUID used by ceph-disk to identify devices. + """ + out, err, rc = process.call(['blkid', '-p', '-o', 'udev', device]) + for line in out: + if 'ID_PART_ENTRY_TYPE=' in line: + return line.split('=')[-1].strip() + return '' + + +def get_device_from_partuuid(partuuid): + """ + If a device has a partuuid, query blkid so that it can tell us what that + device is + """ + out, err, rc = process.call( + ['blkid', '-t', 'PARTUUID="%s"' % partuuid, '-o', 'device'] + ) + return ' '.join(out).strip() + + +def remove_partition(device): + """ + Removes a partition using parted + + :param device: A ``Device()`` object + """ + parent_device = '/dev/%s' % device.disk_api['PKNAME'] + udev_info = udevadm_property(device.abspath) + partition_number = udev_info.get('ID_PART_ENTRY_NUMBER') + if not partition_number: + raise RuntimeError('Unable to detect the partition number for device: %s' % device.abspath) + + process.run( + ['parted', parent_device, '--script', '--', 'rm', partition_number] + ) + + +def _stat_is_device(stat_obj): + """ + Helper function that will interpret ``os.stat`` output directly, so that other + functions can call ``os.stat`` once and interpret that result several times + """ + return stat.S_ISBLK(stat_obj) + + +def _lsblk_parser(line): + """ + Parses lines in lsblk output. Requires output to be in pair mode (``-P`` flag). Lines + need to be whole strings, the line gets split when processed. + + :param line: A string, with the full line from lsblk output + """ + # parse the COLUMN="value" output to construct the dictionary + pairs = line.split('" ') + parsed = {} + for pair in pairs: + try: + column, value = pair.split('=') + except ValueError: + continue + parsed[column] = value.strip().strip().strip('"') + return parsed + + +def device_family(device): + """ + Returns a list of associated devices. It assumes that ``device`` is + a parent device. It is up to the caller to ensure that the device being + used is a parent, not a partition. + """ + labels = ['NAME', 'PARTLABEL', 'TYPE'] + command = ['lsblk', '-P', '-p', '-o', ','.join(labels), device] + out, err, rc = process.call(command) + devices = [] + for line in out: + devices.append(_lsblk_parser(line)) + + return devices + + +def udevadm_property(device, properties=[]): + """ + Query udevadm for information about device properties. + Optionally pass a list of properties to return. A requested property might + not be returned if not present. + + Expected output format:: + # udevadm info --query=property --name=/dev/sda :( + DEVNAME=/dev/sda + DEVTYPE=disk + ID_ATA=1 + ID_BUS=ata + ID_MODEL=SK_hynix_SC311_SATA_512GB + ID_PART_TABLE_TYPE=gpt + ID_PART_TABLE_UUID=c8f91d57-b26c-4de1-8884-0c9541da288c + ID_PATH=pci-0000:00:17.0-ata-3 + ID_PATH_TAG=pci-0000_00_17_0-ata-3 + ID_REVISION=70000P10 + ID_SERIAL=SK_hynix_SC311_SATA_512GB_MS83N71801150416A + TAGS=:systemd: + USEC_INITIALIZED=16117769 + ... + """ + out = _udevadm_info(device) + ret = {} + for line in out: + p, v = line.split('=', 1) + if not properties or p in properties: + ret[p] = v + return ret + + +def _udevadm_info(device): + """ + Call udevadm and return the output + """ + cmd = ['udevadm', 'info', '--query=property', device] + out, _err, _rc = process.call(cmd) + return out + + +def lsblk(device, columns=None, abspath=False): + """ + Create a dictionary of identifying values for a device using ``lsblk``. + Each supported column is a key, in its *raw* format (all uppercase + usually). ``lsblk`` has support for certain "columns" (in blkid these + would be labels), and these columns vary between distributions and + ``lsblk`` versions. The newer versions support a richer set of columns, + while older ones were a bit limited. + + These are a subset of lsblk columns which are known to work on both CentOS 7 and Xenial: + + NAME device name + KNAME internal kernel device name + MAJ:MIN major:minor device number + FSTYPE filesystem type + MOUNTPOINT where the device is mounted + LABEL filesystem LABEL + UUID filesystem UUID + RO read-only device + RM removable device + MODEL device identifier + SIZE size of the device + STATE state of the device + OWNER user name + GROUP group name + MODE device node permissions + ALIGNMENT alignment offset + MIN-IO minimum I/O size + OPT-IO optimal I/O size + PHY-SEC physical sector size + LOG-SEC logical sector size + ROTA rotational device + SCHED I/O scheduler name + RQ-SIZE request queue size + TYPE device type + PKNAME internal parent kernel device name + DISC-ALN discard alignment offset + DISC-GRAN discard granularity + DISC-MAX discard max bytes + DISC-ZERO discard zeroes data + + There is a bug in ``lsblk`` where using all the available (supported) + columns will result in no output (!), in order to workaround this the + following columns have been removed from the default reporting columns: + + * RQ-SIZE (request queue size) + * MIN-IO minimum I/O size + * OPT-IO optimal I/O size + + These should be available however when using `columns`. For example:: + + >>> lsblk('/dev/sda1', columns=['OPT-IO']) + {'OPT-IO': '0'} + + Normal CLI output, as filtered by the flags in this function will look like :: + + $ lsblk --nodeps -P -o NAME,KNAME,MAJ:MIN,FSTYPE,MOUNTPOINT + NAME="sda1" KNAME="sda1" MAJ:MIN="8:1" FSTYPE="ext4" MOUNTPOINT="/" + + :param columns: A list of columns to report as keys in its original form. + :param abspath: Set the flag for absolute paths on the report + """ + default_columns = [ + 'NAME', 'KNAME', 'MAJ:MIN', 'FSTYPE', 'MOUNTPOINT', 'LABEL', 'UUID', + 'RO', 'RM', 'MODEL', 'SIZE', 'STATE', 'OWNER', 'GROUP', 'MODE', + 'ALIGNMENT', 'PHY-SEC', 'LOG-SEC', 'ROTA', 'SCHED', 'TYPE', 'DISC-ALN', + 'DISC-GRAN', 'DISC-MAX', 'DISC-ZERO', 'PKNAME', 'PARTLABEL' + ] + device = device.rstrip('/') + columns = columns or default_columns + # --nodeps -> Avoid adding children/parents to the device, only give information + # on the actual device we are querying for + # -P -> Produce pairs of COLUMN="value" + # -p -> Return full paths to devices, not just the names, when ``abspath`` is set + # -o -> Use the columns specified or default ones provided by this function + base_command = ['lsblk', '--nodeps', '-P'] + if abspath: + base_command.append('-p') + base_command.append('-o') + base_command.append(','.join(columns)) + base_command.append(device) + out, err, rc = process.call(base_command) + + if rc != 0: + return {} + + return _lsblk_parser(' '.join(out)) + + +def is_device(dev): + """ + Boolean to determine if a given device is a block device (**not** + a partition!) + + For example: /dev/sda would return True, but not /dev/sdc1 + """ + if not os.path.exists(dev): + return False + # use lsblk first, fall back to using stat + TYPE = lsblk(dev).get('TYPE') + if TYPE: + return TYPE == 'disk' + + # fallback to stat + return _stat_is_device(os.lstat(dev).st_mode) + if stat.S_ISBLK(os.lstat(dev)): + return True + return False + + +def is_partition(dev): + """ + Boolean to determine if a given device is a partition, like /dev/sda1 + """ + if not os.path.exists(dev): + return False + # use lsblk first, fall back to using stat + TYPE = lsblk(dev).get('TYPE') + if TYPE: + return TYPE == 'part' + + # fallback to stat + stat_obj = os.stat(dev) + if _stat_is_device(stat_obj.st_mode): + return False + + major = os.major(stat_obj.st_rdev) + minor = os.minor(stat_obj.st_rdev) + if os.path.exists('/sys/dev/block/%d:%d/partition' % (major, minor)): + return True + return False + + +class BaseFloatUnit(float): + """ + Base class to support float representations of size values. Suffix is + computed on child classes by inspecting the class name + """ + + def __repr__(self): + return "<%s(%s)>" % (self.__class__.__name__, self.__float__()) + + def __str__(self): + return "{size:.2f} {suffix}".format( + size=self.__float__(), + suffix=self.__class__.__name__.split('Float')[-1] + ) + + def as_int(self): + return int(self.real) + + def as_float(self): + return self.real + + +class FloatB(BaseFloatUnit): + pass + + +class FloatMB(BaseFloatUnit): + pass + + +class FloatGB(BaseFloatUnit): + pass + + +class FloatKB(BaseFloatUnit): + pass + + +class FloatTB(BaseFloatUnit): + pass + + +class Size(object): + """ + Helper to provide an interface for different sizes given a single initial + input. Allows for comparison between different size objects, which avoids + the need to convert sizes before comparison (e.g. comparing megabytes + against gigabytes). + + Common comparison operators are supported:: + + >>> hd1 = Size(gb=400) + >>> hd2 = Size(gb=500) + >>> hd1 > hd2 + False + >>> hd1 < hd2 + True + >>> hd1 == hd2 + False + >>> hd1 == Size(gb=400) + True + + The Size object can also be multiplied or divided:: + + >>> hd1 + <Size(400.00 GB)> + >>> hd1 * 2 + <Size(800.00 GB)> + >>> hd1 + <Size(800.00 GB)> + + Additions and subtractions are only supported between Size objects:: + + >>> Size(gb=224) - Size(gb=100) + <Size(124.00 GB)> + >>> Size(gb=1) + Size(mb=300) + <Size(1.29 GB)> + + Can also display a human-readable representation, with automatic detection + on best suited unit, or alternatively, specific unit representation:: + + >>> s = Size(mb=2211) + >>> s + <Size(2.16 GB)> + >>> s.mb + <FloatMB(2211.0)> + >>> print "Total size: %s" % s.mb + Total size: 2211.00 MB + >>> print "Total size: %s" % s + Total size: 2.16 GB + """ + + @classmethod + def parse(cls, size): + if (len(size) > 2 and + size[-2].lower() in ['k', 'm', 'g', 't'] and + size[-1].lower() == 'b'): + return cls(**{size[-2:].lower(): float(size[0:-2])}) + elif size[-1].lower() in ['b', 'k', 'm', 'g', 't']: + return cls(**{size[-1].lower(): float(size[0:-1])}) + else: + return cls(b=float(size)) + + + def __init__(self, multiplier=1024, **kw): + self._multiplier = multiplier + # create a mapping of units-to-multiplier, skip bytes as that is + # calculated initially always and does not need to convert + aliases = [ + [('k', 'kb', 'kilobytes'), self._multiplier], + [('m', 'mb', 'megabytes'), self._multiplier ** 2], + [('g', 'gb', 'gigabytes'), self._multiplier ** 3], + [('t', 'tb', 'terabytes'), self._multiplier ** 4], + ] + # and mappings for units-to-formatters, including bytes and aliases for + # each + format_aliases = [ + [('b', 'bytes'), FloatB], + [('kb', 'kilobytes'), FloatKB], + [('mb', 'megabytes'), FloatMB], + [('gb', 'gigabytes'), FloatGB], + [('tb', 'terabytes'), FloatTB], + ] + self._formatters = {} + for key, value in format_aliases: + for alias in key: + self._formatters[alias] = value + self._factors = {} + for key, value in aliases: + for alias in key: + self._factors[alias] = value + + for k, v in kw.items(): + self._convert(v, k) + # only pursue the first occurrence + break + + def _convert(self, size, unit): + """ + Convert any size down to bytes so that other methods can rely on bytes + being available always, regardless of what they pass in, avoiding the + need for a mapping of every permutation. + """ + if unit in ['b', 'bytes']: + self._b = size + return + factor = self._factors[unit] + self._b = float(size * factor) + + def _get_best_format(self): + """ + Go through all the supported units, and use the first one that is less + than 1024. This allows to represent size in the most readable format + available + """ + for unit in ['b', 'kb', 'mb', 'gb', 'tb']: + if getattr(self, unit) > 1024: + continue + return getattr(self, unit) + + def __repr__(self): + return "<Size(%s)>" % self._get_best_format() + + def __str__(self): + return "%s" % self._get_best_format() + + def __format__(self, spec): + return str(self._get_best_format()).__format__(spec) + + def __int__(self): + return int(self._b) + + def __float__(self): + return self._b + + def __lt__(self, other): + if isinstance(other, Size): + return self._b < other._b + else: + return self.b < other + + def __le__(self, other): + if isinstance(other, Size): + return self._b <= other._b + else: + return self.b <= other + + def __eq__(self, other): + if isinstance(other, Size): + return self._b == other._b + else: + return self.b == other + + def __ne__(self, other): + if isinstance(other, Size): + return self._b != other._b + else: + return self.b != other + + def __ge__(self, other): + if isinstance(other, Size): + return self._b >= other._b + else: + return self.b >= other + + def __gt__(self, other): + if isinstance(other, Size): + return self._b > other._b + else: + return self.b > other + + def __add__(self, other): + if isinstance(other, Size): + _b = self._b + other._b + return Size(b=_b) + raise TypeError('Cannot add "Size" object with int') + + def __sub__(self, other): + if isinstance(other, Size): + _b = self._b - other._b + return Size(b=_b) + raise TypeError('Cannot subtract "Size" object from int') + + def __mul__(self, other): + if isinstance(other, Size): + raise TypeError('Cannot multiply with "Size" object') + _b = self._b * other + return Size(b=_b) + + def __truediv__(self, other): + if isinstance(other, Size): + return self._b / other._b + _b = self._b / other + return Size(b=_b) + + def __div__(self, other): + if isinstance(other, Size): + return self._b / other._b + _b = self._b / other + return Size(b=_b) + + def __bool__(self): + return self.b != 0 + + def __nonzero__(self): + return self.__bool__() + + def __getattr__(self, unit): + """ + Calculate units on the fly, relies on the fact that ``bytes`` has been + converted at instantiation. Units that don't exist will trigger an + ``AttributeError`` + """ + try: + formatter = self._formatters[unit] + except KeyError: + raise AttributeError('Size object has not attribute "%s"' % unit) + if unit in ['b', 'bytes']: + return formatter(self._b) + try: + factor = self._factors[unit] + except KeyError: + raise AttributeError('Size object has not attribute "%s"' % unit) + return formatter(float(self._b) / factor) + + +def human_readable_size(size): + """ + Take a size in bytes, and transform it into a human readable size with up + to two decimals of precision. + """ + suffixes = ['B', 'KB', 'MB', 'GB', 'TB'] + suffix_index = 0 + while size > 1024: + suffix_index += 1 + size = size / 1024.0 + return "{size:.2f} {suffix}".format( + size=size, + suffix=suffixes[suffix_index]) + + +def size_from_human_readable(s): + """ + Takes a human readable string and converts into a Size. If no unit is + passed, bytes is assumed. + """ + s = s.replace(' ', '') + if s[-1].isdigit(): + return Size(b=float(s)) + n = float(s[:-1]) + if s[-1].lower() == 't': + return Size(tb=n) + if s[-1].lower() == 'g': + return Size(gb=n) + if s[-1].lower() == 'm': + return Size(mb=n) + if s[-1].lower() == 'k': + return Size(kb=n) + return None + + +def get_partitions_facts(sys_block_path): + partition_metadata = {} + for folder in os.listdir(sys_block_path): + folder_path = os.path.join(sys_block_path, folder) + if os.path.exists(os.path.join(folder_path, 'partition')): + contents = get_file_contents(os.path.join(folder_path, 'partition')) + if contents: + part = {} + partname = folder + part_sys_block_path = os.path.join(sys_block_path, partname) + + part['start'] = get_file_contents(part_sys_block_path + "/start", 0) + part['sectors'] = get_file_contents(part_sys_block_path + "/size", 0) + + part['sectorsize'] = get_file_contents( + part_sys_block_path + "/queue/logical_block_size") + if not part['sectorsize']: + part['sectorsize'] = get_file_contents( + part_sys_block_path + "/queue/hw_sector_size", 512) + part['size'] = float(part['sectors']) * 512 + part['human_readable_size'] = human_readable_size(float(part['sectors']) * 512) + part['holders'] = [] + for holder in os.listdir(part_sys_block_path + '/holders'): + part['holders'].append(holder) + + partition_metadata[partname] = part + return partition_metadata + + +def is_mapper_device(device_name): + return device_name.startswith(('/dev/mapper', '/dev/dm-')) + + +def is_locked_raw_device(disk_path): + """ + A device can be locked by a third party software like a database. + To detect that case, the device is opened in Read/Write and exclusive mode + """ + open_flags = (os.O_RDWR | os.O_EXCL) + open_mode = 0 + fd = None + + try: + fd = os.open(disk_path, open_flags, open_mode) + except OSError: + return 1 + + try: + os.close(fd) + except OSError: + return 1 + + return 0 + + +def get_block_devs_lsblk(): + ''' + This returns a list of lists with 3 items per inner list. + KNAME - reflects the kernel device name , for example /dev/sda or /dev/dm-0 + NAME - the device name, for example /dev/sda or + /dev/mapper/<vg_name>-<lv_name> + TYPE - the block device type: disk, partition, lvm and such + + ''' + cmd = ['lsblk', '-plno', 'KNAME,NAME,TYPE'] + stdout, stderr, rc = process.call(cmd) + # lsblk returns 1 on failure + if rc == 1: + raise OSError('lsblk returned failure, stderr: {}'.format(stderr)) + return [re.split(r'\s+', line) for line in stdout] + + +def get_devices(_sys_block_path='/sys/block'): + """ + Captures all available block devices as reported by lsblk. + Additional interesting metadata like sectors, size, vendor, + solid/rotational, etc. is collected from /sys/block/<device> + + Returns a dictionary, where keys are the full paths to devices. + + ..note:: loop devices, removable media, and logical volumes are never included. + """ + + device_facts = {} + + block_devs = get_block_devs_lsblk() + + for block in block_devs: + devname = os.path.basename(block[0]) + diskname = block[1] + if block[2] != 'disk': + continue + sysdir = os.path.join(_sys_block_path, devname) + metadata = {} + + # If the mapper device is a logical volume it gets excluded + if is_mapper_device(diskname): + if lvm.get_device_lvs(diskname): + continue + + # all facts that have no defaults + # (<name>, <path relative to _sys_block_path>) + facts = [('removable', 'removable'), + ('ro', 'ro'), + ('vendor', 'device/vendor'), + ('model', 'device/model'), + ('rev', 'device/rev'), + ('sas_address', 'device/sas_address'), + ('sas_device_handle', 'device/sas_device_handle'), + ('support_discard', 'queue/discard_granularity'), + ('rotational', 'queue/rotational'), + ('nr_requests', 'queue/nr_requests'), + ] + for key, file_ in facts: + metadata[key] = get_file_contents(os.path.join(sysdir, file_)) + + metadata['scheduler_mode'] = "" + scheduler = get_file_contents(sysdir + "/queue/scheduler") + if scheduler is not None: + m = re.match(r".*?(\[(.*)\])", scheduler) + if m: + metadata['scheduler_mode'] = m.group(2) + + metadata['partitions'] = get_partitions_facts(sysdir) + + size = get_file_contents(os.path.join(sysdir, 'size'), 0) + + metadata['sectors'] = get_file_contents(os.path.join(sysdir, 'sectors'), 0) + fallback_sectorsize = get_file_contents(sysdir + "/queue/hw_sector_size", 512) + metadata['sectorsize'] = get_file_contents(sysdir + + "/queue/logical_block_size", + fallback_sectorsize) + metadata['size'] = float(size) * 512 + metadata['human_readable_size'] = human_readable_size(metadata['size']) + metadata['path'] = diskname + metadata['locked'] = is_locked_raw_device(metadata['path']) + + device_facts[diskname] = metadata + return device_facts |