import logging import os import re import stat from ceph_volume import process from ceph_volume.api import lvm from ceph_volume.util.system import get_file_contents logger = logging.getLogger(__name__) # The blkid CLI tool has some oddities which prevents having one common call # to extract the information instead of having separate utilities. The `udev` # type of output is needed in older versions of blkid (v 2.23) that will not # work correctly with just the ``-p`` flag to bypass the cache for example. # Xenial doesn't have this problem as it uses a newer blkid version. def get_partuuid(device): """ If a device is a partition, it will probably have a PARTUUID on it that will persist and can be queried against `blkid` later to detect the actual device """ out, err, rc = process.call( ['blkid', '-s', 'PARTUUID', '-o', 'value', device] ) return ' '.join(out).strip() def _blkid_parser(output): """ Parses the output from a system ``blkid`` call, requires output to be produced using the ``-p`` flag which bypasses the cache, mangling the names. These names are corrected to what it would look like without the ``-p`` flag. Normal output:: /dev/sdb1: UUID="62416664-cbaf-40bd-9689-10bd337379c3" TYPE="xfs" [...] """ # first spaced separated item is garbage, gets tossed: output = ' '.join(output.split()[1:]) # split again, respecting possible whitespace in quoted values pairs = output.split('" ') raw = {} processed = {} mapping = { 'UUID': 'UUID', 'TYPE': 'TYPE', 'PART_ENTRY_NAME': 'PARTLABEL', 'PART_ENTRY_UUID': 'PARTUUID', 'PART_ENTRY_TYPE': 'PARTTYPE', 'PTTYPE': 'PTTYPE', } for pair in pairs: try: column, value = pair.split('=') except ValueError: continue raw[column] = value.strip().strip().strip('"') for key, value in raw.items(): new_key = mapping.get(key) if not new_key: continue processed[new_key] = value return processed def blkid(device): """ The blkid interface to its CLI, creating an output similar to what is expected from ``lsblk``. In most cases, ``lsblk()`` should be the preferred method for extracting information about a device. There are some corner cases where it might provide information that is otherwise unavailable. The system call uses the ``-p`` flag which bypasses the cache, the caveat being that the keys produced are named completely different to expected names. For example, instead of ``PARTLABEL`` it provides a ``PART_ENTRY_NAME``. A bit of translation between these known keys is done, which is why ``lsblk`` should always be preferred: the output provided here is not as rich, given that a translation of keys is required for a uniform interface with the ``-p`` flag. Label name to expected output chart: cache bypass name expected name UUID UUID TYPE TYPE PART_ENTRY_NAME PARTLABEL PART_ENTRY_UUID PARTUUID """ out, err, rc = process.call( ['blkid', '-p', device] ) return _blkid_parser(' '.join(out)) def get_part_entry_type(device): """ Parses the ``ID_PART_ENTRY_TYPE`` from the "low level" (bypasses the cache) output that uses the ``udev`` type of output. This output is intended to be used for udev rules, but it is useful in this case as it is the only consistent way to retrieve the GUID used by ceph-disk to identify devices. """ out, err, rc = process.call(['blkid', '-p', '-o', 'udev', device]) for line in out: if 'ID_PART_ENTRY_TYPE=' in line: return line.split('=')[-1].strip() return '' def get_device_from_partuuid(partuuid): """ If a device has a partuuid, query blkid so that it can tell us what that device is """ out, err, rc = process.call( ['blkid', '-t', 'PARTUUID="%s"' % partuuid, '-o', 'device'] ) return ' '.join(out).strip() def remove_partition(device): """ Removes a partition using parted :param device: A ``Device()`` object """ parent_device = '/dev/%s' % device.disk_api['PKNAME'] udev_info = udevadm_property(device.abspath) partition_number = udev_info.get('ID_PART_ENTRY_NUMBER') if not partition_number: raise RuntimeError('Unable to detect the partition number for device: %s' % device.abspath) process.run( ['parted', parent_device, '--script', '--', 'rm', partition_number] ) def _stat_is_device(stat_obj): """ Helper function that will interpret ``os.stat`` output directly, so that other functions can call ``os.stat`` once and interpret that result several times """ return stat.S_ISBLK(stat_obj) def _lsblk_parser(line): """ Parses lines in lsblk output. Requires output to be in pair mode (``-P`` flag). Lines need to be whole strings, the line gets split when processed. :param line: A string, with the full line from lsblk output """ # parse the COLUMN="value" output to construct the dictionary pairs = line.split('" ') parsed = {} for pair in pairs: try: column, value = pair.split('=') except ValueError: continue parsed[column] = value.strip().strip().strip('"') return parsed def device_family(device): """ Returns a list of associated devices. It assumes that ``device`` is a parent device. It is up to the caller to ensure that the device being used is a parent, not a partition. """ labels = ['NAME', 'PARTLABEL', 'TYPE'] command = ['lsblk', '-P', '-p', '-o', ','.join(labels), device] out, err, rc = process.call(command) devices = [] for line in out: devices.append(_lsblk_parser(line)) return devices def udevadm_property(device, properties=[]): """ Query udevadm for information about device properties. Optionally pass a list of properties to return. A requested property might not be returned if not present. Expected output format:: # udevadm info --query=property --name=/dev/sda :( DEVNAME=/dev/sda DEVTYPE=disk ID_ATA=1 ID_BUS=ata ID_MODEL=SK_hynix_SC311_SATA_512GB ID_PART_TABLE_TYPE=gpt ID_PART_TABLE_UUID=c8f91d57-b26c-4de1-8884-0c9541da288c ID_PATH=pci-0000:00:17.0-ata-3 ID_PATH_TAG=pci-0000_00_17_0-ata-3 ID_REVISION=70000P10 ID_SERIAL=SK_hynix_SC311_SATA_512GB_MS83N71801150416A TAGS=:systemd: USEC_INITIALIZED=16117769 ... """ out = _udevadm_info(device) ret = {} for line in out: p, v = line.split('=', 1) if not properties or p in properties: ret[p] = v return ret def _udevadm_info(device): """ Call udevadm and return the output """ cmd = ['udevadm', 'info', '--query=property', device] out, _err, _rc = process.call(cmd) return out def lsblk(device, columns=None, abspath=False): """ Create a dictionary of identifying values for a device using ``lsblk``. Each supported column is a key, in its *raw* format (all uppercase usually). ``lsblk`` has support for certain "columns" (in blkid these would be labels), and these columns vary between distributions and ``lsblk`` versions. The newer versions support a richer set of columns, while older ones were a bit limited. These are a subset of lsblk columns which are known to work on both CentOS 7 and Xenial: NAME device name KNAME internal kernel device name MAJ:MIN major:minor device number FSTYPE filesystem type MOUNTPOINT where the device is mounted LABEL filesystem LABEL UUID filesystem UUID RO read-only device RM removable device MODEL device identifier SIZE size of the device STATE state of the device OWNER user name GROUP group name MODE device node permissions ALIGNMENT alignment offset MIN-IO minimum I/O size OPT-IO optimal I/O size PHY-SEC physical sector size LOG-SEC logical sector size ROTA rotational device SCHED I/O scheduler name RQ-SIZE request queue size TYPE device type PKNAME internal parent kernel device name DISC-ALN discard alignment offset DISC-GRAN discard granularity DISC-MAX discard max bytes DISC-ZERO discard zeroes data There is a bug in ``lsblk`` where using all the available (supported) columns will result in no output (!), in order to workaround this the following columns have been removed from the default reporting columns: * RQ-SIZE (request queue size) * MIN-IO minimum I/O size * OPT-IO optimal I/O size These should be available however when using `columns`. For example:: >>> lsblk('/dev/sda1', columns=['OPT-IO']) {'OPT-IO': '0'} Normal CLI output, as filtered by the flags in this function will look like :: $ lsblk --nodeps -P -o NAME,KNAME,MAJ:MIN,FSTYPE,MOUNTPOINT NAME="sda1" KNAME="sda1" MAJ:MIN="8:1" FSTYPE="ext4" MOUNTPOINT="/" :param columns: A list of columns to report as keys in its original form. :param abspath: Set the flag for absolute paths on the report """ default_columns = [ 'NAME', 'KNAME', 'MAJ:MIN', 'FSTYPE', 'MOUNTPOINT', 'LABEL', 'UUID', 'RO', 'RM', 'MODEL', 'SIZE', 'STATE', 'OWNER', 'GROUP', 'MODE', 'ALIGNMENT', 'PHY-SEC', 'LOG-SEC', 'ROTA', 'SCHED', 'TYPE', 'DISC-ALN', 'DISC-GRAN', 'DISC-MAX', 'DISC-ZERO', 'PKNAME', 'PARTLABEL' ] device = device.rstrip('/') columns = columns or default_columns # --nodeps -> Avoid adding children/parents to the device, only give information # on the actual device we are querying for # -P -> Produce pairs of COLUMN="value" # -p -> Return full paths to devices, not just the names, when ``abspath`` is set # -o -> Use the columns specified or default ones provided by this function base_command = ['lsblk', '--nodeps', '-P'] if abspath: base_command.append('-p') base_command.append('-o') base_command.append(','.join(columns)) base_command.append(device) out, err, rc = process.call(base_command) if rc != 0: return {} return _lsblk_parser(' '.join(out)) def is_device(dev): """ Boolean to determine if a given device is a block device (**not** a partition!) For example: /dev/sda would return True, but not /dev/sdc1 """ if not os.path.exists(dev): return False # use lsblk first, fall back to using stat TYPE = lsblk(dev).get('TYPE') if TYPE: return TYPE == 'disk' # fallback to stat return _stat_is_device(os.lstat(dev).st_mode) if stat.S_ISBLK(os.lstat(dev)): return True return False def is_partition(dev): """ Boolean to determine if a given device is a partition, like /dev/sda1 """ if not os.path.exists(dev): return False # use lsblk first, fall back to using stat TYPE = lsblk(dev).get('TYPE') if TYPE: return TYPE == 'part' # fallback to stat stat_obj = os.stat(dev) if _stat_is_device(stat_obj.st_mode): return False major = os.major(stat_obj.st_rdev) minor = os.minor(stat_obj.st_rdev) if os.path.exists('/sys/dev/block/%d:%d/partition' % (major, minor)): return True return False class BaseFloatUnit(float): """ Base class to support float representations of size values. Suffix is computed on child classes by inspecting the class name """ def __repr__(self): return "<%s(%s)>" % (self.__class__.__name__, self.__float__()) def __str__(self): return "{size:.2f} {suffix}".format( size=self.__float__(), suffix=self.__class__.__name__.split('Float')[-1] ) def as_int(self): return int(self.real) def as_float(self): return self.real class FloatB(BaseFloatUnit): pass class FloatMB(BaseFloatUnit): pass class FloatGB(BaseFloatUnit): pass class FloatKB(BaseFloatUnit): pass class FloatTB(BaseFloatUnit): pass class Size(object): """ Helper to provide an interface for different sizes given a single initial input. Allows for comparison between different size objects, which avoids the need to convert sizes before comparison (e.g. comparing megabytes against gigabytes). Common comparison operators are supported:: >>> hd1 = Size(gb=400) >>> hd2 = Size(gb=500) >>> hd1 > hd2 False >>> hd1 < hd2 True >>> hd1 == hd2 False >>> hd1 == Size(gb=400) True The Size object can also be multiplied or divided:: >>> hd1 >>> hd1 * 2 >>> hd1 Additions and subtractions are only supported between Size objects:: >>> Size(gb=224) - Size(gb=100) >>> Size(gb=1) + Size(mb=300) Can also display a human-readable representation, with automatic detection on best suited unit, or alternatively, specific unit representation:: >>> s = Size(mb=2211) >>> s >>> s.mb >>> print "Total size: %s" % s.mb Total size: 2211.00 MB >>> print "Total size: %s" % s Total size: 2.16 GB """ @classmethod def parse(cls, size): if (len(size) > 2 and size[-2].lower() in ['k', 'm', 'g', 't'] and size[-1].lower() == 'b'): return cls(**{size[-2:].lower(): float(size[0:-2])}) elif size[-1].lower() in ['b', 'k', 'm', 'g', 't']: return cls(**{size[-1].lower(): float(size[0:-1])}) else: return cls(b=float(size)) def __init__(self, multiplier=1024, **kw): self._multiplier = multiplier # create a mapping of units-to-multiplier, skip bytes as that is # calculated initially always and does not need to convert aliases = [ [('k', 'kb', 'kilobytes'), self._multiplier], [('m', 'mb', 'megabytes'), self._multiplier ** 2], [('g', 'gb', 'gigabytes'), self._multiplier ** 3], [('t', 'tb', 'terabytes'), self._multiplier ** 4], ] # and mappings for units-to-formatters, including bytes and aliases for # each format_aliases = [ [('b', 'bytes'), FloatB], [('kb', 'kilobytes'), FloatKB], [('mb', 'megabytes'), FloatMB], [('gb', 'gigabytes'), FloatGB], [('tb', 'terabytes'), FloatTB], ] self._formatters = {} for key, value in format_aliases: for alias in key: self._formatters[alias] = value self._factors = {} for key, value in aliases: for alias in key: self._factors[alias] = value for k, v in kw.items(): self._convert(v, k) # only pursue the first occurrence break def _convert(self, size, unit): """ Convert any size down to bytes so that other methods can rely on bytes being available always, regardless of what they pass in, avoiding the need for a mapping of every permutation. """ if unit in ['b', 'bytes']: self._b = size return factor = self._factors[unit] self._b = float(size * factor) def _get_best_format(self): """ Go through all the supported units, and use the first one that is less than 1024. This allows to represent size in the most readable format available """ for unit in ['b', 'kb', 'mb', 'gb', 'tb']: if getattr(self, unit) > 1024: continue return getattr(self, unit) def __repr__(self): return "" % self._get_best_format() def __str__(self): return "%s" % self._get_best_format() def __format__(self, spec): return str(self._get_best_format()).__format__(spec) def __int__(self): return int(self._b) def __float__(self): return self._b def __lt__(self, other): if isinstance(other, Size): return self._b < other._b else: return self.b < other def __le__(self, other): if isinstance(other, Size): return self._b <= other._b else: return self.b <= other def __eq__(self, other): if isinstance(other, Size): return self._b == other._b else: return self.b == other def __ne__(self, other): if isinstance(other, Size): return self._b != other._b else: return self.b != other def __ge__(self, other): if isinstance(other, Size): return self._b >= other._b else: return self.b >= other def __gt__(self, other): if isinstance(other, Size): return self._b > other._b else: return self.b > other def __add__(self, other): if isinstance(other, Size): _b = self._b + other._b return Size(b=_b) raise TypeError('Cannot add "Size" object with int') def __sub__(self, other): if isinstance(other, Size): _b = self._b - other._b return Size(b=_b) raise TypeError('Cannot subtract "Size" object from int') def __mul__(self, other): if isinstance(other, Size): raise TypeError('Cannot multiply with "Size" object') _b = self._b * other return Size(b=_b) def __truediv__(self, other): if isinstance(other, Size): return self._b / other._b _b = self._b / other return Size(b=_b) def __div__(self, other): if isinstance(other, Size): return self._b / other._b _b = self._b / other return Size(b=_b) def __bool__(self): return self.b != 0 def __nonzero__(self): return self.__bool__() def __getattr__(self, unit): """ Calculate units on the fly, relies on the fact that ``bytes`` has been converted at instantiation. Units that don't exist will trigger an ``AttributeError`` """ try: formatter = self._formatters[unit] except KeyError: raise AttributeError('Size object has not attribute "%s"' % unit) if unit in ['b', 'bytes']: return formatter(self._b) try: factor = self._factors[unit] except KeyError: raise AttributeError('Size object has not attribute "%s"' % unit) return formatter(float(self._b) / factor) def human_readable_size(size): """ Take a size in bytes, and transform it into a human readable size with up to two decimals of precision. """ suffixes = ['B', 'KB', 'MB', 'GB', 'TB'] suffix_index = 0 while size > 1024: suffix_index += 1 size = size / 1024.0 return "{size:.2f} {suffix}".format( size=size, suffix=suffixes[suffix_index]) def size_from_human_readable(s): """ Takes a human readable string and converts into a Size. If no unit is passed, bytes is assumed. """ s = s.replace(' ', '') if s[-1].isdigit(): return Size(b=float(s)) n = float(s[:-1]) if s[-1].lower() == 't': return Size(tb=n) if s[-1].lower() == 'g': return Size(gb=n) if s[-1].lower() == 'm': return Size(mb=n) if s[-1].lower() == 'k': return Size(kb=n) return None def get_partitions_facts(sys_block_path): partition_metadata = {} for folder in os.listdir(sys_block_path): folder_path = os.path.join(sys_block_path, folder) if os.path.exists(os.path.join(folder_path, 'partition')): contents = get_file_contents(os.path.join(folder_path, 'partition')) if contents: part = {} partname = folder part_sys_block_path = os.path.join(sys_block_path, partname) part['start'] = get_file_contents(part_sys_block_path + "/start", 0) part['sectors'] = get_file_contents(part_sys_block_path + "/size", 0) part['sectorsize'] = get_file_contents( part_sys_block_path + "/queue/logical_block_size") if not part['sectorsize']: part['sectorsize'] = get_file_contents( part_sys_block_path + "/queue/hw_sector_size", 512) part['size'] = float(part['sectors']) * 512 part['human_readable_size'] = human_readable_size(float(part['sectors']) * 512) part['holders'] = [] for holder in os.listdir(part_sys_block_path + '/holders'): part['holders'].append(holder) partition_metadata[partname] = part return partition_metadata def is_mapper_device(device_name): return device_name.startswith(('/dev/mapper', '/dev/dm-')) def is_locked_raw_device(disk_path): """ A device can be locked by a third party software like a database. To detect that case, the device is opened in Read/Write and exclusive mode """ open_flags = (os.O_RDWR | os.O_EXCL) open_mode = 0 fd = None try: fd = os.open(disk_path, open_flags, open_mode) except OSError: return 1 try: os.close(fd) except OSError: return 1 return 0 def get_block_devs_lsblk(): ''' This returns a list of lists with 3 items per inner list. KNAME - reflects the kernel device name , for example /dev/sda or /dev/dm-0 NAME - the device name, for example /dev/sda or /dev/mapper/- TYPE - the block device type: disk, partition, lvm and such ''' cmd = ['lsblk', '-plno', 'KNAME,NAME,TYPE'] stdout, stderr, rc = process.call(cmd) # lsblk returns 1 on failure if rc == 1: raise OSError('lsblk returned failure, stderr: {}'.format(stderr)) return [re.split(r'\s+', line) for line in stdout] def get_devices(_sys_block_path='/sys/block'): """ Captures all available block devices as reported by lsblk. Additional interesting metadata like sectors, size, vendor, solid/rotational, etc. is collected from /sys/block/ Returns a dictionary, where keys are the full paths to devices. ..note:: loop devices, removable media, and logical volumes are never included. """ device_facts = {} block_devs = get_block_devs_lsblk() for block in block_devs: devname = os.path.basename(block[0]) diskname = block[1] if block[2] != 'disk': continue sysdir = os.path.join(_sys_block_path, devname) metadata = {} # If the mapper device is a logical volume it gets excluded if is_mapper_device(diskname): if lvm.get_device_lvs(diskname): continue # all facts that have no defaults # (, ) facts = [('removable', 'removable'), ('ro', 'ro'), ('vendor', 'device/vendor'), ('model', 'device/model'), ('rev', 'device/rev'), ('sas_address', 'device/sas_address'), ('sas_device_handle', 'device/sas_device_handle'), ('support_discard', 'queue/discard_granularity'), ('rotational', 'queue/rotational'), ('nr_requests', 'queue/nr_requests'), ] for key, file_ in facts: metadata[key] = get_file_contents(os.path.join(sysdir, file_)) metadata['scheduler_mode'] = "" scheduler = get_file_contents(sysdir + "/queue/scheduler") if scheduler is not None: m = re.match(r".*?(\[(.*)\])", scheduler) if m: metadata['scheduler_mode'] = m.group(2) metadata['partitions'] = get_partitions_facts(sysdir) size = get_file_contents(os.path.join(sysdir, 'size'), 0) metadata['sectors'] = get_file_contents(os.path.join(sysdir, 'sectors'), 0) fallback_sectorsize = get_file_contents(sysdir + "/queue/hw_sector_size", 512) metadata['sectorsize'] = get_file_contents(sysdir + "/queue/logical_block_size", fallback_sectorsize) metadata['size'] = float(size) * 512 metadata['human_readable_size'] = human_readable_size(metadata['size']) metadata['path'] = diskname metadata['locked'] = is_locked_raw_device(metadata['path']) device_facts[diskname] = metadata return device_facts