diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
commit | 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch) | |
tree | 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/pybind/mgr/cephadm | |
parent | Initial commit. (diff) | |
download | ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip |
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/pybind/mgr/cephadm')
52 files changed, 19645 insertions, 0 deletions
diff --git a/src/pybind/mgr/cephadm/.gitignore b/src/pybind/mgr/cephadm/.gitignore new file mode 100644 index 000000000..a273f8603 --- /dev/null +++ b/src/pybind/mgr/cephadm/.gitignore @@ -0,0 +1,2 @@ +.vagrant +ssh-config diff --git a/src/pybind/mgr/cephadm/HACKING.rst b/src/pybind/mgr/cephadm/HACKING.rst new file mode 100644 index 000000000..fa6ea9e1b --- /dev/null +++ b/src/pybind/mgr/cephadm/HACKING.rst @@ -0,0 +1,272 @@ +Development +=========== + + +There are multiple ways to set up a development environment for the SSH orchestrator. +In the following I'll use the `vstart` method. + +1) Make sure remoto is installed (0.35 or newer) + +2) Use vstart to spin up a cluster + + +:: + + # ../src/vstart.sh -n --cephadm + +*Note that when you specify `--cephadm` you have to have passwordless ssh access to localhost* + +It will add your ~/.ssh/id_rsa and ~/.ssh/id_rsa.pub to `mgr/ssh/ssh_identity_{key, pub}` +and add your $HOSTNAME to the list of known hosts. + +This will also enable the cephadm mgr module and enable it as the orchestrator backend. + +*Optional:* + +While the above is sufficient for most operations, you may want to add a second host to the mix. +There is `Vagrantfile` for creating a minimal cluster in `src/pybind/mgr/cephadm/`. + +If you wish to extend the one-node-localhost cluster to i.e. test more sophisticated OSD deployments you can follow the next steps: + +From within the `src/pybind/mgr/cephadm` directory. + + +1) Spawn VMs + +:: + + # vagrant up + +This will spawn three machines by default. +mon0, mgr0 and osd0 with 2 additional disks. + +You can change that by passing `MONS` (default: 1), `MGRS` (default: 1), `OSDS` (default: 1) and +`DISKS` (default: 2) environment variables to overwrite the defaults. In order to not always have +to set the environment variables you can now create as JSON see `./vagrant.config.example.json` +for details. 
+ +If will also come with the necessary packages preinstalled as well as your ~/.ssh/id_rsa.pub key +injected. (to users root and vagrant; the cephadm-orchestrator currently connects as root) + + +2) Update the ssh-config + +The cephadm orchestrator needs to understand how to connect to the new node. Most likely the VM +isn't reachable with the default settings used: + +``` +Host * +User root +StrictHostKeyChecking no +``` + +You want to adjust this by retrieving an adapted ssh_config from Vagrant. + +:: + + # vagrant ssh-config > ssh-config + + +Now set the newly created config for Ceph. + +:: + + # ceph cephadm set-ssh-config -i <path_to_ssh_conf> + + +3) Add the new host + +Add the newly created host(s) to the inventory. + +:: + + + # ceph orch host add <host> + + +4) Verify the inventory + +You should see the hostname in the list. + +:: + + # ceph orch host ls + + +5) Verify the devices + +To verify all disks are set and in good shape look if all devices have been spawned +and can be found + +:: + + # ceph orch device ls + + +6) Make a snapshot of all your VMs! + +To not go the long way again the next time snapshot your VMs in order to revert them back +if they are dirty. + +In `this repository <https://github.com/Devp00l/vagrant-helper-scripts>`_ you can find two +scripts that will help you with doing a snapshot and reverting it, without having to manual +snapshot and revert each VM individually. + + +Understanding ``AsyncCompletion`` +================================= + +How can I store temporary variables? +------------------------------------ + +Let's imagine you want to write code similar to + +.. code:: python + + hosts = self.get_hosts() + inventory = self.get_inventory(hosts) + return self._create_osd(hosts, drive_group, inventory) + +That won't work, as ``get_hosts`` and ``get_inventory`` return objects +of type ``AsyncCompletion``. + +Now let's imaging a Python 3 world, where we can use ``async`` and +``await``. 
Then we actually can write this like so: + +.. code:: python + + hosts = await self.get_hosts() + inventory = await self.get_inventory(hosts) + return self._create_osd(hosts, drive_group, inventory) + +Let's use a simple example to make this clear: + +.. code:: python + + val = await func_1() + return func_2(val) + +As we're not yet in Python 3, we need to do write ``await`` manually by +calling ``orchestrator.Completion.then()``: + +.. code:: python + + func_1().then(lambda val: func_2(val)) + + # or + func_1().then(func_2) + +Now let's desugar the original example: + +.. code:: python + + hosts = await self.get_hosts() + inventory = await self.get_inventory(hosts) + return self._create_osd(hosts, drive_group, inventory) + +Now let's replace one ``async`` at a time: + +.. code:: python + + hosts = await self.get_hosts() + return self.get_inventory(hosts).then(lambda inventory: + self._create_osd(hosts, drive_group, inventory)) + +Then finally: + +.. code:: python + + self.get_hosts().then(lambda hosts: + self.get_inventory(hosts).then(lambda inventory: + self._create_osd(hosts, + drive_group, inventory))) + +This also works without lambdas: + +.. code:: python + + def call_inventory(hosts): + def call_create(inventory) + return self._create_osd(hosts, drive_group, inventory) + + return self.get_inventory(hosts).then(call_create) + + self.get_hosts(call_inventory) + +We should add support for ``await`` as soon as we're on Python 3. + +I want to call my function for every host! +------------------------------------------ + +Imagine you have a function that looks like so: + +.. code:: python + + @async_completion + def deploy_stuff(name, node): + ... + +And you want to call ``deploy_stuff`` like so: + +.. code:: python + + return [deploy_stuff(name, node) for node in nodes] + +This won't work as expected. The number of ``AsyncCompletion`` objects +created should be ``O(1)``. But there is a solution: +``@async_map_completion`` + +.. 
code:: python + + @async_map_completion + def deploy_stuff(name, node): + ... + + return deploy_stuff([(name, node) for node in nodes]) + +This way, we're only creating one ``AsyncCompletion`` object. Note that +you should not create new ``AsyncCompletion`` within ``deploy_stuff``, as +we're then no longer have ``O(1)`` completions: + +.. code:: python + + @async_completion + def other_async_function(): + ... + + @async_map_completion + def deploy_stuff(name, node): + return other_async_function() # wrong! + +Why do we need this? +-------------------- + +I've tried to look into making Completions composable by being able to +call one completion from another completion. I.e. making them re-usable +using Promises E.g.: + +.. code:: python + + >>> return self.get_hosts().then(self._create_osd) + +where ``get_hosts`` returns a Completion of list of hosts and +``_create_osd`` takes a list of hosts. + +The concept behind this is to store the computation steps explicit and +then explicitly evaluate the chain: + +.. code:: python + + p = Completion(on_complete=lambda x: x*2).then(on_complete=lambda x: str(x)) + p.finalize(2) + assert p.result = "4" + +or graphically: + +:: + + +---------------+ +-----------------+ + | | then | | + | lambda x: x*x | +--> | lambda x: str(x)| + | | | | + +---------------+ +-----------------+ diff --git a/src/pybind/mgr/cephadm/Vagrantfile b/src/pybind/mgr/cephadm/Vagrantfile new file mode 100644 index 000000000..3a08380c3 --- /dev/null +++ b/src/pybind/mgr/cephadm/Vagrantfile @@ -0,0 +1,66 @@ +# vi: set ft=ruby : +# +# In order to reduce the need of recreating all vagrant boxes everytime they +# get dirty, snaptshot them and revert the snapshot of them instead. 
+# Two helpful scripts to do this easily can be found here: +# https://github.com/Devp00l/vagrant-helper-scripts + +require 'json' +configFileName = 'vagrant.config.json' +CONFIG = File.file?(configFileName) && JSON.parse(File.read(File.join(File.dirname(__FILE__), configFileName))) + +def getConfig(name, default) + down = name.downcase + up = name.upcase + CONFIG && CONFIG[down] ? CONFIG[down] : (ENV[up] ? ENV[up].to_i : default) +end + +OSDS = getConfig('OSDS', 1) +MGRS = getConfig('MGRS', 1) +MONS = getConfig('MONS', 1) +DISKS = getConfig('DISKS', 2) + +# Activate only for test purpose as it changes the output of each vagrant command link to get the ssh_config. +# puts "Your setup:","OSDs: #{OSDS}","MGRs: #{MGRS}","MONs: #{MONS}","Disks per OSD: #{DISKS}" + +Vagrant.configure("2") do |config| + config.vm.synced_folder ".", "/vagrant", disabled: true + config.vm.network "private_network", type: "dhcp" + config.vm.box = "centos/8" + + (0..MONS - 1).each do |i| + config.vm.define "mon#{i}" do |mon| + mon.vm.hostname = "mon#{i}" + end + end + (0..MGRS - 1).each do |i| + config.vm.define "mgr#{i}" do |mgr| + mgr.vm.hostname = "mgr#{i}" + end + end + (0..OSDS - 1).each do |i| + config.vm.define "osd#{i}" do |osd| + osd.vm.hostname = "osd#{i}" + osd.vm.provider :libvirt do |libvirt| + (0..DISKS - 1).each do |d| + # In ruby value.chr makes ASCII char from value + libvirt.storage :file, :size => '20G', :device => "vd#{(98+d).chr}#{i}" + end + end + end + end + + config.vm.provision "file", source: "~/.ssh/id_rsa.pub", destination: "~/.ssh/id_rsa.pub" + config.vm.provision "shell", inline: <<-SHELL + cat /home/vagrant/.ssh/id_rsa.pub >> /home/vagrant/.ssh/authorized_keys + sudo cp -r /home/vagrant/.ssh /root/.ssh + SHELL + + config.vm.provision "shell", inline: <<-SHELL + sudo yum install -y yum-utils + sudo yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm + sudo rpm --import 'https://download.ceph.com/keys/release.asc' + curl -L 
https://shaman.ceph.com/api/repos/ceph/master/latest/centos/8/repo/ | sudo tee /etc/yum.repos.d/shaman.repo + sudo yum install -y python36 podman ceph + SHELL +end diff --git a/src/pybind/mgr/cephadm/__init__.py b/src/pybind/mgr/cephadm/__init__.py new file mode 100644 index 000000000..597d883f7 --- /dev/null +++ b/src/pybind/mgr/cephadm/__init__.py @@ -0,0 +1,10 @@ +from .module import CephadmOrchestrator + +__all__ = [ + "CephadmOrchestrator", +] + +import os +if 'UNITTEST' in os.environ: + import tests + __all__.append(tests.__name__) diff --git a/src/pybind/mgr/cephadm/autotune.py b/src/pybind/mgr/cephadm/autotune.py new file mode 100644 index 000000000..51c931cba --- /dev/null +++ b/src/pybind/mgr/cephadm/autotune.py @@ -0,0 +1,54 @@ +import logging +from typing import List, Optional, Callable, Any, Tuple + +from orchestrator._interface import DaemonDescription + +logger = logging.getLogger(__name__) + + +class MemoryAutotuner(object): + + min_size_by_type = { + 'mds': 4096 * 1048576, + 'mgr': 4096 * 1048576, + 'mon': 1024 * 1048576, + 'crash': 128 * 1048576, + 'keepalived': 128 * 1048576, + 'haproxy': 128 * 1048576, + } + default_size = 1024 * 1048576 + + def __init__( + self, + daemons: List[DaemonDescription], + config_get: Callable[[str, str], Any], + total_mem: int, + ): + self.daemons = daemons + self.config_get = config_get + self.total_mem = total_mem + + def tune(self) -> Tuple[Optional[int], List[str]]: + tuned_osds: List[str] = [] + total = self.total_mem + for d in self.daemons: + if d.daemon_type == 'mds': + total -= self.config_get(d.name(), 'mds_cache_memory_limit') + continue + if d.daemon_type != 'osd': + assert d.daemon_type + total -= max( + self.min_size_by_type.get(d.daemon_type, self.default_size), + d.memory_usage or 0 + ) + continue + if not self.config_get(d.name(), 'osd_memory_target_autotune'): + total -= self.config_get(d.name(), 'osd_memory_target') + continue + tuned_osds.append(d.name()) + if total < 0: + return None, [] + if not 
tuned_osds: + return None, [] + per = total // len(tuned_osds) + return int(per), tuned_osds diff --git a/src/pybind/mgr/cephadm/ceph.repo b/src/pybind/mgr/cephadm/ceph.repo new file mode 100644 index 000000000..6f710e7ce --- /dev/null +++ b/src/pybind/mgr/cephadm/ceph.repo @@ -0,0 +1,23 @@ +[ceph] +name=Ceph packages for $basearch +baseurl=https://download.ceph.com/rpm-mimic/el7/$basearch +enabled=1 +priority=2 +gpgcheck=1 +gpgkey=https://download.ceph.com/keys/release.asc + +[ceph-noarch] +name=Ceph noarch packages +baseurl=https://download.ceph.com/rpm-mimic/el7/noarch +enabled=1 +priority=2 +gpgcheck=1 +gpgkey=https://download.ceph.com/keys/release.asc + +[ceph-source] +name=Ceph source packages +baseurl=https://download.ceph.com/rpm-mimic/el7/SRPMS +enabled=0 +priority=2 +gpgcheck=1 +gpgkey=https://download.ceph.com/keys/release.asc diff --git a/src/pybind/mgr/cephadm/configchecks.py b/src/pybind/mgr/cephadm/configchecks.py new file mode 100644 index 000000000..dc7a09827 --- /dev/null +++ b/src/pybind/mgr/cephadm/configchecks.py @@ -0,0 +1,705 @@ +import json +import ipaddress +import logging + +from mgr_module import ServiceInfoT + +from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast, Tuple, Callable + +if TYPE_CHECKING: + from cephadm.module import CephadmOrchestrator + +logger = logging.getLogger(__name__) + + +class HostFacts: + + def __init__(self) -> None: + self.arch: Optional[str] = None + self.bios_date: Optional[str] = None + self.bios_version: Optional[str] = None + self.cpu_cores: Optional[int] = None + self.cpu_count: Optional[int] = None + self.cpu_load: Optional[Dict[str, float]] = None + self.cpu_model: Optional[str] = None + self.cpu_threads: Optional[int] = None + self.flash_capacity: Optional[str] = None + self.flash_capacity_bytes: Optional[int] = None + self.flash_count: Optional[int] = None + self.flash_list: Optional[List[Dict[str, Any]]] = None + self.hdd_capacity: Optional[str] = None + self.hdd_capacity_bytes: 
Optional[int] = None + self.hdd_count: Optional[int] = None + self.hdd_list: Optional[List[Dict[str, Any]]] = None + self.hostname: Optional[str] = None + self.interfaces: Optional[Dict[str, Dict[str, Any]]] = None + self.kernel: Optional[str] = None + self.kernel_parameters: Optional[Dict[str, Any]] = None + self.kernel_security: Optional[Dict[str, str]] = None + self.memory_available_kb: Optional[int] = None + self.memory_free_kb: Optional[int] = None + self.memory_total_kb: Optional[int] = None + self.model: Optional[str] = None + self.nic_count: Optional[int] = None + self.operating_system: Optional[str] = None + self.subscribed: Optional[str] = None + self.system_uptime: Optional[float] = None + self.timestamp: Optional[float] = None + self.vendor: Optional[str] = None + self._valid = False + + def load_facts(self, json_data: Dict[str, Any]) -> None: + + if isinstance(json_data, dict): + keys = json_data.keys() + if all([k in keys for k in self.__dict__ if not k.startswith('_')]): + self._valid = True + for k in json_data.keys(): + if hasattr(self, k): + setattr(self, k, json_data[k]) + else: + self._valid = False + else: + self._valid = False + + def subnet_to_nic(self, subnet: str) -> Optional[str]: + ip_version = ipaddress.ip_network(subnet).version + logger.debug(f"subnet {subnet} is IP version {ip_version}") + interfaces = cast(Dict[str, Dict[str, Any]], self.interfaces) + nic = None + for iface in interfaces.keys(): + addr = '' + if ip_version == 4: + addr = interfaces[iface].get('ipv4_address', '') + else: + addr = interfaces[iface].get('ipv6_address', '') + if addr: + a = addr.split('/')[0] + if ipaddress.ip_address(a) in ipaddress.ip_network(subnet): + nic = iface + break + return nic + + +class SubnetLookup: + def __init__(self, subnet: str, hostname: str, mtu: str, speed: str): + self.subnet = subnet + self.mtu_map = { + mtu: [hostname] + } + self.speed_map = { + speed: [hostname] + } + + @ property + def host_list(self) -> List[str]: + hosts = [] + 
for mtu in self.mtu_map: + hosts.extend(self.mtu_map.get(mtu, [])) + return hosts + + def update(self, hostname: str, mtu: str, speed: str) -> None: + if mtu in self.mtu_map and hostname not in self.mtu_map[mtu]: + self.mtu_map[mtu].append(hostname) + else: + self.mtu_map[mtu] = [hostname] + + if speed in self.speed_map and hostname not in self.speed_map[speed]: + self.speed_map[speed].append(hostname) + else: + self.speed_map[speed] = [hostname] + + def __repr__(self) -> str: + return json.dumps({ + "subnet": self.subnet, + "mtu_map": self.mtu_map, + "speed_map": self.speed_map + }) + + +class CephadmCheckDefinition: + def __init__(self, mgr: "CephadmOrchestrator", healthcheck_name: str, description: str, name: str, func: Callable) -> None: + self.mgr = mgr + self.log = logger + self.healthcheck_name = healthcheck_name + self.description = description + self.name = name + self.func = func + + @property + def status(self) -> str: + check_states: Dict[str, str] = {} + # Issuing a get each time, since the value could be set at the CLI + raw_states = self.mgr.get_store('config_checks') + if not raw_states: + self.log.error( + "config_checks setting is not defined - unable to determine healthcheck state") + return "Unknown" + + try: + check_states = json.loads(raw_states) + except json.JSONDecodeError: + self.log.error("Unable to serialize the config_checks settings to JSON") + return "Unavailable" + + return check_states.get(self.name, 'Missing') + + def to_json(self) -> Dict[str, Any]: + return { + "healthcheck_name": self.healthcheck_name, + "description": self.description, + "name": self.name, + "status": self.status, + "valid": True if self.func else False + } + + +class CephadmConfigChecks: + def __init__(self, mgr: "CephadmOrchestrator"): + self.mgr: "CephadmOrchestrator" = mgr + self.health_checks: List[CephadmCheckDefinition] = [ + CephadmCheckDefinition(mgr, "CEPHADM_CHECK_KERNEL_LSM", + "checks SELINUX/Apparmor profiles are consistent across cluster hosts", 
+ "kernel_security", + self._check_kernel_lsm), + CephadmCheckDefinition(mgr, "CEPHADM_CHECK_SUBSCRIPTION", + "checks subscription states are consistent for all cluster hosts", + "os_subscription", + self._check_subscription), + CephadmCheckDefinition(mgr, "CEPHADM_CHECK_PUBLIC_MEMBERSHIP", + "check that all hosts have a NIC on the Ceph public_netork", + "public_network", + self._check_public_network), + CephadmCheckDefinition(mgr, "CEPHADM_CHECK_MTU", + "check that OSD hosts share a common MTU setting", + "osd_mtu_size", + self._check_osd_mtu), + CephadmCheckDefinition(mgr, "CEPHADM_CHECK_LINKSPEED", + "check that OSD hosts share a common linkspeed", + "osd_linkspeed", + self._check_osd_linkspeed), + CephadmCheckDefinition(mgr, "CEPHADM_CHECK_NETWORK_MISSING", + "checks that the cluster/public networks defined exist on the Ceph hosts", + "network_missing", + self._check_network_missing), + CephadmCheckDefinition(mgr, "CEPHADM_CHECK_CEPH_RELEASE", + "check for Ceph version consistency - ceph daemons should be on the same release (unless upgrade is active)", + "ceph_release", + self._check_release_parity), + CephadmCheckDefinition(mgr, "CEPHADM_CHECK_KERNEL_VERSION", + "checks that the MAJ.MIN of the kernel on Ceph hosts is consistent", + "kernel_version", + self._check_kernel_version), + ] + self.log = logger + self.host_facts: Dict[str, HostFacts] = {} + self.subnet_lookup: Dict[str, SubnetLookup] = {} # subnet CIDR -> SubnetLookup Object + self.lsm_to_host: Dict[str, List[str]] = {} + self.subscribed: Dict[str, List[str]] = { + "yes": [], + "no": [], + "unknown": [], + } + self.host_to_role: Dict[str, List[str]] = {} + self.kernel_to_hosts: Dict[str, List[str]] = {} + + self.public_network_list: List[str] = [] + self.cluster_network_list: List[str] = [] + self.health_check_raised = False + self.active_checks: List[str] = [] # checks enabled and executed + self.skipped_checks: List[str] = [] # checks enabled, but skipped due to a pre-req failure + + raw_checks = 
self.mgr.get_store('config_checks') + if not raw_checks: + # doesn't exist, so seed the checks + self.seed_config_checks() + else: + # setting is there, so ensure there is an entry for each of the checks that + # this module supports (account for upgrades/changes) + try: + config_checks = json.loads(raw_checks) + except json.JSONDecodeError: + self.log.error("Unable to serialize config_checks config. Reset to defaults") + self.seed_config_checks() + else: + # Ensure the config_checks setting is consistent with this module + from_config = set(config_checks.keys()) + from_module = set([c.name for c in self.health_checks]) + old_checks = from_config.difference(from_module) + new_checks = from_module.difference(from_config) + + if old_checks: + self.log.debug(f"old checks being removed from config_checks: {old_checks}") + for i in old_checks: + del config_checks[i] + if new_checks: + self.log.debug(f"new checks being added to config_checks: {new_checks}") + for i in new_checks: + config_checks[i] = 'enabled' + + if old_checks or new_checks: + self.log.info( + f"config_checks updated: {len(old_checks)} removed, {len(new_checks)} added") + self.mgr.set_store('config_checks', json.dumps(config_checks)) + else: + self.log.debug("config_checks match module definition") + + def lookup_check(self, key_value: str, key_name: str = 'name') -> Optional[CephadmCheckDefinition]: + + for c in self.health_checks: + if getattr(c, key_name) == key_value: + return c + return None + + @property + def defined_checks(self) -> int: + return len(self.health_checks) + + @property + def active_checks_count(self) -> int: + return len(self.active_checks) + + def seed_config_checks(self) -> None: + defaults = {check.name: 'enabled' for check in self.health_checks} + self.mgr.set_store('config_checks', json.dumps(defaults)) + + @property + def skipped_checks_count(self) -> int: + return len(self.skipped_checks) + + def to_json(self) -> List[Dict[str, str]]: + return [check.to_json() for check in 
self.health_checks] + + def load_network_config(self) -> None: + ret, out, _err = self.mgr.check_mon_command({ + 'prefix': 'config dump', + 'format': 'json' + }) + assert ret == 0 + js = json.loads(out) + for item in js: + if item['name'] == "cluster_network": + self.cluster_network_list = item['value'].strip().split(',') + if item['name'] == "public_network": + self.public_network_list = item['value'].strip().split(',') + + self.log.debug(f"public networks {self.public_network_list}") + self.log.debug(f"cluster networks {self.cluster_network_list}") + + def _update_subnet(self, subnet: str, hostname: str, nic: Dict[str, Any]) -> None: + mtu = nic.get('mtu', None) + speed = nic.get('speed', None) + if not mtu or not speed: + return + + this_subnet = self.subnet_lookup.get(subnet, None) + if this_subnet: + this_subnet.update(hostname, mtu, speed) + else: + self.subnet_lookup[subnet] = SubnetLookup(subnet, hostname, mtu, speed) + + def _update_subnet_lookups(self, hostname: str, devname: str, nic: Dict[str, Any]) -> None: + if nic['ipv4_address']: + try: + iface4 = ipaddress.IPv4Interface(nic['ipv4_address']) + subnet = str(iface4.network) + except ipaddress.AddressValueError as e: + self.log.exception(f"Invalid network on {hostname}, interface {devname} : {str(e)}") + else: + self._update_subnet(subnet, hostname, nic) + + if nic['ipv6_address']: + try: + iface6 = ipaddress.IPv6Interface(nic['ipv6_address']) + subnet = str(iface6.network) + except ipaddress.AddressValueError as e: + self.log.exception(f"Invalid network on {hostname}, interface {devname} : {str(e)}") + else: + self._update_subnet(subnet, hostname, nic) + + def hosts_with_role(self, role: str) -> List[str]: + host_list = [] + for hostname, roles in self.host_to_role.items(): + if role in roles: + host_list.append(hostname) + return host_list + + def reset(self) -> None: + self.subnet_lookup.clear() + self.lsm_to_host.clear() + self.subscribed['yes'] = [] + self.subscribed['no'] = [] + 
self.subscribed['unknown'] = [] + self.host_to_role.clear() + self.kernel_to_hosts.clear() + + def _get_majority(self, data: Dict[str, List[str]]) -> Tuple[str, int]: + assert isinstance(data, dict) + + majority_key = '' + majority_count = 0 + for key in data: + if len(data[key]) > majority_count: + majority_count = len(data[key]) + majority_key = key + return majority_key, majority_count + + def get_ceph_metadata(self) -> Dict[str, Optional[Dict[str, str]]]: + """Build a map of service -> service metadata""" + service_map: Dict[str, Optional[Dict[str, str]]] = {} + + for server in self.mgr.list_servers(): + for service in cast(List[ServiceInfoT], server.get('services', [])): + if service: + service_map.update( + { + f"{service['type']}.{service['id']}": + self.mgr.get_metadata(service['type'], service['id']) + } + ) + return service_map + + def _check_kernel_lsm(self) -> None: + if len(self.lsm_to_host.keys()) > 1: + + majority_hosts_ptr, majority_hosts_count = self._get_majority(self.lsm_to_host) + lsm_copy = self.lsm_to_host.copy() + del lsm_copy[majority_hosts_ptr] + details = [] + for lsm_key in lsm_copy.keys(): + for host in lsm_copy[lsm_key]: + details.append( + f"{host} has inconsistent KSM settings compared to the " + f"majority of hosts({majority_hosts_count}) in the cluster") + host_sfx = 's' if len(details) > 1 else '' + self.mgr.health_checks['CEPHADM_CHECK_KERNEL_LSM'] = { + 'severity': 'warning', + 'summary': f"Kernel Security Module (SELinux/AppArmor) is inconsistent for " + f"{len(details)} host{host_sfx}", + 'count': len(details), + 'detail': details, + } + self.health_check_raised = True + else: + self.mgr.health_checks.pop('CEPHADM_CHECK_KERNEL_LSM', None) + + def _check_subscription(self) -> None: + if len(self.subscribed['yes']) > 0 and len(self.subscribed['no']) > 0: + # inconsistent subscription states - CEPHADM_CHECK_SUBSCRIPTION + details = [] + for host in self.subscribed['no']: + details.append(f"{host} does not have an active 
subscription") + self.mgr.health_checks['CEPHADM_CHECK_SUBSCRIPTION'] = { + 'severity': 'warning', + 'summary': f"Support subscriptions inactive on {len(details)} host(s)" + f"({len(self.subscribed['yes'])} subscriptions active)", + 'count': len(details), + 'detail': details, + } + self.health_check_raised = True + else: + self.mgr.health_checks.pop('CEPHADM_CHECK_SUBSCRIPTION', None) + + def _check_public_network(self) -> None: + hosts_remaining: List[str] = list(self.mgr.cache.facts.keys()) + hosts_removed: List[str] = [] + self.log.debug(f"checking public network membership for: {hosts_remaining}") + + for p_net in self.public_network_list: + self.log.debug(f"checking network {p_net}") + subnet_data = self.subnet_lookup.get(p_net, None) + self.log.debug(f"subnet data - {subnet_data}") + + if subnet_data: + hosts_in_subnet = subnet_data.host_list + for host in hosts_in_subnet: + if host in hosts_remaining: + hosts_remaining.remove(host) + hosts_removed.append(host) + else: + if host not in hosts_removed: + self.log.debug(f"host={host}, subnet={p_net}") + self.log.exception( + "Host listed for a subnet but not present in the host facts?") + + # Ideally all hosts will have been removed since they have an IP on at least + # one of the public networks + if hosts_remaining: + if len(hosts_remaining) != len(self.mgr.cache.facts): + # public network is visible on some hosts + details = [ + f"{host} does not have an interface on any public network" for host in hosts_remaining] + + self.mgr.health_checks['CEPHADM_CHECK_PUBLIC_MEMBERSHIP'] = { + 'severity': 'warning', + 'summary': f"Public network(s) is not directly accessible from {len(hosts_remaining)} " + "cluster hosts", + 'count': len(details), + 'detail': details, + } + self.health_check_raised = True + else: + self.mgr.health_checks.pop('CEPHADM_CHECK_PUBLIC_MEMBERSHIP', None) + + def _check_osd_mtu(self) -> None: + osd_hosts = set(self.hosts_with_role('osd')) + osd_network_list = self.cluster_network_list or 
self.public_network_list + mtu_errors: List[str] = [] + + for osd_net in osd_network_list: + subnet_data = self.subnet_lookup.get(osd_net, None) + + if subnet_data: + + self.log.debug(f"processing mtu map : {json.dumps(subnet_data.mtu_map)}") + mtu_count = {} + max_hosts = 0 + mtu_ptr = '' + diffs = {} + for mtu, host_list in subnet_data.mtu_map.items(): + mtu_hosts = set(host_list) + mtu_count[mtu] = len(mtu_hosts) + errors = osd_hosts.difference(mtu_hosts) + if errors: + diffs[mtu] = errors + if len(errors) > max_hosts: + mtu_ptr = mtu + + if diffs: + self.log.debug("MTU problems detected") + self.log.debug(f"most hosts using {mtu_ptr}") + mtu_copy = subnet_data.mtu_map.copy() + del mtu_copy[mtu_ptr] + for bad_mtu in mtu_copy: + for h in mtu_copy[bad_mtu]: + host = HostFacts() + host.load_facts(self.mgr.cache.facts[h]) + mtu_errors.append( + f"host {h}({host.subnet_to_nic(osd_net)}) is using MTU " + f"{bad_mtu} on {osd_net}, NICs on other hosts use {mtu_ptr}") + + if mtu_errors: + self.mgr.health_checks['CEPHADM_CHECK_MTU'] = { + 'severity': 'warning', + 'summary': f"MTU setting inconsistent on osd network NICs on {len(mtu_errors)} host(s)", + 'count': len(mtu_errors), + 'detail': mtu_errors, + } + self.health_check_raised = True + else: + self.mgr.health_checks.pop('CEPHADM_CHECK_MTU', None) + + def _check_osd_linkspeed(self) -> None: + osd_hosts = set(self.hosts_with_role('osd')) + osd_network_list = self.cluster_network_list or self.public_network_list + + linkspeed_errors = [] + + for osd_net in osd_network_list: + subnet_data = self.subnet_lookup.get(osd_net, None) + + if subnet_data: + + self.log.debug(f"processing subnet : {subnet_data}") + + speed_count = {} + max_hosts = 0 + speed_ptr = '' + diffs = {} + for speed, host_list in subnet_data.speed_map.items(): + speed_hosts = set(host_list) + speed_count[speed] = len(speed_hosts) + errors = osd_hosts.difference(speed_hosts) + if errors: + diffs[speed] = errors + if len(errors) > max_hosts: + speed_ptr = 
speed + + if diffs: + self.log.debug("linkspeed issue(s) detected") + self.log.debug(f"most hosts using {speed_ptr}") + speed_copy = subnet_data.speed_map.copy() + del speed_copy[speed_ptr] + for bad_speed in speed_copy: + if bad_speed > speed_ptr: + # skip speed is better than most...it can stay! + continue + for h in speed_copy[bad_speed]: + host = HostFacts() + host.load_facts(self.mgr.cache.facts[h]) + linkspeed_errors.append( + f"host {h}({host.subnet_to_nic(osd_net)}) has linkspeed of " + f"{bad_speed} on {osd_net}, NICs on other hosts use {speed_ptr}") + + if linkspeed_errors: + self.mgr.health_checks['CEPHADM_CHECK_LINKSPEED'] = { + 'severity': 'warning', + 'summary': "Link speed is inconsistent on osd network NICs for " + f"{len(linkspeed_errors)} host(s)", + 'count': len(linkspeed_errors), + 'detail': linkspeed_errors, + } + self.health_check_raised = True + else: + self.mgr.health_checks.pop('CEPHADM_CHECK_LINKSPEED', None) + + def _check_network_missing(self) -> None: + all_networks = self.public_network_list.copy() + all_networks.extend(self.cluster_network_list) + + missing_networks = [] + for subnet in all_networks: + subnet_data = self.subnet_lookup.get(subnet, None) + + if not subnet_data: + missing_networks.append(f"{subnet} not found on any host in the cluster") + self.log.warning( + f"Network {subnet} has been defined, but is not present on any host") + + if missing_networks: + net_sfx = 's' if len(missing_networks) > 1 else '' + self.mgr.health_checks['CEPHADM_CHECK_NETWORK_MISSING'] = { + 'severity': 'warning', + 'summary': f"Public/cluster network{net_sfx} defined, but can not be found on " + "any host", + 'count': len(missing_networks), + 'detail': missing_networks, + } + self.health_check_raised = True + else: + self.mgr.health_checks.pop('CEPHADM_CHECK_NETWORK_MISSING', None) + + def _check_release_parity(self) -> None: + upgrade_status = self.mgr.upgrade.upgrade_status() + if upgrade_status.in_progress: + # skip version consistency checks 
during an upgrade cycle + self.skipped_checks.append('ceph_release') + return + + services = self.get_ceph_metadata() + self.log.debug(json.dumps(services)) + version_to_svcs: Dict[str, List[str]] = {} + + for svc in services: + if services[svc]: + metadata = cast(Dict[str, str], services[svc]) + v = metadata.get('ceph_release', '') + if v in version_to_svcs: + version_to_svcs[v].append(svc) + else: + version_to_svcs[v] = [svc] + + if len(version_to_svcs) > 1: + majority_ptr, _majority_count = self._get_majority(version_to_svcs) + ver_copy = version_to_svcs.copy() + del ver_copy[majority_ptr] + details = [] + for v in ver_copy: + for svc in ver_copy[v]: + details.append( + f"{svc} is running {v} (majority of cluster is using {majority_ptr})") + + self.mgr.health_checks['CEPHADM_CHECK_CEPH_RELEASE'] = { + 'severity': 'warning', + 'summary': 'Ceph cluster running mixed ceph releases', + 'count': len(details), + 'detail': details, + } + self.health_check_raised = True + self.log.warning( + f"running with {len(version_to_svcs)} different ceph releases within this cluster") + else: + self.mgr.health_checks.pop('CEPHADM_CHECK_CEPH_RELEASE', None) + + def _check_kernel_version(self) -> None: + if len(self.kernel_to_hosts.keys()) > 1: + majority_hosts_ptr, majority_hosts_count = self._get_majority(self.kernel_to_hosts) + kver_copy = self.kernel_to_hosts.copy() + del kver_copy[majority_hosts_ptr] + details = [] + for k in kver_copy: + for h in kver_copy[k]: + details.append( + f"host {h} running kernel {k}, majority of hosts({majority_hosts_count}) " + f"running {majority_hosts_ptr}") + + self.log.warning("mixed kernel versions detected") + self.mgr.health_checks['CEPHADM_CHECK_KERNEL_VERSION'] = { + 'severity': 'warning', + 'summary': f"{len(details)} host(s) running different kernel versions", + 'count': len(details), + 'detail': details, + } + self.health_check_raised = True + else: + self.mgr.health_checks.pop('CEPHADM_CHECK_KERNEL_VERSION', None) + + def 
_process_hosts(self) -> None: + self.log.debug(f"processing data from {len(self.mgr.cache.facts)} hosts") + for hostname in self.mgr.cache.facts: + host = HostFacts() + host.load_facts(self.mgr.cache.facts[hostname]) + if not host._valid: + self.log.warning(f"skipping {hostname} - incompatible host facts") + continue + + kernel_lsm = cast(Dict[str, str], host.kernel_security) + lsm_desc = kernel_lsm.get('description', '') + if lsm_desc: + if lsm_desc in self.lsm_to_host: + self.lsm_to_host[lsm_desc].append(hostname) + else: + self.lsm_to_host[lsm_desc] = [hostname] + + subscription_state = host.subscribed.lower() if host.subscribed else None + if subscription_state: + self.subscribed[subscription_state].append(hostname) + + interfaces = cast(Dict[str, Dict[str, Any]], host.interfaces) + for name in interfaces.keys(): + if name in ['lo']: + continue + self._update_subnet_lookups(hostname, name, interfaces[name]) + + if host.kernel: + kernel_maj_min = '.'.join(host.kernel.split('.')[0:2]) + if kernel_maj_min in self.kernel_to_hosts: + self.kernel_to_hosts[kernel_maj_min].append(hostname) + else: + self.kernel_to_hosts[kernel_maj_min] = [hostname] + else: + self.log.warning(f"Host gather facts for {hostname} is missing kernel information") + + # NOTE: if daemondescription had systemd enabled state, we could check for systemd 'tampering' + self.host_to_role[hostname] = list(self.mgr.cache.get_daemon_types(hostname)) + + def run_checks(self) -> None: + checks_enabled = self.mgr.get_module_option('config_checks_enabled') + if checks_enabled is not True: + return + + self.reset() + + check_config: Dict[str, str] = {} + checks_raw: Optional[str] = self.mgr.get_store('config_checks') + if checks_raw: + try: + check_config.update(json.loads(checks_raw)) + except json.JSONDecodeError: + self.log.exception( + "mgr/cephadm/config_checks is not JSON serializable - all checks will run") + + # build lookup "maps" by walking the host facts, once + self._process_hosts() + + 
self.health_check_raised = False + self.active_checks = [] + self.skipped_checks = [] + + # process all healthchecks that are not explcitly disabled + for health_check in self.health_checks: + if check_config.get(health_check.name, '') != 'disabled': + self.active_checks.append(health_check.name) + health_check.func() + + self.mgr.set_health_checks(self.mgr.health_checks) diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py new file mode 100644 index 000000000..92e10ea39 --- /dev/null +++ b/src/pybind/mgr/cephadm/inventory.py @@ -0,0 +1,1019 @@ +import datetime +from copy import copy +import ipaddress +import json +import logging +import socket +from typing import TYPE_CHECKING, Dict, List, Iterator, Optional, Any, Tuple, Set, Mapping, cast, \ + NamedTuple, Type + +import orchestrator +from ceph.deployment import inventory +from ceph.deployment.service_spec import ServiceSpec, PlacementSpec +from ceph.utils import str_to_datetime, datetime_to_str, datetime_now +from orchestrator import OrchestratorError, HostSpec, OrchestratorEvent, service_to_daemon_types + +from .utils import resolve_ip +from .migrations import queue_migrate_nfs_spec + +if TYPE_CHECKING: + from .module import CephadmOrchestrator + + +logger = logging.getLogger(__name__) + +HOST_CACHE_PREFIX = "host." +SPEC_STORE_PREFIX = "spec." + + +class Inventory: + """ + The inventory stores a HostSpec for all hosts persistently. + """ + + def __init__(self, mgr: 'CephadmOrchestrator'): + self.mgr = mgr + adjusted_addrs = False + + def is_valid_ip(ip: str) -> bool: + try: + ipaddress.ip_address(ip) + return True + except ValueError: + return False + + # load inventory + i = self.mgr.get_store('inventory') + if i: + self._inventory: Dict[str, dict] = json.loads(i) + # handle old clusters missing 'hostname' key from hostspec + for k, v in self._inventory.items(): + if 'hostname' not in v: + v['hostname'] = k + + # convert legacy non-IP addr? 
+ if is_valid_ip(str(v.get('addr'))): + continue + if len(self._inventory) > 1: + if k == socket.gethostname(): + # Never try to resolve our own host! This is + # fraught and can lead to either a loopback + # address (due to podman's futzing with + # /etc/hosts) or a private IP based on the CNI + # configuration. Instead, wait until the mgr + # fails over to another host and let them resolve + # this host. + continue + ip = resolve_ip(cast(str, v.get('addr'))) + else: + # we only have 1 node in the cluster, so we can't + # rely on another host doing the lookup. use the + # IP the mgr binds to. + ip = self.mgr.get_mgr_ip() + if is_valid_ip(ip) and not ip.startswith('127.0.'): + self.mgr.log.info( + f"inventory: adjusted host {v['hostname']} addr '{v['addr']}' -> '{ip}'" + ) + v['addr'] = ip + adjusted_addrs = True + if adjusted_addrs: + self.save() + else: + self._inventory = dict() + logger.debug('Loaded inventory %s' % self._inventory) + + def keys(self) -> List[str]: + return list(self._inventory.keys()) + + def __contains__(self, host: str) -> bool: + return host in self._inventory + + def assert_host(self, host: str) -> None: + if host not in self._inventory: + raise OrchestratorError('host %s does not exist' % host) + + def add_host(self, spec: HostSpec) -> None: + if spec.hostname in self._inventory: + # addr + if self.get_addr(spec.hostname) != spec.addr: + self.set_addr(spec.hostname, spec.addr) + # labels + for label in spec.labels: + self.add_label(spec.hostname, label) + else: + self._inventory[spec.hostname] = spec.to_json() + self.save() + + def rm_host(self, host: str) -> None: + self.assert_host(host) + del self._inventory[host] + self.save() + + def set_addr(self, host: str, addr: str) -> None: + self.assert_host(host) + self._inventory[host]['addr'] = addr + self.save() + + def add_label(self, host: str, label: str) -> None: + self.assert_host(host) + + if 'labels' not in self._inventory[host]: + self._inventory[host]['labels'] = list() + if label 
not in self._inventory[host]['labels']: + self._inventory[host]['labels'].append(label) + self.save() + + def rm_label(self, host: str, label: str) -> None: + self.assert_host(host) + + if 'labels' not in self._inventory[host]: + self._inventory[host]['labels'] = list() + if label in self._inventory[host]['labels']: + self._inventory[host]['labels'].remove(label) + self.save() + + def has_label(self, host: str, label: str) -> bool: + return ( + host in self._inventory + and label in self._inventory[host].get('labels', []) + ) + + def get_addr(self, host: str) -> str: + self.assert_host(host) + return self._inventory[host].get('addr', host) + + def spec_from_dict(self, info: dict) -> HostSpec: + hostname = info['hostname'] + return HostSpec( + hostname, + addr=info.get('addr', hostname), + labels=info.get('labels', []), + status='Offline' if hostname in self.mgr.offline_hosts else info.get('status', ''), + ) + + def all_specs(self) -> List[HostSpec]: + return list(map(self.spec_from_dict, self._inventory.values())) + + def get_host_with_state(self, state: str = "") -> List[str]: + """return a list of host names in a specific state""" + return [h for h in self._inventory if self._inventory[h].get("status", "").lower() == state] + + def save(self) -> None: + self.mgr.set_store('inventory', json.dumps(self._inventory)) + + +class SpecDescription(NamedTuple): + spec: ServiceSpec + rank_map: Optional[Dict[int, Dict[int, Optional[str]]]] + created: datetime.datetime + deleted: Optional[datetime.datetime] + + +class SpecStore(): + def __init__(self, mgr): + # type: (CephadmOrchestrator) -> None + self.mgr = mgr + self._specs = {} # type: Dict[str, ServiceSpec] + # service_name -> rank -> gen -> daemon_id + self._rank_maps = {} # type: Dict[str, Dict[int, Dict[int, Optional[str]]]] + self.spec_created = {} # type: Dict[str, datetime.datetime] + self.spec_deleted = {} # type: Dict[str, datetime.datetime] + self.spec_preview = {} # type: Dict[str, ServiceSpec] + + @property + 
def all_specs(self) -> Mapping[str, ServiceSpec]: + """ + returns active and deleted specs. Returns read-only dict. + """ + return self._specs + + def __contains__(self, name: str) -> bool: + return name in self._specs + + def __getitem__(self, name: str) -> SpecDescription: + if name not in self._specs: + raise OrchestratorError(f'Service {name} not found.') + return SpecDescription(self._specs[name], + self._rank_maps.get(name), + self.spec_created[name], + self.spec_deleted.get(name, None)) + + @property + def active_specs(self) -> Mapping[str, ServiceSpec]: + return {k: v for k, v in self._specs.items() if k not in self.spec_deleted} + + def load(self): + # type: () -> None + for k, v in self.mgr.get_store_prefix(SPEC_STORE_PREFIX).items(): + service_name = k[len(SPEC_STORE_PREFIX):] + try: + j = cast(Dict[str, dict], json.loads(v)) + if ( + (self.mgr.migration_current or 0) < 3 + and j['spec'].get('service_type') == 'nfs' + ): + self.mgr.log.debug(f'found legacy nfs spec {j}') + queue_migrate_nfs_spec(self.mgr, j) + spec = ServiceSpec.from_json(j['spec']) + created = str_to_datetime(cast(str, j['created'])) + self._specs[service_name] = spec + self.spec_created[service_name] = created + + if 'deleted' in j: + deleted = str_to_datetime(cast(str, j['deleted'])) + self.spec_deleted[service_name] = deleted + + if 'rank_map' in j and isinstance(j['rank_map'], dict): + self._rank_maps[service_name] = {} + for rank_str, m in j['rank_map'].items(): + try: + rank = int(rank_str) + except ValueError: + logger.exception(f"failed to parse rank in {j['rank_map']}") + continue + if isinstance(m, dict): + self._rank_maps[service_name][rank] = {} + for gen_str, name in m.items(): + try: + gen = int(gen_str) + except ValueError: + logger.exception(f"failed to parse gen in {j['rank_map']}") + continue + if isinstance(name, str) or m is None: + self._rank_maps[service_name][rank][gen] = name + + self.mgr.log.debug('SpecStore: loaded spec for %s' % ( + service_name)) + except 
Exception as e: + self.mgr.log.warning('unable to load spec for %s: %s' % ( + service_name, e)) + pass + + def save( + self, + spec: ServiceSpec, + update_create: bool = True, + ) -> None: + name = spec.service_name() + if spec.preview_only: + self.spec_preview[name] = spec + return None + self._specs[name] = spec + + if update_create: + self.spec_created[name] = datetime_now() + self._save(name) + + def save_rank_map(self, + name: str, + rank_map: Dict[int, Dict[int, Optional[str]]]) -> None: + self._rank_maps[name] = rank_map + self._save(name) + + def _save(self, name: str) -> None: + data: Dict[str, Any] = { + 'spec': self._specs[name].to_json(), + 'created': datetime_to_str(self.spec_created[name]), + } + if name in self._rank_maps: + data['rank_map'] = self._rank_maps[name] + if name in self.spec_deleted: + data['deleted'] = datetime_to_str(self.spec_deleted[name]) + + self.mgr.set_store( + SPEC_STORE_PREFIX + name, + json.dumps(data, sort_keys=True), + ) + self.mgr.events.for_service(self._specs[name], + OrchestratorEvent.INFO, + 'service was created') + + def rm(self, service_name: str) -> bool: + if service_name not in self._specs: + return False + + if self._specs[service_name].preview_only: + self.finally_rm(service_name) + return True + + self.spec_deleted[service_name] = datetime_now() + self.save(self._specs[service_name], update_create=False) + return True + + def finally_rm(self, service_name): + # type: (str) -> bool + found = service_name in self._specs + if found: + del self._specs[service_name] + if service_name in self._rank_maps: + del self._rank_maps[service_name] + del self.spec_created[service_name] + if service_name in self.spec_deleted: + del self.spec_deleted[service_name] + self.mgr.set_store(SPEC_STORE_PREFIX + service_name, None) + return found + + def get_created(self, spec: ServiceSpec) -> Optional[datetime.datetime]: + return self.spec_created.get(spec.service_name()) + + +class ClientKeyringSpec(object): + """ + A client keyring 
file that we should maintain + """ + + def __init__( + self, + entity: str, + placement: PlacementSpec, + mode: Optional[int] = None, + uid: Optional[int] = None, + gid: Optional[int] = None, + ) -> None: + self.entity = entity + self.placement = placement + self.mode = mode or 0o600 + self.uid = uid or 0 + self.gid = gid or 0 + + def validate(self) -> None: + pass + + def to_json(self) -> Dict[str, Any]: + return { + 'entity': self.entity, + 'placement': self.placement.to_json(), + 'mode': self.mode, + 'uid': self.uid, + 'gid': self.gid, + } + + @property + def path(self) -> str: + return f'/etc/ceph/ceph.{self.entity}.keyring' + + @classmethod + def from_json(cls: Type, data: dict) -> 'ClientKeyringSpec': + c = data.copy() + if 'placement' in c: + c['placement'] = PlacementSpec.from_json(c['placement']) + _cls = cls(**c) + _cls.validate() + return _cls + + +class ClientKeyringStore(): + """ + Track client keyring files that we are supposed to maintain + """ + + def __init__(self, mgr): + # type: (CephadmOrchestrator) -> None + self.mgr: CephadmOrchestrator = mgr + self.mgr = mgr + self.keys: Dict[str, ClientKeyringSpec] = {} + + def load(self) -> None: + c = self.mgr.get_store('client_keyrings') or b'{}' + j = json.loads(c) + for e, d in j.items(): + self.keys[e] = ClientKeyringSpec.from_json(d) + + def save(self) -> None: + data = { + k: v.to_json() for k, v in self.keys.items() + } + self.mgr.set_store('client_keyrings', json.dumps(data)) + + def update(self, ks: ClientKeyringSpec) -> None: + self.keys[ks.entity] = ks + self.save() + + def rm(self, entity: str) -> None: + if entity in self.keys: + del self.keys[entity] + self.save() + + +class HostCache(): + """ + HostCache stores different things: + + 1. `daemons`: Deployed daemons O(daemons) + + They're part of the configuration nowadays and need to be + persistent. The name "daemon cache" is unfortunately a bit misleading. 
+ Like for example we really need to know where daemons are deployed on + hosts that are offline. + + 2. `devices`: ceph-volume inventory cache O(hosts) + + As soon as this is populated, it becomes more or less read-only. + + 3. `networks`: network interfaces for each host. O(hosts) + + This is needed in order to deploy MONs. As this is mostly read-only. + + 4. `last_client_files` O(hosts) + + Stores the last digest and owner/mode for files we've pushed to /etc/ceph + (ceph.conf or client keyrings). + + 5. `scheduled_daemon_actions`: O(daemons) + + Used to run daemon actions after deploying a daemon. We need to + store it persistently, in order to stay consistent across + MGR failovers. + """ + + def __init__(self, mgr): + # type: (CephadmOrchestrator) -> None + self.mgr: CephadmOrchestrator = mgr + self.daemons = {} # type: Dict[str, Dict[str, orchestrator.DaemonDescription]] + self.last_daemon_update = {} # type: Dict[str, datetime.datetime] + self.devices = {} # type: Dict[str, List[inventory.Device]] + self.facts = {} # type: Dict[str, Dict[str, Any]] + self.last_facts_update = {} # type: Dict[str, datetime.datetime] + self.last_autotune = {} # type: Dict[str, datetime.datetime] + self.osdspec_previews = {} # type: Dict[str, List[Dict[str, Any]]] + self.osdspec_last_applied = {} # type: Dict[str, Dict[str, datetime.datetime]] + self.networks = {} # type: Dict[str, Dict[str, Dict[str, List[str]]]] + self.last_device_update = {} # type: Dict[str, datetime.datetime] + self.last_device_change = {} # type: Dict[str, datetime.datetime] + self.daemon_refresh_queue = [] # type: List[str] + self.device_refresh_queue = [] # type: List[str] + self.osdspec_previews_refresh_queue = [] # type: List[str] + + # host -> daemon name -> dict + self.daemon_config_deps = {} # type: Dict[str, Dict[str, Dict[str,Any]]] + self.last_host_check = {} # type: Dict[str, datetime.datetime] + self.loading_osdspec_preview = set() # type: Set[str] + self.last_client_files: Dict[str, Dict[str, 
Tuple[str, int, int, int]]] = {} + self.registry_login_queue: Set[str] = set() + + self.scheduled_daemon_actions: Dict[str, Dict[str, str]] = {} + + def load(self): + # type: () -> None + for k, v in self.mgr.get_store_prefix(HOST_CACHE_PREFIX).items(): + host = k[len(HOST_CACHE_PREFIX):] + if host not in self.mgr.inventory: + self.mgr.log.warning('removing stray HostCache host record %s' % ( + host)) + self.mgr.set_store(k, None) + try: + j = json.loads(v) + if 'last_device_update' in j: + self.last_device_update[host] = str_to_datetime(j['last_device_update']) + else: + self.device_refresh_queue.append(host) + if 'last_device_change' in j: + self.last_device_change[host] = str_to_datetime(j['last_device_change']) + # for services, we ignore the persisted last_*_update + # and always trigger a new scrape on mgr restart. + self.daemon_refresh_queue.append(host) + self.daemons[host] = {} + self.osdspec_previews[host] = [] + self.osdspec_last_applied[host] = {} + self.devices[host] = [] + self.networks[host] = {} + self.daemon_config_deps[host] = {} + for name, d in j.get('daemons', {}).items(): + self.daemons[host][name] = \ + orchestrator.DaemonDescription.from_json(d) + for d in j.get('devices', []): + self.devices[host].append(inventory.Device.from_json(d)) + self.networks[host] = j.get('networks_and_interfaces', {}) + self.osdspec_previews[host] = j.get('osdspec_previews', {}) + self.last_client_files[host] = j.get('last_client_files', {}) + for name, ts in j.get('osdspec_last_applied', {}).items(): + self.osdspec_last_applied[host][name] = str_to_datetime(ts) + + for name, d in j.get('daemon_config_deps', {}).items(): + self.daemon_config_deps[host][name] = { + 'deps': d.get('deps', []), + 'last_config': str_to_datetime(d['last_config']), + } + if 'last_host_check' in j: + self.last_host_check[host] = str_to_datetime(j['last_host_check']) + self.registry_login_queue.add(host) + self.scheduled_daemon_actions[host] = j.get('scheduled_daemon_actions', {}) + + 
self.mgr.log.debug( + 'HostCache.load: host %s has %d daemons, ' + '%d devices, %d networks' % ( + host, len(self.daemons[host]), len(self.devices[host]), + len(self.networks[host]))) + except Exception as e: + self.mgr.log.warning('unable to load cached state for %s: %s' % ( + host, e)) + pass + + def update_host_daemons(self, host, dm): + # type: (str, Dict[str, orchestrator.DaemonDescription]) -> None + self.daemons[host] = dm + self.last_daemon_update[host] = datetime_now() + + def update_host_facts(self, host, facts): + # type: (str, Dict[str, Dict[str, Any]]) -> None + self.facts[host] = facts + self.last_facts_update[host] = datetime_now() + + def update_autotune(self, host: str) -> None: + self.last_autotune[host] = datetime_now() + + def invalidate_autotune(self, host: str) -> None: + if host in self.last_autotune: + del self.last_autotune[host] + + def devices_changed(self, host: str, b: List[inventory.Device]) -> bool: + a = self.devices[host] + if len(a) != len(b): + return True + aj = {d.path: d.to_json() for d in a} + bj = {d.path: d.to_json() for d in b} + if aj != bj: + self.mgr.log.info("Detected new or changed devices on %s" % host) + return True + return False + + def update_host_devices_networks( + self, + host: str, + dls: List[inventory.Device], + nets: Dict[str, Dict[str, List[str]]] + ) -> None: + if ( + host not in self.devices + or host not in self.last_device_change + or self.devices_changed(host, dls) + ): + self.last_device_change[host] = datetime_now() + self.last_device_update[host] = datetime_now() + self.devices[host] = dls + self.networks[host] = nets + + def update_daemon_config_deps(self, host: str, name: str, deps: List[str], stamp: datetime.datetime) -> None: + self.daemon_config_deps[host][name] = { + 'deps': deps, + 'last_config': stamp, + } + + def update_last_host_check(self, host): + # type: (str) -> None + self.last_host_check[host] = datetime_now() + + def update_osdspec_last_applied(self, host, service_name, ts): + # 
type: (str, str, datetime.datetime) -> None + self.osdspec_last_applied[host][service_name] = ts + + def update_client_file(self, + host: str, + path: str, + digest: str, + mode: int, + uid: int, + gid: int) -> None: + if host not in self.last_client_files: + self.last_client_files[host] = {} + self.last_client_files[host][path] = (digest, mode, uid, gid) + + def removed_client_file(self, host: str, path: str) -> None: + if ( + host in self.last_client_files + and path in self.last_client_files[host] + ): + del self.last_client_files[host][path] + + def prime_empty_host(self, host): + # type: (str) -> None + """ + Install an empty entry for a host + """ + self.daemons[host] = {} + self.devices[host] = [] + self.networks[host] = {} + self.osdspec_previews[host] = [] + self.osdspec_last_applied[host] = {} + self.daemon_config_deps[host] = {} + self.daemon_refresh_queue.append(host) + self.device_refresh_queue.append(host) + self.osdspec_previews_refresh_queue.append(host) + self.registry_login_queue.add(host) + self.last_client_files[host] = {} + + def refresh_all_host_info(self, host): + # type: (str) -> None + + self.last_host_check.pop(host, None) + self.daemon_refresh_queue.append(host) + self.registry_login_queue.add(host) + self.device_refresh_queue.append(host) + self.last_facts_update.pop(host, None) + self.osdspec_previews_refresh_queue.append(host) + self.last_autotune.pop(host, None) + + def invalidate_host_daemons(self, host): + # type: (str) -> None + self.daemon_refresh_queue.append(host) + if host in self.last_daemon_update: + del self.last_daemon_update[host] + self.mgr.event.set() + + def invalidate_host_devices(self, host): + # type: (str) -> None + self.device_refresh_queue.append(host) + if host in self.last_device_update: + del self.last_device_update[host] + self.mgr.event.set() + + def distribute_new_registry_login_info(self) -> None: + self.registry_login_queue = set(self.mgr.inventory.keys()) + + def save_host(self, host: str) -> None: + j: 
Dict[str, Any] = { + 'daemons': {}, + 'devices': [], + 'osdspec_previews': [], + 'osdspec_last_applied': {}, + 'daemon_config_deps': {}, + } + if host in self.last_daemon_update: + j['last_daemon_update'] = datetime_to_str(self.last_daemon_update[host]) + if host in self.last_device_update: + j['last_device_update'] = datetime_to_str(self.last_device_update[host]) + if host in self.last_device_change: + j['last_device_change'] = datetime_to_str(self.last_device_change[host]) + if host in self.daemons: + for name, dd in self.daemons[host].items(): + j['daemons'][name] = dd.to_json() + if host in self.devices: + for d in self.devices[host]: + j['devices'].append(d.to_json()) + if host in self.networks: + j['networks_and_interfaces'] = self.networks[host] + if host in self.daemon_config_deps: + for name, depi in self.daemon_config_deps[host].items(): + j['daemon_config_deps'][name] = { + 'deps': depi.get('deps', []), + 'last_config': datetime_to_str(depi['last_config']), + } + if host in self.osdspec_previews and self.osdspec_previews[host]: + j['osdspec_previews'] = self.osdspec_previews[host] + if host in self.osdspec_last_applied: + for name, ts in self.osdspec_last_applied[host].items(): + j['osdspec_last_applied'][name] = datetime_to_str(ts) + + if host in self.last_host_check: + j['last_host_check'] = datetime_to_str(self.last_host_check[host]) + + if host in self.last_client_files: + j['last_client_files'] = self.last_client_files[host] + if host in self.scheduled_daemon_actions: + j['scheduled_daemon_actions'] = self.scheduled_daemon_actions[host] + + self.mgr.set_store(HOST_CACHE_PREFIX + host, json.dumps(j)) + + def rm_host(self, host): + # type: (str) -> None + if host in self.daemons: + del self.daemons[host] + if host in self.devices: + del self.devices[host] + if host in self.facts: + del self.facts[host] + if host in self.last_facts_update: + del self.last_facts_update[host] + if host in self.last_autotune: + del self.last_autotune[host] + if host in 
self.osdspec_previews: + del self.osdspec_previews[host] + if host in self.osdspec_last_applied: + del self.osdspec_last_applied[host] + if host in self.loading_osdspec_preview: + self.loading_osdspec_preview.remove(host) + if host in self.networks: + del self.networks[host] + if host in self.last_daemon_update: + del self.last_daemon_update[host] + if host in self.last_device_update: + del self.last_device_update[host] + if host in self.last_device_change: + del self.last_device_change[host] + if host in self.daemon_config_deps: + del self.daemon_config_deps[host] + if host in self.scheduled_daemon_actions: + del self.scheduled_daemon_actions[host] + if host in self.last_client_files: + del self.last_client_files[host] + self.mgr.set_store(HOST_CACHE_PREFIX + host, None) + + def get_hosts(self): + # type: () -> List[str] + return list(self.daemons) + + def get_facts(self, host: str) -> Dict[str, Any]: + return self.facts.get(host, {}) + + def _get_daemons(self) -> Iterator[orchestrator.DaemonDescription]: + for dm in self.daemons.copy().values(): + yield from dm.values() + + def get_daemons(self): + # type: () -> List[orchestrator.DaemonDescription] + return list(self._get_daemons()) + + def get_daemons_by_host(self, host: str) -> List[orchestrator.DaemonDescription]: + return list(self.daemons.get(host, {}).values()) + + def get_daemon(self, daemon_name: str, host: Optional[str] = None) -> orchestrator.DaemonDescription: + assert not daemon_name.startswith('ha-rgw.') + dds = self.get_daemons_by_host(host) if host else self._get_daemons() + for dd in dds: + if dd.name() == daemon_name: + return dd + + raise orchestrator.OrchestratorError(f'Unable to find {daemon_name} daemon(s)') + + def has_daemon(self, daemon_name: str, host: Optional[str] = None) -> bool: + try: + self.get_daemon(daemon_name, host) + except orchestrator.OrchestratorError: + return False + return True + + def get_daemons_with_volatile_status(self) -> Iterator[Tuple[str, Dict[str, 
orchestrator.DaemonDescription]]]: + def alter(host: str, dd_orig: orchestrator.DaemonDescription) -> orchestrator.DaemonDescription: + dd = copy(dd_orig) + if host in self.mgr.offline_hosts: + dd.status = orchestrator.DaemonDescriptionStatus.error + dd.status_desc = 'host is offline' + elif self.mgr.inventory._inventory[host].get("status", "").lower() == "maintenance": + # We do not refresh daemons on hosts in maintenance mode, so stored daemon statuses + # could be wrong. We must assume maintenance is working and daemons are stopped + dd.status = orchestrator.DaemonDescriptionStatus.stopped + dd.events = self.mgr.events.get_for_daemon(dd.name()) + return dd + + for host, dm in self.daemons.copy().items(): + yield host, {name: alter(host, d) for name, d in dm.items()} + + def get_daemons_by_service(self, service_name): + # type: (str) -> List[orchestrator.DaemonDescription] + assert not service_name.startswith('keepalived.') + assert not service_name.startswith('haproxy.') + + return list(dd for dd in self._get_daemons() if dd.service_name() == service_name) + + def get_daemons_by_type(self, service_type: str, host: str = '') -> List[orchestrator.DaemonDescription]: + assert service_type not in ['keepalived', 'haproxy'] + + daemons = self.daemons[host].values() if host else self._get_daemons() + + return [d for d in daemons if d.daemon_type in service_to_daemon_types(service_type)] + + def get_daemon_types(self, hostname: str) -> Set[str]: + """Provide a list of the types of daemons on the host""" + return cast(Set[str], {d.daemon_type for d in self.daemons[hostname].values()}) + + def get_daemon_names(self): + # type: () -> List[str] + return [d.name() for d in self._get_daemons()] + + def get_daemon_last_config_deps(self, host: str, name: str) -> Tuple[Optional[List[str]], Optional[datetime.datetime]]: + if host in self.daemon_config_deps: + if name in self.daemon_config_deps[host]: + return self.daemon_config_deps[host][name].get('deps', []), \ + 
self.daemon_config_deps[host][name].get('last_config', None) + return None, None + + def get_host_client_files(self, host: str) -> Dict[str, Tuple[str, int, int, int]]: + return self.last_client_files.get(host, {}) + + def host_needs_daemon_refresh(self, host): + # type: (str) -> bool + if host in self.mgr.offline_hosts: + logger.debug(f'Host "{host}" marked as offline. Skipping daemon refresh') + return False + if host in self.daemon_refresh_queue: + self.daemon_refresh_queue.remove(host) + return True + cutoff = datetime_now() - datetime.timedelta( + seconds=self.mgr.daemon_cache_timeout) + if host not in self.last_daemon_update or self.last_daemon_update[host] < cutoff: + return True + return False + + def host_needs_facts_refresh(self, host): + # type: (str) -> bool + if host in self.mgr.offline_hosts: + logger.debug(f'Host "{host}" marked as offline. Skipping gather facts refresh') + return False + cutoff = datetime_now() - datetime.timedelta( + seconds=self.mgr.facts_cache_timeout) + if host not in self.last_facts_update or self.last_facts_update[host] < cutoff: + return True + return False + + def host_needs_autotune_memory(self, host): + # type: (str) -> bool + if host in self.mgr.offline_hosts: + logger.debug(f'Host "{host}" marked as offline. Skipping autotune') + return False + cutoff = datetime_now() - datetime.timedelta( + seconds=self.mgr.autotune_interval) + if host not in self.last_autotune or self.last_autotune[host] < cutoff: + return True + return False + + def host_had_daemon_refresh(self, host: str) -> bool: + """ + ... at least once. + """ + if host in self.last_daemon_update: + return True + if host not in self.daemons: + return False + return bool(self.daemons[host]) + + def host_needs_device_refresh(self, host): + # type: (str) -> bool + if host in self.mgr.offline_hosts: + logger.debug(f'Host "{host}" marked as offline. 
Skipping device refresh') + return False + if host in self.device_refresh_queue: + self.device_refresh_queue.remove(host) + return True + cutoff = datetime_now() - datetime.timedelta( + seconds=self.mgr.device_cache_timeout) + if host not in self.last_device_update or self.last_device_update[host] < cutoff: + return True + return False + + def host_needs_osdspec_preview_refresh(self, host: str) -> bool: + if host in self.mgr.offline_hosts: + logger.debug(f'Host "{host}" marked as offline. Skipping osdspec preview refresh') + return False + if host in self.osdspec_previews_refresh_queue: + self.osdspec_previews_refresh_queue.remove(host) + return True + # Since this is dependent on other factors (device and spec) this does not need + # to be updated periodically. + return False + + def host_needs_check(self, host): + # type: (str) -> bool + cutoff = datetime_now() - datetime.timedelta( + seconds=self.mgr.host_check_interval) + return host not in self.last_host_check or self.last_host_check[host] < cutoff + + def osdspec_needs_apply(self, host: str, spec: ServiceSpec) -> bool: + if ( + host not in self.devices + or host not in self.last_device_change + or host not in self.last_device_update + or host not in self.osdspec_last_applied + or spec.service_name() not in self.osdspec_last_applied[host] + ): + return True + created = self.mgr.spec_store.get_created(spec) + if not created or created > self.last_device_change[host]: + return True + return self.osdspec_last_applied[host][spec.service_name()] < self.last_device_change[host] + + def host_needs_registry_login(self, host: str) -> bool: + if host in self.mgr.offline_hosts: + return False + if host in self.registry_login_queue: + self.registry_login_queue.remove(host) + return True + return False + + def add_daemon(self, host, dd): + # type: (str, orchestrator.DaemonDescription) -> None + assert host in self.daemons + self.daemons[host][dd.name()] = dd + + def rm_daemon(self, host: str, name: str) -> None: + assert 
not name.startswith('ha-rgw.') + + if host in self.daemons: + if name in self.daemons[host]: + del self.daemons[host][name] + + def daemon_cache_filled(self) -> bool: + """ + i.e. we have checked the daemons for each hosts at least once. + excluding offline hosts. + + We're not checking for `host_needs_daemon_refresh`, as this might never be + False for all hosts. + """ + return all((self.host_had_daemon_refresh(h) or h in self.mgr.offline_hosts) + for h in self.get_hosts()) + + def schedule_daemon_action(self, host: str, daemon_name: str, action: str) -> None: + assert not daemon_name.startswith('ha-rgw.') + + priorities = { + 'start': 1, + 'restart': 2, + 'reconfig': 3, + 'redeploy': 4, + 'stop': 5, + } + existing_action = self.scheduled_daemon_actions.get(host, {}).get(daemon_name, None) + if existing_action and priorities[existing_action] > priorities[action]: + logger.debug( + f'skipping {action}ing {daemon_name}, cause {existing_action} already scheduled.') + return + + if host not in self.scheduled_daemon_actions: + self.scheduled_daemon_actions[host] = {} + self.scheduled_daemon_actions[host][daemon_name] = action + + def rm_scheduled_daemon_action(self, host: str, daemon_name: str) -> bool: + found = False + if host in self.scheduled_daemon_actions: + if daemon_name in self.scheduled_daemon_actions[host]: + del self.scheduled_daemon_actions[host][daemon_name] + found = True + if not self.scheduled_daemon_actions[host]: + del self.scheduled_daemon_actions[host] + return found + + def get_scheduled_daemon_action(self, host: str, daemon: str) -> Optional[str]: + assert not daemon.startswith('ha-rgw.') + + return self.scheduled_daemon_actions.get(host, {}).get(daemon) + + +class EventStore(): + def __init__(self, mgr): + # type: (CephadmOrchestrator) -> None + self.mgr: CephadmOrchestrator = mgr + self.events = {} # type: Dict[str, List[OrchestratorEvent]] + + def add(self, event: OrchestratorEvent) -> None: + + if event.kind_subject() not in self.events: + 
self.events[event.kind_subject()] = [event] + + for e in self.events[event.kind_subject()]: + if e.message == event.message: + return + + self.events[event.kind_subject()].append(event) + + # limit to five events for now. + self.events[event.kind_subject()] = self.events[event.kind_subject()][-5:] + + def for_service(self, spec: ServiceSpec, level: str, message: str) -> None: + e = OrchestratorEvent(datetime_now(), 'service', + spec.service_name(), level, message) + self.add(e) + + def from_orch_error(self, e: OrchestratorError) -> None: + if e.event_subject is not None: + self.add(OrchestratorEvent( + datetime_now(), + e.event_subject[0], + e.event_subject[1], + "ERROR", + str(e) + )) + + def for_daemon(self, daemon_name: str, level: str, message: str) -> None: + e = OrchestratorEvent(datetime_now(), 'daemon', daemon_name, level, message) + self.add(e) + + def for_daemon_from_exception(self, daemon_name: str, e: Exception) -> None: + self.for_daemon( + daemon_name, + "ERROR", + str(e) + ) + + def cleanup(self) -> None: + # Needs to be properly done, in case events are persistently stored. 
+ + unknowns: List[str] = [] + daemons = self.mgr.cache.get_daemon_names() + specs = self.mgr.spec_store.all_specs.keys() + for k_s, v in self.events.items(): + kind, subject = k_s.split(':') + if kind == 'service': + if subject not in specs: + unknowns.append(k_s) + elif kind == 'daemon': + if subject not in daemons: + unknowns.append(k_s) + + for k_s in unknowns: + del self.events[k_s] + + def get_for_service(self, name: str) -> List[OrchestratorEvent]: + return self.events.get('service:' + name, []) + + def get_for_daemon(self, name: str) -> List[OrchestratorEvent]: + return self.events.get('daemon:' + name, []) diff --git a/src/pybind/mgr/cephadm/migrations.py b/src/pybind/mgr/cephadm/migrations.py new file mode 100644 index 000000000..f4a3056b2 --- /dev/null +++ b/src/pybind/mgr/cephadm/migrations.py @@ -0,0 +1,333 @@ +import json +import logging +from typing import TYPE_CHECKING, Iterator, Optional, Dict, Any + +from ceph.deployment.service_spec import PlacementSpec, ServiceSpec, HostPlacementSpec +from cephadm.schedule import HostAssignment +import rados + +from mgr_module import NFS_POOL_NAME +from orchestrator import OrchestratorError, DaemonDescription + +if TYPE_CHECKING: + from .module import CephadmOrchestrator + +LAST_MIGRATION = 5 + +logger = logging.getLogger(__name__) + + +class Migrations: + def __init__(self, mgr: "CephadmOrchestrator"): + self.mgr = mgr + + # Why having a global counter, instead of spec versions? + # + # for the first migration: + # The specs don't change in (this) migration. but the scheduler here. + # Adding the version to the specs at this time just felt wrong to me. + # + # And the specs are only another part of cephadm which needs potential upgrades. + # We have the cache, the inventory, the config store, the upgrade (imagine changing the + # upgrade code, while an old upgrade is still in progress), naming of daemons, + # fs-layout of the daemons, etc. 
+ if self.mgr.migration_current is None: + self.set(LAST_MIGRATION) + + v = mgr.get_store('nfs_migration_queue') + self.nfs_migration_queue = json.loads(v) if v else [] + + # for some migrations, we don't need to do anything except for + # incrementing migration_current. + # let's try to shortcut things here. + self.migrate(True) + + def set(self, val: int) -> None: + self.mgr.set_module_option('migration_current', val) + self.mgr.migration_current = val + + def is_migration_ongoing(self) -> bool: + return self.mgr.migration_current != LAST_MIGRATION + + def verify_no_migration(self) -> None: + if self.is_migration_ongoing(): + # this is raised in module.serve() + raise OrchestratorError( + "cephadm migration still ongoing. Please wait, until the migration is complete.") + + def migrate(self, startup: bool = False) -> None: + if self.mgr.migration_current == 0: + if self.migrate_0_1(): + self.set(1) + + if self.mgr.migration_current == 1: + if self.migrate_1_2(): + self.set(2) + + if self.mgr.migration_current == 2 and not startup: + if self.migrate_2_3(): + self.set(3) + + if self.mgr.migration_current == 3: + if self.migrate_3_4(): + self.set(4) + + if self.mgr.migration_current == 4: + if self.migrate_4_5(): + self.set(5) + + def migrate_0_1(self) -> bool: + """ + Migration 0 -> 1 + New scheduler that takes PlacementSpec as the bound and not as recommendation. + I.e. the new scheduler won't suggest any new placements outside of the hosts + specified by label etc. + + Which means, we have to make sure, we're not removing any daemons directly after + upgrading to the new scheduler. + + There is a potential race here: + 1. user updates his spec to remove daemons + 2. mgr gets upgraded to new scheduler, before the old scheduler removed the daemon + 3. now, we're converting the spec to explicit placement, thus reverting (1.) + I think this is ok. 
+ """ + + def interesting_specs() -> Iterator[ServiceSpec]: + for s in self.mgr.spec_store.all_specs.values(): + if s.unmanaged: + continue + p = s.placement + if p is None: + continue + if p.count is None: + continue + if not p.hosts and not p.host_pattern and not p.label: + continue + yield s + + def convert_to_explicit(spec: ServiceSpec) -> None: + existing_daemons = self.mgr.cache.get_daemons_by_service(spec.service_name()) + placements, to_add, to_remove = HostAssignment( + spec=spec, + hosts=self.mgr.inventory.all_specs(), + unreachable_hosts=self.mgr._unreachable_hosts(), + daemons=existing_daemons, + ).place() + + # We have to migrate, only if the new scheduler would remove daemons + if len(placements) >= len(existing_daemons): + return + + def to_hostname(d: DaemonDescription) -> HostPlacementSpec: + if d.hostname in old_hosts: + return old_hosts[d.hostname] + else: + assert d.hostname + return HostPlacementSpec(d.hostname, '', '') + + old_hosts = {h.hostname: h for h in spec.placement.hosts} + new_hosts = [to_hostname(d) for d in existing_daemons] + + new_placement = PlacementSpec( + hosts=new_hosts, + count=spec.placement.count + ) + + new_spec = ServiceSpec.from_json(spec.to_json()) + new_spec.placement = new_placement + + logger.info(f"Migrating {spec.one_line_str()} to explicit placement") + + self.mgr.spec_store.save(new_spec) + + specs = list(interesting_specs()) + if not specs: + return True # nothing to do. shortcut + + if not self.mgr.cache.daemon_cache_filled(): + logger.info("Unable to migrate yet. Daemon Cache still incomplete.") + return False + + for spec in specs: + convert_to_explicit(spec) + + return True + + def migrate_1_2(self) -> bool: + """ + After 15.2.4, we unified some service IDs: MONs, MGRs etc no longer have a service id. 
+ Which means, the service names changed: + + mon.foo -> mon + mgr.foo -> mgr + + This fixes the data structure consistency + """ + bad_specs = {} + for name, spec in self.mgr.spec_store.all_specs.items(): + if name != spec.service_name(): + bad_specs[name] = (spec.service_name(), spec) + + for old, (new, old_spec) in bad_specs.items(): + if new not in self.mgr.spec_store.all_specs: + spec = old_spec + else: + spec = self.mgr.spec_store.all_specs[new] + spec.unmanaged = True + self.mgr.spec_store.save(spec) + self.mgr.spec_store.finally_rm(old) + + return True + + def migrate_2_3(self) -> bool: + if self.nfs_migration_queue: + from nfs.cluster import create_ganesha_pool + + create_ganesha_pool(self.mgr) + for service_id, pool, ns in self.nfs_migration_queue: + if pool != '.nfs': + self.migrate_nfs_spec(service_id, pool, ns) + self.nfs_migration_queue = [] + self.mgr.log.info('Done migrating all NFS services') + return True + + def migrate_nfs_spec(self, service_id: str, pool: str, ns: Optional[str]) -> None: + renamed = False + if service_id.startswith('ganesha-'): + service_id = service_id[8:] + renamed = True + + self.mgr.log.info( + f'Migrating nfs.{service_id} from legacy pool {pool} namespace {ns}' + ) + + # read exports + ioctx = self.mgr.rados.open_ioctx(pool) + if ns is not None: + ioctx.set_namespace(ns) + object_iterator = ioctx.list_objects() + exports = [] + while True: + try: + obj = object_iterator.__next__() + if obj.key.startswith('export-'): + self.mgr.log.debug(f'reading {obj.key}') + exports.append(obj.read().decode()) + except StopIteration: + break + self.mgr.log.info(f'Found {len(exports)} exports for legacy nfs.{service_id}') + + # copy grace file + if service_id != ns: + try: + grace = ioctx.read("grace") + new_ioctx = self.mgr.rados.open_ioctx(NFS_POOL_NAME) + new_ioctx.set_namespace(service_id) + new_ioctx.write_full("grace", grace) + self.mgr.log.info('Migrated nfs-ganesha grace file') + except rados.ObjectNotFound: + 
self.mgr.log.debug('failed to read old grace file; skipping') + + if renamed and f'nfs.ganesha-{service_id}' in self.mgr.spec_store: + # rename from nfs.ganesha-* to nfs.*. This will destroy old daemons and + # deploy new ones. + self.mgr.log.info(f'Replacing nfs.ganesha-{service_id} with nfs.{service_id}') + spec = self.mgr.spec_store[f'nfs.ganesha-{service_id}'].spec + self.mgr.spec_store.rm(f'nfs.ganesha-{service_id}') + spec.service_id = service_id + self.mgr.spec_store.save(spec, True) + + # We have to remove the old daemons here as well, otherwise we'll end up with a port conflict. + daemons = [d.name() + for d in self.mgr.cache.get_daemons_by_service(f'nfs.ganesha-{service_id}')] + self.mgr.log.info(f'Removing old nfs.ganesha-{service_id} daemons {daemons}') + self.mgr.remove_daemons(daemons) + else: + # redeploy all ganesha daemons to ensures that the daemon + # cephx are correct AND container configs are set up properly + daemons = [d.name() for d in self.mgr.cache.get_daemons_by_service(f'nfs.{service_id}')] + self.mgr.log.info(f'Removing old nfs.{service_id} daemons {daemons}') + self.mgr.remove_daemons(daemons) + + # re-save service spec (without pool and namespace properties!) 
+ spec = self.mgr.spec_store[f'nfs.{service_id}'].spec + self.mgr.spec_store.save(spec) + + # import exports + for export in exports: + ex = '' + for line in export.splitlines(): + if ( + line.startswith(' secret_access_key =') + or line.startswith(' user_id =') + ): + continue + ex += line + '\n' + self.mgr.log.debug(f'importing export: {ex}') + ret, out, err = self.mgr.mon_command({ + 'prefix': 'nfs export apply', + 'cluster_id': service_id + }, inbuf=ex) + if ret: + self.mgr.log.warning(f'Failed to migrate export ({ret}): {err}\nExport was:\n{ex}') + self.mgr.log.info(f'Done migrating nfs.{service_id}') + + def migrate_3_4(self) -> bool: + # We can't set any host with the _admin label, but we're + # going to warn when calling `ceph orch host rm...` + if 'client.admin' not in self.mgr.keys.keys: + self.mgr._client_keyring_set( + entity='client.admin', + placement='label:_admin', + ) + return True + + def migrate_4_5(self) -> bool: + registry_url = self.mgr.get_module_option('registry_url') + registry_username = self.mgr.get_module_option('registry_username') + registry_password = self.mgr.get_module_option('registry_password') + if registry_url and registry_username and registry_password: + + registry_credentials = {'url': registry_url, + 'username': registry_username, 'password': registry_password} + self.mgr.set_store('registry_credentials', json.dumps(registry_credentials)) + + self.mgr.set_module_option('registry_url', None) + self.mgr.check_mon_command({ + 'prefix': 'config rm', + 'who': 'mgr', + 'key': 'mgr/cephadm/registry_url', + }) + self.mgr.set_module_option('registry_username', None) + self.mgr.check_mon_command({ + 'prefix': 'config rm', + 'who': 'mgr', + 'key': 'mgr/cephadm/registry_username', + }) + self.mgr.set_module_option('registry_password', None) + self.mgr.check_mon_command({ + 'prefix': 'config rm', + 'who': 'mgr', + 'key': 'mgr/cephadm/registry_password', + }) + + self.mgr.log.info('Done migrating registry login info') + return True + + 
+def queue_migrate_nfs_spec(mgr: "CephadmOrchestrator", spec_dict: Dict[Any, Any]) -> None: + """ + After 16.2.5 we dropped the NFSServiceSpec pool and namespace properties. + Queue up a migration to process later, once we are sure that RADOS is available + and so on. + """ + service_id = spec_dict['spec']['service_id'] + args = spec_dict['spec'].get('spec', {}) + pool = args.pop('pool', 'nfs-ganesha') + ns = args.pop('namespace', service_id) + queued = mgr.get_store('nfs_migration_queue') or '[]' + ls = json.loads(queued) + ls.append([service_id, pool, ns]) + mgr.set_store('nfs_migration_queue', json.dumps(ls)) + mgr.log.info(f'Queued nfs.{service_id} for migration') diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py new file mode 100644 index 000000000..9fc4298a8 --- /dev/null +++ b/src/pybind/mgr/cephadm/module.py @@ -0,0 +1,2974 @@ +import json +import errno +import ipaddress +import logging +import re +import shlex +from collections import defaultdict +from configparser import ConfigParser +from functools import wraps +from tempfile import TemporaryDirectory +from threading import Event + +import string +from typing import List, Dict, Optional, Callable, Tuple, TypeVar, \ + Any, Set, TYPE_CHECKING, cast, NamedTuple, Sequence, Type + +import datetime +import os +import random +import tempfile +import multiprocessing.pool +import subprocess +from prettytable import PrettyTable + +from ceph.deployment import inventory +from ceph.deployment.drive_group import DriveGroupSpec +from ceph.deployment.service_spec import \ + ServiceSpec, PlacementSpec, \ + HostPlacementSpec, IngressSpec, IscsiServiceSpec +from ceph.utils import str_to_datetime, datetime_to_str, datetime_now +from cephadm.serve import CephadmServe +from cephadm.services.cephadmservice import CephadmDaemonDeploySpec + +from mgr_module import MgrModule, HandleCommandResult, Option, NotifyType +from mgr_util import create_self_signed_cert +import secrets +import orchestrator 
+from orchestrator.module import to_format, Format + +from orchestrator import OrchestratorError, OrchestratorValidationError, HostSpec, \ + CLICommandMeta, DaemonDescription, DaemonDescriptionStatus, handle_orch_error, \ + service_to_daemon_types +from orchestrator._interface import GenericSpec +from orchestrator._interface import daemon_type_to_service + +from . import remotes +from . import utils +from .migrations import Migrations +from .services.cephadmservice import MonService, MgrService, MdsService, RgwService, \ + RbdMirrorService, CrashService, CephadmService, CephfsMirrorService +from .services.ingress import IngressService +from .services.container import CustomContainerService +from .services.iscsi import IscsiService +from .services.nfs import NFSService +from .services.osd import OSDRemovalQueue, OSDService, OSD, NotFoundError +from .services.monitoring import GrafanaService, AlertmanagerService, PrometheusService, \ + NodeExporterService, SNMPGatewayService +from .services.exporter import CephadmExporter, CephadmExporterConfig +from .schedule import HostAssignment +from .inventory import Inventory, SpecStore, HostCache, EventStore, ClientKeyringStore, ClientKeyringSpec +from .upgrade import CephadmUpgrade +from .template import TemplateMgr +from .utils import CEPH_IMAGE_TYPES, RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES, forall_hosts, \ + cephadmNoImage, CEPH_UPGRADE_ORDER +from .configchecks import CephadmConfigChecks +from .offline_watcher import OfflineHostWatcher + +try: + import remoto + # NOTE(mattoliverau) Patch remoto until remoto PR + # (https://github.com/alfredodeza/remoto/pull/56) lands + from distutils.version import StrictVersion + if StrictVersion(remoto.__version__) <= StrictVersion('1.2'): + def remoto_has_connection(self: Any) -> bool: + return self.gateway.hasreceiver() + + from remoto.backends import BaseConnection + BaseConnection.has_connection = remoto_has_connection + import remoto.process +except ImportError as e: + remoto = None + 
remoto_import_error = str(e) + +logger = logging.getLogger(__name__) + +T = TypeVar('T') + +DEFAULT_SSH_CONFIG = """ +Host * + User root + StrictHostKeyChecking no + UserKnownHostsFile /dev/null + ConnectTimeout=30 +""" + +# Default container images ----------------------------------------------------- +DEFAULT_IMAGE = 'quay.io/ceph/ceph' +DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.33.4' +DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.3.1' +DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.23.0' +DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:8.3.5' +DEFAULT_HAPROXY_IMAGE = 'docker.io/library/haproxy:2.3' +DEFAULT_KEEPALIVED_IMAGE = 'docker.io/arcts/keepalived' +DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1' +# ------------------------------------------------------------------------------ + + +def service_inactive(spec_name: str) -> Callable: + def inner(func: Callable) -> Callable: + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + obj = args[0] + if obj.get_store(f"spec.{spec_name}") is not None: + return 1, "", f"Unable to change configuration of an active service {spec_name}" + return func(*args, **kwargs) + return wrapper + return inner + + +def host_exists(hostname_position: int = 1) -> Callable: + """Check that a hostname exists in the inventory""" + def inner(func: Callable) -> Callable: + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + this = args[0] # self object + hostname = args[hostname_position] + if hostname not in this.cache.get_hosts(): + candidates = ','.join([h for h in this.cache.get_hosts() if h.startswith(hostname)]) + help_msg = f"Did you mean {candidates}?" if candidates else "" + raise OrchestratorError( + f"Cannot find host '{hostname}' in the inventory. 
{help_msg}") + + return func(*args, **kwargs) + return wrapper + return inner + + +class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, + metaclass=CLICommandMeta): + + _STORE_HOST_PREFIX = "host" + + instance = None + NOTIFY_TYPES = [NotifyType.mon_map, NotifyType.pg_summary] + NATIVE_OPTIONS = [] # type: List[Any] + MODULE_OPTIONS = [ + Option( + 'ssh_config_file', + type='str', + default=None, + desc='customized SSH config file to connect to managed hosts', + ), + Option( + 'device_cache_timeout', + type='secs', + default=30 * 60, + desc='seconds to cache device inventory', + ), + Option( + 'device_enhanced_scan', + type='bool', + default=False, + desc='Use libstoragemgmt during device scans', + ), + Option( + 'daemon_cache_timeout', + type='secs', + default=10 * 60, + desc='seconds to cache service (daemon) inventory', + ), + Option( + 'facts_cache_timeout', + type='secs', + default=1 * 60, + desc='seconds to cache host facts data', + ), + Option( + 'host_check_interval', + type='secs', + default=10 * 60, + desc='how frequently to perform a host check', + ), + Option( + 'mode', + type='str', + enum_allowed=['root', 'cephadm-package'], + default='root', + desc='mode for remote execution of cephadm', + ), + Option( + 'container_image_base', + default=DEFAULT_IMAGE, + desc='Container image name, without the tag', + runtime=True, + ), + Option( + 'container_image_prometheus', + default=DEFAULT_PROMETHEUS_IMAGE, + desc='Prometheus container image', + ), + Option( + 'container_image_grafana', + default=DEFAULT_GRAFANA_IMAGE, + desc='Prometheus container image', + ), + Option( + 'container_image_alertmanager', + default=DEFAULT_ALERT_MANAGER_IMAGE, + desc='Prometheus container image', + ), + Option( + 'container_image_node_exporter', + default=DEFAULT_NODE_EXPORTER_IMAGE, + desc='Prometheus container image', + ), + Option( + 'container_image_haproxy', + default=DEFAULT_HAPROXY_IMAGE, + desc='HAproxy container image', + ), + Option( + 
'container_image_keepalived', + default=DEFAULT_KEEPALIVED_IMAGE, + desc='Keepalived container image', + ), + Option( + 'container_image_snmp_gateway', + default=DEFAULT_SNMP_GATEWAY_IMAGE, + desc='SNMP Gateway container image', + ), + Option( + 'warn_on_stray_hosts', + type='bool', + default=True, + desc='raise a health warning if daemons are detected on a host ' + 'that is not managed by cephadm', + ), + Option( + 'warn_on_stray_daemons', + type='bool', + default=True, + desc='raise a health warning if daemons are detected ' + 'that are not managed by cephadm', + ), + Option( + 'warn_on_failed_host_check', + type='bool', + default=True, + desc='raise a health warning if the host check fails', + ), + Option( + 'log_to_cluster', + type='bool', + default=True, + desc='log to the "cephadm" cluster log channel"', + ), + Option( + 'allow_ptrace', + type='bool', + default=False, + desc='allow SYS_PTRACE capability on ceph containers', + long_desc='The SYS_PTRACE capability is needed to attach to a ' + 'process with gdb or strace. Enabling this options ' + 'can allow debugging daemons that encounter problems ' + 'at runtime.', + ), + Option( + 'container_init', + type='bool', + default=True, + desc='Run podman/docker with `--init`' + ), + Option( + 'prometheus_alerts_path', + type='str', + default='/etc/prometheus/ceph/ceph_default_alerts.yml', + desc='location of alerts to include in prometheus deployments', + ), + Option( + 'migration_current', + type='int', + default=None, + desc='internal - do not modify', + # used to track track spec and other data migrations. + ), + Option( + 'config_dashboard', + type='bool', + default=True, + desc='manage configs like API endpoints in Dashboard.' 
+ ), + Option( + 'manage_etc_ceph_ceph_conf', + type='bool', + default=False, + desc='Manage and own /etc/ceph/ceph.conf on the hosts.', + ), + Option( + 'manage_etc_ceph_ceph_conf_hosts', + type='str', + default='*', + desc='PlacementSpec describing on which hosts to manage /etc/ceph/ceph.conf', + ), + # not used anymore + Option( + 'registry_url', + type='str', + default=None, + desc='Registry url for login purposes. This is not the default registry' + ), + Option( + 'registry_username', + type='str', + default=None, + desc='Custom repository username. Only used for logging into a registry.' + ), + Option( + 'registry_password', + type='str', + default=None, + desc='Custom repository password. Only used for logging into a registry.' + ), + #### + Option( + 'registry_insecure', + type='bool', + default=False, + desc='Registry is to be considered insecure (no TLS available). Only for development purposes.' + ), + Option( + 'use_repo_digest', + type='bool', + default=True, + desc='Automatically convert image tags to image digest. Make sure all daemons use the same image', + ), + Option( + 'config_checks_enabled', + type='bool', + default=False, + desc='Enable or disable the cephadm configuration analysis', + ), + Option( + 'default_registry', + type='str', + default='docker.io', + desc='Search-registry to which we should normalize unqualified image names. 
' + 'This is not the default registry', + ), + Option( + 'max_count_per_host', + type='int', + default=10, + desc='max number of daemons per service per host', + ), + Option( + 'autotune_memory_target_ratio', + type='float', + default=.7, + desc='ratio of total system memory to divide amongst autotuned daemons' + ), + Option( + 'autotune_interval', + type='secs', + default=10 * 60, + desc='how frequently to autotune daemon memory' + ), + Option( + 'max_osd_draining_count', + type='int', + default=10, + desc='max number of osds that will be drained simultaneously when osds are removed' + ), + ] + + def __init__(self, *args: Any, **kwargs: Any): + super(CephadmOrchestrator, self).__init__(*args, **kwargs) + self._cluster_fsid: str = self.get('mon_map')['fsid'] + self.last_monmap: Optional[datetime.datetime] = None + + # for serve() + self.run = True + self.event = Event() + + if self.get_store('pause'): + self.paused = True + else: + self.paused = False + + # for mypy which does not run the code + if TYPE_CHECKING: + self.ssh_config_file = None # type: Optional[str] + self.device_cache_timeout = 0 + self.daemon_cache_timeout = 0 + self.facts_cache_timeout = 0 + self.host_check_interval = 0 + self.max_count_per_host = 0 + self.mode = '' + self.container_image_base = '' + self.container_image_prometheus = '' + self.container_image_grafana = '' + self.container_image_alertmanager = '' + self.container_image_node_exporter = '' + self.container_image_haproxy = '' + self.container_image_keepalived = '' + self.container_image_snmp_gateway = '' + self.warn_on_stray_hosts = True + self.warn_on_stray_daemons = True + self.warn_on_failed_host_check = True + self.allow_ptrace = False + self.container_init = True + self.prometheus_alerts_path = '' + self.migration_current: Optional[int] = None + self.config_dashboard = True + self.manage_etc_ceph_ceph_conf = True + self.manage_etc_ceph_ceph_conf_hosts = '*' + self.registry_url: Optional[str] = None + self.registry_username: 
Optional[str] = None + self.registry_password: Optional[str] = None + self.registry_insecure: bool = False + self.use_repo_digest = True + self.default_registry = '' + self.autotune_memory_target_ratio = 0.0 + self.autotune_interval = 0 + self.apply_spec_fails: List[Tuple[str, str]] = [] + self.max_osd_draining_count = 10 + self.device_enhanced_scan = False + + self._cons: Dict[str, Tuple[remoto.backends.BaseConnection, + remoto.backends.LegacyModuleExecute]] = {} + + self.notify(NotifyType.mon_map, None) + self.config_notify() + + path = self.get_ceph_option('cephadm_path') + try: + assert isinstance(path, str) + with open(path, 'r') as f: + self._cephadm = f.read() + except (IOError, TypeError) as e: + raise RuntimeError("unable to read cephadm at '%s': %s" % ( + path, str(e))) + + self.cephadm_binary_path = self._get_cephadm_binary_path() + + self._worker_pool = multiprocessing.pool.ThreadPool(10) + + self._reconfig_ssh() + + CephadmOrchestrator.instance = self + + self.upgrade = CephadmUpgrade(self) + + self.health_checks: Dict[str, dict] = {} + + self.inventory = Inventory(self) + + self.cache = HostCache(self) + self.cache.load() + + self.to_remove_osds = OSDRemovalQueue(self) + self.to_remove_osds.load_from_store() + + self.spec_store = SpecStore(self) + self.spec_store.load() + + self.keys = ClientKeyringStore(self) + self.keys.load() + + # ensure the host lists are in sync + for h in self.inventory.keys(): + if h not in self.cache.daemons: + self.cache.prime_empty_host(h) + for h in self.cache.get_hosts(): + if h not in self.inventory: + self.cache.rm_host(h) + + # in-memory only. 
+ self.events = EventStore(self) + self.offline_hosts: Set[str] = set() + + self.migration = Migrations(self) + + _service_clses: Sequence[Type[CephadmService]] = [ + OSDService, NFSService, MonService, MgrService, MdsService, + RgwService, RbdMirrorService, GrafanaService, AlertmanagerService, + PrometheusService, NodeExporterService, CrashService, IscsiService, + IngressService, CustomContainerService, CephadmExporter, CephfsMirrorService, + SNMPGatewayService, + ] + + # https://github.com/python/mypy/issues/8993 + self.cephadm_services: Dict[str, CephadmService] = { + cls.TYPE: cls(self) for cls in _service_clses} # type: ignore + + self.mgr_service: MgrService = cast(MgrService, self.cephadm_services['mgr']) + self.osd_service: OSDService = cast(OSDService, self.cephadm_services['osd']) + self.iscsi_service: IscsiService = cast(IscsiService, self.cephadm_services['iscsi']) + + self.template = TemplateMgr(self) + + self.requires_post_actions: Set[str] = set() + self.need_connect_dashboard_rgw = False + + self.config_checker = CephadmConfigChecks(self) + + self.offline_watcher = OfflineHostWatcher(self) + self.offline_watcher.start() + + def shutdown(self) -> None: + self.log.debug('shutdown') + self._worker_pool.close() + self._worker_pool.join() + self.offline_watcher.shutdown() + self.run = False + self.event.set() + + def _get_cephadm_service(self, service_type: str) -> CephadmService: + assert service_type in ServiceSpec.KNOWN_SERVICE_TYPES + return self.cephadm_services[service_type] + + def _get_cephadm_binary_path(self) -> str: + import hashlib + m = hashlib.sha256() + m.update(self._cephadm.encode()) + return f'/var/lib/ceph/{self._cluster_fsid}/cephadm.{m.hexdigest()}' + + def _kick_serve_loop(self) -> None: + self.log.debug('_kick_serve_loop') + self.event.set() + + def serve(self) -> None: + """ + The main loop of cephadm. + + A command handler will typically change the declarative state + of cephadm. 
This loop will then attempt to apply this new state. + """ + serve = CephadmServe(self) + serve.serve() + + def set_container_image(self, entity: str, image: str) -> None: + self.check_mon_command({ + 'prefix': 'config set', + 'name': 'container_image', + 'value': image, + 'who': entity, + }) + + def config_notify(self) -> None: + """ + This method is called whenever one of our config options is changed. + + TODO: this method should be moved into mgr_module.py + """ + for opt in self.MODULE_OPTIONS: + setattr(self, + opt['name'], # type: ignore + self.get_module_option(opt['name'])) # type: ignore + self.log.debug(' mgr option %s = %s', + opt['name'], getattr(self, opt['name'])) # type: ignore + for opt in self.NATIVE_OPTIONS: + setattr(self, + opt, # type: ignore + self.get_ceph_option(opt)) + self.log.debug(' native option %s = %s', opt, getattr(self, opt)) # type: ignore + + self.event.set() + + def notify(self, notify_type: NotifyType, notify_id: Optional[str]) -> None: + if notify_type == NotifyType.mon_map: + # get monmap mtime so we can refresh configs when mons change + monmap = self.get('mon_map') + self.last_monmap = str_to_datetime(monmap['modified']) + if self.last_monmap and self.last_monmap > datetime_now(): + self.last_monmap = None # just in case clocks are skewed + if getattr(self, 'manage_etc_ceph_ceph_conf', False): + # getattr, due to notify() being called before config_notify() + self._kick_serve_loop() + if notify_type == NotifyType.pg_summary: + self._trigger_osd_removal() + + def _trigger_osd_removal(self) -> None: + remove_queue = self.to_remove_osds.as_osd_ids() + if not remove_queue: + return + data = self.get("osd_stats") + for osd in data.get('osd_stats', []): + if osd.get('num_pgs') == 0: + # if _ANY_ osd that is currently in the queue appears to be empty, + # start the removal process + if int(osd.get('osd')) in remove_queue: + self.log.debug('Found empty osd. 
Starting removal process') + # if the osd that is now empty is also part of the removal queue + # start the process + self._kick_serve_loop() + + def pause(self) -> None: + if not self.paused: + self.log.info('Paused') + self.set_store('pause', 'true') + self.paused = True + # wake loop so we update the health status + self._kick_serve_loop() + + def resume(self) -> None: + if self.paused: + self.log.info('Resumed') + self.paused = False + self.set_store('pause', None) + # unconditionally wake loop so that 'orch resume' can be used to kick + # cephadm + self._kick_serve_loop() + + def get_unique_name( + self, + daemon_type: str, + host: str, + existing: List[orchestrator.DaemonDescription], + prefix: Optional[str] = None, + forcename: Optional[str] = None, + rank: Optional[int] = None, + rank_generation: Optional[int] = None, + ) -> str: + """ + Generate a unique random service name + """ + suffix = daemon_type not in [ + 'mon', 'crash', + 'prometheus', 'node-exporter', 'grafana', 'alertmanager', + 'container', 'cephadm-exporter', 'snmp-gateway' + ] + if forcename: + if len([d for d in existing if d.daemon_id == forcename]): + raise orchestrator.OrchestratorValidationError( + f'name {daemon_type}.{forcename} already in use') + return forcename + + if '.' in host: + host = host.split('.')[0] + while True: + if prefix: + name = prefix + '.' + else: + name = '' + if rank is not None and rank_generation is not None: + name += f'{rank}.{rank_generation}.' + name += host + if suffix: + name += '.' 
+ ''.join(random.choice(string.ascii_lowercase) + for _ in range(6)) + if len([d for d in existing if d.daemon_id == name]): + if not suffix: + raise orchestrator.OrchestratorValidationError( + f'name {daemon_type}.{name} already in use') + self.log.debug('name %s exists, trying again', name) + continue + return name + + def _reconfig_ssh(self) -> None: + temp_files = [] # type: list + ssh_options = [] # type: List[str] + + # ssh_config + ssh_config_fname = self.ssh_config_file + ssh_config = self.get_store("ssh_config") + if ssh_config is not None or ssh_config_fname is None: + if not ssh_config: + ssh_config = DEFAULT_SSH_CONFIG + f = tempfile.NamedTemporaryFile(prefix='cephadm-conf-') + os.fchmod(f.fileno(), 0o600) + f.write(ssh_config.encode('utf-8')) + f.flush() # make visible to other processes + temp_files += [f] + ssh_config_fname = f.name + if ssh_config_fname: + self.validate_ssh_config_fname(ssh_config_fname) + ssh_options += ['-F', ssh_config_fname] + self.ssh_config = ssh_config + + # identity + ssh_key = self.get_store("ssh_identity_key") + ssh_pub = self.get_store("ssh_identity_pub") + self.ssh_pub = ssh_pub + self.ssh_key = ssh_key + if ssh_key and ssh_pub: + tkey = tempfile.NamedTemporaryFile(prefix='cephadm-identity-') + tkey.write(ssh_key.encode('utf-8')) + os.fchmod(tkey.fileno(), 0o600) + tkey.flush() # make visible to other processes + tpub = open(tkey.name + '.pub', 'w') + os.fchmod(tpub.fileno(), 0o600) + tpub.write(ssh_pub) + tpub.flush() # make visible to other processes + temp_files += [tkey, tpub] + ssh_options += ['-i', tkey.name] + + self._temp_files = temp_files + ssh_options += ['-o', 'ServerAliveInterval=7', '-o', 'ServerAliveCountMax=3'] + self._ssh_options = ' '.join(ssh_options) # type: Optional[str] + + if self.mode == 'root': + self.ssh_user = self.get_store('ssh_user', default='root') + elif self.mode == 'cephadm-package': + self.ssh_user = 'cephadm' + + self._reset_cons() + + def validate_ssh_config_content(self, ssh_config: 
Optional[str]) -> None: + if ssh_config is None or len(ssh_config.strip()) == 0: + raise OrchestratorValidationError('ssh_config cannot be empty') + # StrictHostKeyChecking is [yes|no] ? + res = re.findall(r'StrictHostKeyChecking\s+.*', ssh_config) + if not res: + raise OrchestratorValidationError('ssh_config requires StrictHostKeyChecking') + for s in res: + if 'ask' in s.lower(): + raise OrchestratorValidationError(f'ssh_config cannot contain: \'{s}\'') + + def validate_ssh_config_fname(self, ssh_config_fname: str) -> None: + if not os.path.isfile(ssh_config_fname): + raise OrchestratorValidationError("ssh_config \"{}\" does not exist".format( + ssh_config_fname)) + + def _reset_con(self, host: str) -> None: + conn, r = self._cons.get(host, (None, None)) + if conn: + self.log.debug('_reset_con close %s' % host) + conn.exit() + del self._cons[host] + + def _reset_cons(self) -> None: + for host, conn_and_r in self._cons.items(): + self.log.debug('_reset_cons close %s' % host) + conn, r = conn_and_r + conn.exit() + self._cons = {} + + def update_watched_hosts(self) -> None: + # currently, we are watching hosts with nfs daemons + hosts_to_watch = [d.hostname for d in self.cache.get_daemons( + ) if d.daemon_type in RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES] + self.offline_watcher.set_hosts(list(set([h for h in hosts_to_watch if h is not None]))) + + def offline_hosts_remove(self, host: str) -> None: + if host in self.offline_hosts: + self.offline_hosts.remove(host) + + @staticmethod + def can_run() -> Tuple[bool, str]: + if remoto is not None: + return True, "" + else: + return False, "loading remoto library:{}".format( + remoto_import_error) + + def available(self) -> Tuple[bool, str, Dict[str, Any]]: + """ + The cephadm orchestrator is always available. + """ + ok, err = self.can_run() + if not ok: + return ok, err, {} + if not self.ssh_key or not self.ssh_pub: + return False, 'SSH keys not set. 
Use `ceph cephadm set-priv-key` and `ceph cephadm set-pub-key` or `ceph cephadm generate-key`', {} + + # mypy is unable to determine type for _processes since it's private + worker_count: int = self._worker_pool._processes # type: ignore + ret = { + "workers": worker_count, + "paused": self.paused, + } + + return True, err, ret + + def _validate_and_set_ssh_val(self, what: str, new: Optional[str], old: Optional[str]) -> None: + self.set_store(what, new) + self._reconfig_ssh() + if self.cache.get_hosts(): + # Can't check anything without hosts + host = self.cache.get_hosts()[0] + r = CephadmServe(self)._check_host(host) + if r is not None: + # connection failed reset user + self.set_store(what, old) + self._reconfig_ssh() + raise OrchestratorError('ssh connection %s@%s failed' % (self.ssh_user, host)) + self.log.info(f'Set ssh {what}') + + @orchestrator._cli_write_command( + prefix='cephadm set-ssh-config') + def _set_ssh_config(self, inbuf: Optional[str] = None) -> Tuple[int, str, str]: + """ + Set the ssh_config file (use -i <ssh_config>) + """ + # Set an ssh_config file provided from stdin + + old = self.ssh_config + if inbuf == old: + return 0, "value unchanged", "" + self.validate_ssh_config_content(inbuf) + self._validate_and_set_ssh_val('ssh_config', inbuf, old) + return 0, "", "" + + @orchestrator._cli_write_command('cephadm clear-ssh-config') + def _clear_ssh_config(self) -> Tuple[int, str, str]: + """ + Clear the ssh_config file + """ + # Clear the ssh_config file provided from stdin + self.set_store("ssh_config", None) + self.ssh_config_tmp = None + self.log.info('Cleared ssh_config') + self._reconfig_ssh() + return 0, "", "" + + @orchestrator._cli_read_command('cephadm get-ssh-config') + def _get_ssh_config(self) -> HandleCommandResult: + """ + Returns the ssh config as used by cephadm + """ + if self.ssh_config_file: + self.validate_ssh_config_fname(self.ssh_config_file) + with open(self.ssh_config_file) as f: + return 
HandleCommandResult(stdout=f.read()) + ssh_config = self.get_store("ssh_config") + if ssh_config: + return HandleCommandResult(stdout=ssh_config) + return HandleCommandResult(stdout=DEFAULT_SSH_CONFIG) + + @orchestrator._cli_write_command('cephadm generate-key') + def _generate_key(self) -> Tuple[int, str, str]: + """ + Generate a cluster SSH key (if not present) + """ + if not self.ssh_pub or not self.ssh_key: + self.log.info('Generating ssh key...') + tmp_dir = TemporaryDirectory() + path = tmp_dir.name + '/key' + try: + subprocess.check_call([ + '/usr/bin/ssh-keygen', + '-C', 'ceph-%s' % self._cluster_fsid, + '-N', '', + '-f', path + ]) + with open(path, 'r') as f: + secret = f.read() + with open(path + '.pub', 'r') as f: + pub = f.read() + finally: + os.unlink(path) + os.unlink(path + '.pub') + tmp_dir.cleanup() + self.set_store('ssh_identity_key', secret) + self.set_store('ssh_identity_pub', pub) + self._reconfig_ssh() + return 0, '', '' + + @orchestrator._cli_write_command( + 'cephadm set-priv-key') + def _set_priv_key(self, inbuf: Optional[str] = None) -> Tuple[int, str, str]: + """Set cluster SSH private key (use -i <private_key>)""" + if inbuf is None or len(inbuf) == 0: + return -errno.EINVAL, "", "empty private ssh key provided" + old = self.ssh_key + if inbuf == old: + return 0, "value unchanged", "" + self._validate_and_set_ssh_val('ssh_identity_key', inbuf, old) + self.log.info('Set ssh private key') + return 0, "", "" + + @orchestrator._cli_write_command( + 'cephadm set-pub-key') + def _set_pub_key(self, inbuf: Optional[str] = None) -> Tuple[int, str, str]: + """Set cluster SSH public key (use -i <public_key>)""" + if inbuf is None or len(inbuf) == 0: + return -errno.EINVAL, "", "empty public ssh key provided" + old = self.ssh_pub + if inbuf == old: + return 0, "value unchanged", "" + self._validate_and_set_ssh_val('ssh_identity_pub', inbuf, old) + return 0, "", "" + + @orchestrator._cli_write_command( + 'cephadm clear-key') + def _clear_key(self) -> 
Tuple[int, str, str]: + """Clear cluster SSH key""" + self.set_store('ssh_identity_key', None) + self.set_store('ssh_identity_pub', None) + self._reconfig_ssh() + self.log.info('Cleared cluster SSH key') + return 0, '', '' + + @orchestrator._cli_read_command( + 'cephadm get-pub-key') + def _get_pub_key(self) -> Tuple[int, str, str]: + """Show SSH public key for connecting to cluster hosts""" + if self.ssh_pub: + return 0, self.ssh_pub, '' + else: + return -errno.ENOENT, '', 'No cluster SSH key defined' + + @orchestrator._cli_read_command( + 'cephadm get-user') + def _get_user(self) -> Tuple[int, str, str]: + """ + Show user for SSHing to cluster hosts + """ + if self.ssh_user is None: + return -errno.ENOENT, '', 'No cluster SSH user configured' + else: + return 0, self.ssh_user, '' + + @orchestrator._cli_read_command( + 'cephadm set-user') + def set_ssh_user(self, user: str) -> Tuple[int, str, str]: + """ + Set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users + """ + current_user = self.ssh_user + if user == current_user: + return 0, "value unchanged", "" + + self._validate_and_set_ssh_val('ssh_user', user, current_user) + current_ssh_config = self._get_ssh_config() + new_ssh_config = re.sub(r"(\s{2}User\s)(.*)", r"\1" + user, current_ssh_config.stdout) + self._set_ssh_config(new_ssh_config) + + msg = 'ssh user set to %s' % user + if user != 'root': + msg += '. 
sudo will be used' + self.log.info(msg) + return 0, msg, '' + + @orchestrator._cli_read_command( + 'cephadm registry-login') + def registry_login(self, url: Optional[str] = None, username: Optional[str] = None, password: Optional[str] = None, inbuf: Optional[str] = None) -> Tuple[int, str, str]: + """ + Set custom registry login info by providing url, username and password or json file with login info (-i <file>) + """ + # if password not given in command line, get it through file input + if not (url and username and password) and (inbuf is None or len(inbuf) == 0): + return -errno.EINVAL, "", ("Invalid arguments. Please provide arguments <url> <username> <password> " + "or -i <login credentials json file>") + elif (url and username and password): + registry_json = {'url': url, 'username': username, 'password': password} + else: + assert isinstance(inbuf, str) + registry_json = json.loads(inbuf) + if "url" not in registry_json or "username" not in registry_json or "password" not in registry_json: + return -errno.EINVAL, "", ("json provided for custom registry login did not include all necessary fields. " + "Please setup json file as\n" + "{\n" + " \"url\": \"REGISTRY_URL\",\n" + " \"username\": \"REGISTRY_USERNAME\",\n" + " \"password\": \"REGISTRY_PASSWORD\"\n" + "}\n") + + # verify login info works by attempting login on random host + host = None + for host_name in self.inventory.keys(): + host = host_name + break + if not host: + raise OrchestratorError('no hosts defined') + r = CephadmServe(self)._registry_login(host, registry_json) + if r is not None: + return 1, '', r + # if logins succeeded, store info + self.log.debug("Host logins successful. 
Storing login info.") + self.set_store('registry_credentials', json.dumps(registry_json)) + # distribute new login info to all hosts + self.cache.distribute_new_registry_login_info() + return 0, "registry login scheduled", '' + + @orchestrator._cli_read_command('cephadm check-host') + def check_host(self, host: str, addr: Optional[str] = None) -> Tuple[int, str, str]: + """Check whether we can access and manage a remote host""" + try: + out, err, code = CephadmServe(self)._run_cephadm(host, cephadmNoImage, 'check-host', + ['--expect-hostname', host], + addr=addr, + error_ok=True, no_fsid=True) + if code: + return 1, '', ('check-host failed:\n' + '\n'.join(err)) + except OrchestratorError: + self.log.exception(f"check-host failed for '{host}'") + return 1, '', ('check-host failed:\n' + + f"Host '{host}' not found. Use 'ceph orch host ls' to see all managed hosts.") + # if we have an outstanding health alert for this host, give the + # serve thread a kick + if 'CEPHADM_HOST_CHECK_FAILED' in self.health_checks: + for item in self.health_checks['CEPHADM_HOST_CHECK_FAILED']['detail']: + if item.startswith('host %s ' % host): + self.event.set() + return 0, '%s (%s) ok' % (host, addr), '\n'.join(err) + + @orchestrator._cli_read_command( + 'cephadm prepare-host') + def _prepare_host(self, host: str, addr: Optional[str] = None) -> Tuple[int, str, str]: + """Prepare a remote host for use with cephadm""" + out, err, code = CephadmServe(self)._run_cephadm(host, cephadmNoImage, 'prepare-host', + ['--expect-hostname', host], + addr=addr, + error_ok=True, no_fsid=True) + if code: + return 1, '', ('prepare-host failed:\n' + '\n'.join(err)) + # if we have an outstanding health alert for this host, give the + # serve thread a kick + if 'CEPHADM_HOST_CHECK_FAILED' in self.health_checks: + for item in self.health_checks['CEPHADM_HOST_CHECK_FAILED']['detail']: + if item.startswith('host %s ' % host): + self.event.set() + return 0, '%s (%s) ok' % (host, addr), '\n'.join(err) + + 
@orchestrator._cli_write_command( + prefix='cephadm set-extra-ceph-conf') + def _set_extra_ceph_conf(self, inbuf: Optional[str] = None) -> HandleCommandResult: + """ + Text that is appended to all daemon's ceph.conf. + Mainly a workaround, till `config generate-minimal-conf` generates + a complete ceph.conf. + + Warning: this is a dangerous operation. + """ + if inbuf: + # sanity check. + cp = ConfigParser() + cp.read_string(inbuf, source='<infile>') + + self.set_store("extra_ceph_conf", json.dumps({ + 'conf': inbuf, + 'last_modified': datetime_to_str(datetime_now()) + })) + self.log.info('Set extra_ceph_conf') + self._kick_serve_loop() + return HandleCommandResult() + + @orchestrator._cli_read_command( + 'cephadm get-extra-ceph-conf') + def _get_extra_ceph_conf(self) -> HandleCommandResult: + """ + Get extra ceph conf that is appended + """ + return HandleCommandResult(stdout=self.extra_ceph_conf().conf) + + def _set_exporter_config(self, config: Dict[str, str]) -> None: + self.set_store('exporter_config', json.dumps(config)) + + def _get_exporter_config(self) -> Dict[str, str]: + cfg_str = self.get_store('exporter_config') + return json.loads(cfg_str) if cfg_str else {} + + def _set_exporter_option(self, option: str, value: Optional[str] = None) -> None: + kv_option = f'exporter_{option}' + self.set_store(kv_option, value) + + def _get_exporter_option(self, option: str) -> Optional[str]: + kv_option = f'exporter_{option}' + return self.get_store(kv_option) + + @orchestrator._cli_write_command( + prefix='cephadm generate-exporter-config') + @service_inactive('cephadm-exporter') + def _generate_exporter_config(self) -> Tuple[int, str, str]: + """ + Generate default SSL crt/key and token for cephadm exporter daemons + """ + self._set_exporter_defaults() + self.log.info('Default settings created for cephadm exporter(s)') + return 0, "", "" + + def _set_exporter_defaults(self) -> None: + crt, key = self._generate_exporter_ssl() + token = 
self._generate_exporter_token() + self._set_exporter_config({ + "crt": crt, + "key": key, + "token": token, + "port": CephadmExporterConfig.DEFAULT_PORT + }) + self._set_exporter_option('enabled', 'true') + + def _generate_exporter_ssl(self) -> Tuple[str, str]: + return create_self_signed_cert(dname={"O": "Ceph", "OU": "cephadm-exporter"}) + + def _generate_exporter_token(self) -> str: + return secrets.token_hex(32) + + @orchestrator._cli_write_command( + prefix='cephadm clear-exporter-config') + @service_inactive('cephadm-exporter') + def _clear_exporter_config(self) -> Tuple[int, str, str]: + """ + Clear the SSL configuration used by cephadm exporter daemons + """ + self._clear_exporter_config_settings() + self.log.info('Cleared cephadm exporter configuration') + return 0, "", "" + + def _clear_exporter_config_settings(self) -> None: + self.set_store('exporter_config', None) + self._set_exporter_option('enabled', None) + + @orchestrator._cli_write_command( + prefix='cephadm set-exporter-config') + @service_inactive('cephadm-exporter') + def _store_exporter_config(self, inbuf: Optional[str] = None) -> Tuple[int, str, str]: + """ + Set custom cephadm-exporter configuration from a json file (-i <file>). 
JSON must contain crt, key, token and port
        """
        if not inbuf:
            return 1, "", "JSON configuration has not been provided (-i <filename>)"

        # Parse and validate before anything is persisted, so a bad config
        # never overwrites a working one.
        cfg = CephadmExporterConfig(self)
        rc, reason = cfg.load_from_json(inbuf)
        if rc:
            return 1, "", reason

        rc, reason = cfg.validate_config()
        if rc:
            return 1, "", reason

        self._set_exporter_config({
            "crt": cfg.crt,
            "key": cfg.key,
            "token": cfg.token,
            "port": cfg.port
        })
        self.log.info("Loaded and verified the TLS configuration")
        return 0, "", ""

    @orchestrator._cli_read_command(
        'cephadm get-exporter-config')
    def _show_exporter_config(self) -> Tuple[int, str, str]:
        """
        Show the current cephadm-exporter configuration (JSON)
        """
        cfg = self._get_exporter_config()
        return 0, json.dumps(cfg, indent=2), ""

    @orchestrator._cli_read_command('cephadm config-check ls')
    def _config_checks_list(self, format: Format = Format.plain) -> HandleCommandResult:
        """List the available configuration checks and their current state"""

        # only plain and json variants are meaningful for this listing
        if format not in [Format.plain, Format.json, Format.json_pretty]:
            return HandleCommandResult(
                retval=1,
                stderr="Requested format is not supported when listing configuration checks"
            )

        if format in [Format.json, Format.json_pretty]:
            return HandleCommandResult(
                stdout=to_format(self.config_checker.health_checks,
                                 format,
                                 many=True,
                                 cls=None))

        # plain formatting
        table = PrettyTable(
            ['NAME',
             'HEALTHCHECK',
             'STATUS',
             'DESCRIPTION'
             ], border=False)
        table.align['NAME'] = 'l'
        table.align['HEALTHCHECK'] = 'l'
        table.align['STATUS'] = 'l'
        table.align['DESCRIPTION'] = 'l'
        table.left_padding_width = 0
        table.right_padding_width = 2
        for c in self.config_checker.health_checks:
            table.add_row((
                c.name,
                c.healthcheck_name,
                c.status,
                c.description,
            ))

        return HandleCommandResult(stdout=table.get_string())

    @orchestrator._cli_read_command('cephadm config-check status')
    def _config_check_status(self) -> 
HandleCommandResult: + """Show whether the configuration checker feature is enabled/disabled""" + status = self.get_module_option('config_checks_enabled') + return HandleCommandResult(stdout="Enabled" if status else "Disabled") + + @orchestrator._cli_write_command('cephadm config-check enable') + def _config_check_enable(self, check_name: str) -> HandleCommandResult: + """Enable a specific configuration check""" + if not self._config_check_valid(check_name): + return HandleCommandResult(retval=1, stderr="Invalid check name") + + err, msg = self._update_config_check(check_name, 'enabled') + if err: + return HandleCommandResult( + retval=err, + stderr=f"Failed to enable check '{check_name}' : {msg}") + + return HandleCommandResult(stdout="ok") + + @orchestrator._cli_write_command('cephadm config-check disable') + def _config_check_disable(self, check_name: str) -> HandleCommandResult: + """Disable a specific configuration check""" + if not self._config_check_valid(check_name): + return HandleCommandResult(retval=1, stderr="Invalid check name") + + err, msg = self._update_config_check(check_name, 'disabled') + if err: + return HandleCommandResult(retval=err, stderr=f"Failed to disable check '{check_name}': {msg}") + else: + # drop any outstanding raised healthcheck for this check + config_check = self.config_checker.lookup_check(check_name) + if config_check: + if config_check.healthcheck_name in self.health_checks: + self.health_checks.pop(config_check.healthcheck_name, None) + self.set_health_checks(self.health_checks) + else: + self.log.error( + f"Unable to resolve a check name ({check_name}) to a healthcheck definition?") + + return HandleCommandResult(stdout="ok") + + def _config_check_valid(self, check_name: str) -> bool: + return check_name in [chk.name for chk in self.config_checker.health_checks] + + def _update_config_check(self, check_name: str, status: str) -> Tuple[int, str]: + checks_raw = self.get_store('config_checks') + if not checks_raw: + return 1, 
"config_checks setting is not available"

        checks = json.loads(checks_raw)
        checks.update({
            check_name: status
        })
        self.log.info(f"updated config check '{check_name}' : {status}")
        self.set_store('config_checks', json.dumps(checks))
        return 0, ""

    class ExtraCephConf(NamedTuple):
        # free-form config text appended to every daemon's ceph.conf
        conf: str
        # when the text was last set; None if never set or unparseable
        last_modified: Optional[datetime.datetime]

    def extra_ceph_conf(self) -> 'CephadmOrchestrator.ExtraCephConf':
        """Load the stored extra-ceph-conf snippet, tolerating missing/bad JSON."""
        data = self.get_store('extra_ceph_conf')
        if not data:
            return CephadmOrchestrator.ExtraCephConf('', None)
        try:
            j = json.loads(data)
        except ValueError:
            # corrupted store entry: log it and fall back to "no extra conf"
            msg = 'Unable to load extra_ceph_conf: Cannot decode JSON'
            self.log.exception('%s: \'%s\'', msg, data)
            return CephadmOrchestrator.ExtraCephConf('', None)
        return CephadmOrchestrator.ExtraCephConf(j['conf'], str_to_datetime(j['last_modified']))

    def extra_ceph_conf_is_newer(self, dt: datetime.datetime) -> bool:
        """Return True if the extra ceph.conf was modified after ``dt``."""
        conf = self.extra_ceph_conf()
        if not conf.last_modified:
            return False
        return conf.last_modified > dt

    @orchestrator._cli_write_command(
        'cephadm osd activate'
    )
    def _osd_activate(self, host: List[str]) -> HandleCommandResult:
        """
        Start OSD containers for existing OSDs
        """

        @forall_hosts
        def run(h: str) -> str:
            return self.osd_service.deploy_osd_daemons_for_existing_osds(h, 'osd')

        return HandleCommandResult(stdout='\n'.join(run(host)))

    @orchestrator._cli_read_command('orch client-keyring ls')
    def _client_keyring_ls(self, format: Format = Format.plain) -> HandleCommandResult:
        """
        List client keyrings under cephadm management
        """
        if format != Format.plain:
            output = to_format(self.keys.keys.values(), format, many=True, cls=ClientKeyringSpec)
        else:
            # plain formatting: one row per keyring, sorted by entity name
            table = PrettyTable(
                ['ENTITY', 'PLACEMENT', 'MODE', 'OWNER', 'PATH'],
                border=False)
            table.align = 'l'
            table.left_padding_width = 0
            table.right_padding_width = 2
            for ks in sorted(self.keys.keys.values(), key=lambda ks: ks.entity):
                table.add_row((
                    ks.entity, 
ks.placement.pretty_str(), + utils.file_mode_to_str(ks.mode), + f'{ks.uid}:{ks.gid}', + ks.path, + )) + output = table.get_string() + return HandleCommandResult(stdout=output) + + @orchestrator._cli_write_command('orch client-keyring set') + def _client_keyring_set( + self, + entity: str, + placement: str, + owner: Optional[str] = None, + mode: Optional[str] = None, + ) -> HandleCommandResult: + """ + Add or update client keyring under cephadm management + """ + if not entity.startswith('client.'): + raise OrchestratorError('entity must start with client.') + if owner: + try: + uid, gid = map(int, owner.split(':')) + except Exception: + raise OrchestratorError('owner must look like "<uid>:<gid>", e.g., "0:0"') + else: + uid = 0 + gid = 0 + if mode: + try: + imode = int(mode, 8) + except Exception: + raise OrchestratorError('mode must be an octal mode, e.g. "600"') + else: + imode = 0o600 + pspec = PlacementSpec.from_string(placement) + ks = ClientKeyringSpec(entity, pspec, mode=imode, uid=uid, gid=gid) + self.keys.update(ks) + self._kick_serve_loop() + return HandleCommandResult() + + @orchestrator._cli_write_command('orch client-keyring rm') + def _client_keyring_rm( + self, + entity: str, + ) -> HandleCommandResult: + """ + Remove client keyring from cephadm management + """ + self.keys.rm(entity) + self._kick_serve_loop() + return HandleCommandResult() + + def _get_connection(self, host: str) -> Tuple['remoto.backends.BaseConnection', + 'remoto.backends.LegacyModuleExecute']: + """ + Setup a connection for running commands on remote host. 
+ """ + conn, r = self._cons.get(host, (None, None)) + if conn: + if conn.has_connection(): + self.log.debug('Have connection to %s' % host) + return conn, r + else: + self._reset_con(host) + assert self.ssh_user + n = self.ssh_user + '@' + host + self.log.debug("Opening connection to {} with ssh options '{}'".format( + n, self._ssh_options)) + child_logger = self.log.getChild(n) + child_logger.setLevel('WARNING') + conn = remoto.Connection( + n, + logger=child_logger, + ssh_options=self._ssh_options, + sudo=True if self.ssh_user != 'root' else False) + + r = conn.import_module(remotes) + self._cons[host] = conn, r + + return conn, r + + def _executable_path(self, conn: 'remoto.backends.BaseConnection', executable: str) -> str: + """ + Remote validator that accepts a connection object to ensure that a certain + executable is available returning its full path if so. + + Otherwise an exception with thorough details will be raised, informing the + user that the executable was not found. + """ + executable_path = conn.remote_module.which(executable) + if not executable_path: + raise RuntimeError("Executable '{}' not found on host '{}'".format( + executable, conn.hostname)) + self.log.debug("Found executable '{}' at path '{}'".format(executable, + executable_path)) + return executable_path + + def _get_container_image(self, daemon_name: str) -> Optional[str]: + daemon_type = daemon_name.split('.', 1)[0] # type: ignore + image: Optional[str] = None + if daemon_type in CEPH_IMAGE_TYPES: + # get container image + image = str(self.get_foreign_ceph_option( + utils.name_to_config_section(daemon_name), + 'container_image' + )).strip() + elif daemon_type == 'prometheus': + image = self.container_image_prometheus + elif daemon_type == 'grafana': + image = self.container_image_grafana + elif daemon_type == 'alertmanager': + image = self.container_image_alertmanager + elif daemon_type == 'node-exporter': + image = self.container_image_node_exporter + elif daemon_type == 'haproxy': 
+ image = self.container_image_haproxy + elif daemon_type == 'keepalived': + image = self.container_image_keepalived + elif daemon_type == CustomContainerService.TYPE: + # The image can't be resolved, the necessary information + # is only available when a container is deployed (given + # via spec). + image = None + elif daemon_type == 'snmp-gateway': + image = self.container_image_snmp_gateway + else: + assert False, daemon_type + + self.log.debug('%s container image %s' % (daemon_name, image)) + + return image + + def _schedulable_hosts(self) -> List[HostSpec]: + """ + Returns all usable hosts that went through _refresh_host_daemons(). + + This mitigates a potential race, where new host was added *after* + ``_refresh_host_daemons()`` was called, but *before* + ``_apply_all_specs()`` was called. thus we end up with a hosts + where daemons might be running, but we have not yet detected them. + """ + return [ + h for h in self.inventory.all_specs() + if ( + self.cache.host_had_daemon_refresh(h.hostname) + and '_no_schedule' not in h.labels + ) + ] + + def _unreachable_hosts(self) -> List[HostSpec]: + """ + Return all hosts that are offline or in maintenance mode. 
+ + The idea is we should not touch the daemons on these hosts (since + in theory the hosts are inaccessible so we CAN'T touch them) but + we still want to count daemons that exist on these hosts toward the + placement so daemons on these hosts aren't just moved elsewhere + """ + return [ + h for h in self.inventory.all_specs() + if ( + h.status.lower() in ['maintenance', 'offline'] + or h.hostname in self.offline_hosts + ) + ] + + def _check_valid_addr(self, host: str, addr: str) -> str: + # make sure hostname is resolvable before trying to make a connection + try: + ip_addr = utils.resolve_ip(addr) + except OrchestratorError as e: + msg = str(e) + f''' +You may need to supply an address for {addr} + +Please make sure that the host is reachable and accepts connections using the cephadm SSH key +To add the cephadm SSH key to the host: +> ceph cephadm get-pub-key > ~/ceph.pub +> ssh-copy-id -f -i ~/ceph.pub {self.ssh_user}@{addr} + +To check that the host is reachable open a new shell with the --no-hosts flag: +> cephadm shell --no-hosts + +Then run the following: +> ceph cephadm get-ssh-config > ssh_config +> ceph config-key get mgr/cephadm/ssh_identity_key > ~/cephadm_private_key +> chmod 0600 ~/cephadm_private_key +> ssh -F ssh_config -i ~/cephadm_private_key {self.ssh_user}@{addr}''' + raise OrchestratorError(msg) + + if ipaddress.ip_address(ip_addr).is_loopback and host == addr: + # if this is a re-add, use old address. otherwise error + if host not in self.inventory or self.inventory.get_addr(host) == host: + raise OrchestratorError( + (f'Cannot automatically resolve ip address of host {host}. Ip resolved to loopback address: {ip_addr}\n' + + f'Please explicitly provide the address (ceph orch host add {host} --addr <ip-addr>)')) + self.log.debug( + f'Received loopback address resolving ip for {host}: {ip_addr}. 
Falling back to previous address.') + ip_addr = self.inventory.get_addr(host) + out, err, code = CephadmServe(self)._run_cephadm( + host, cephadmNoImage, 'check-host', + ['--expect-hostname', host], + addr=addr, + error_ok=True, no_fsid=True) + if code: + msg = 'check-host failed:\n' + '\n'.join(err) + # err will contain stdout and stderr, so we filter on the message text to + # only show the errors + errors = [_i.replace("ERROR: ", "") for _i in err if _i.startswith('ERROR')] + if errors: + msg = f'Host {host} ({addr}) failed check(s): {errors}' + raise OrchestratorError(msg) + return ip_addr + + def _add_host(self, spec): + # type: (HostSpec) -> str + """ + Add a host to be managed by the orchestrator. + + :param host: host name + """ + HostSpec.validate(spec) + ip_addr = self._check_valid_addr(spec.hostname, spec.addr) + if spec.addr == spec.hostname and ip_addr: + spec.addr = ip_addr + + if spec.hostname in self.inventory and self.inventory.get_addr(spec.hostname) != spec.addr: + self.cache.refresh_all_host_info(spec.hostname) + + # prime crush map? + if spec.location: + self.check_mon_command({ + 'prefix': 'osd crush add-bucket', + 'name': spec.hostname, + 'type': 'host', + 'args': [f'{k}={v}' for k, v in spec.location.items()], + }) + + if spec.hostname not in self.inventory: + self.cache.prime_empty_host(spec.hostname) + self.inventory.add_host(spec) + self.offline_hosts_remove(spec.hostname) + if spec.status == 'maintenance': + self._set_maintenance_healthcheck() + self.event.set() # refresh stray health check + self.log.info('Added host %s' % spec.hostname) + return "Added host '{}' with addr '{}'".format(spec.hostname, spec.addr) + + @handle_orch_error + def add_host(self, spec: HostSpec) -> str: + return self._add_host(spec) + + @handle_orch_error + def remove_host(self, host: str, force: bool = False, offline: bool = False) -> str: + """ + Remove a host from orchestrator management. 
+ + :param host: host name + :param force: bypass running daemons check + :param offline: remove offline host + """ + + # check if host is offline + host_offline = host in self.offline_hosts + + if host_offline and not offline: + raise OrchestratorValidationError( + "{} is offline, please use --offline and --force to remove this host. This can potentially cause data loss".format(host)) + + if not host_offline and offline: + raise OrchestratorValidationError( + "{} is online, please remove host without --offline.".format(host)) + + if offline and not force: + raise OrchestratorValidationError("Removing an offline host requires --force") + + # check if there are daemons on the host + if not force: + daemons = self.cache.get_daemons_by_host(host) + if daemons: + self.log.warning(f"Blocked {host} removal. Daemons running: {daemons}") + + daemons_table = "" + daemons_table += "{:<20} {:<15}\n".format("type", "id") + daemons_table += "{:<20} {:<15}\n".format("-" * 20, "-" * 15) + for d in daemons: + daemons_table += "{:<20} {:<15}\n".format(d.daemon_type, d.daemon_id) + + raise OrchestratorValidationError("Not allowed to remove %s from cluster. " + "The following daemons are running in the host:" + "\n%s\nPlease run 'ceph orch host drain %s' to remove daemons from host" % ( + host, daemons_table, host)) + + # check, if there we're removing the last _admin host + if not force: + p = PlacementSpec(label='_admin') + admin_hosts = p.filter_matching_hostspecs(self.inventory.all_specs()) + if len(admin_hosts) == 1 and admin_hosts[0] == host: + raise OrchestratorValidationError(f"Host {host} is the last host with the '_admin'" + " label. Please add the '_admin' label to a host" + " or add --force to this command") + + def run_cmd(cmd_args: dict) -> None: + ret, out, err = self.mon_command(cmd_args) + if ret != 0: + self.log.debug(f"ran {cmd_args} with mon_command") + self.log.error( + f"cmd: {cmd_args.get('prefix')} failed with: {err}. 
(errno:{ret})") + self.log.debug(f"cmd: {cmd_args.get('prefix')} returns: {out}") + + if offline: + daemons = self.cache.get_daemons_by_host(host) + for d in daemons: + self.log.info(f"removing: {d.name()}") + + if d.daemon_type != 'osd': + self.cephadm_services[str(d.daemon_type)].pre_remove(d) + self.cephadm_services[str(d.daemon_type)].post_remove(d, is_failed_deploy=False) + else: + cmd_args = { + 'prefix': 'osd purge-actual', + 'id': int(str(d.daemon_id)), + 'yes_i_really_mean_it': True + } + run_cmd(cmd_args) + + cmd_args = { + 'prefix': 'osd crush rm', + 'name': host + } + run_cmd(cmd_args) + + self.inventory.rm_host(host) + self.cache.rm_host(host) + self._reset_con(host) + self.event.set() # refresh stray health check + self.log.info('Removed host %s' % host) + return "Removed {} host '{}'".format('offline' if offline else '', host) + + @handle_orch_error + def update_host_addr(self, host: str, addr: str) -> str: + self._check_valid_addr(host, addr) + self.inventory.set_addr(host, addr) + self._reset_con(host) + self.event.set() # refresh stray health check + self.log.info('Set host %s addr to %s' % (host, addr)) + return "Updated host '{}' addr to '{}'".format(host, addr) + + @handle_orch_error + def get_hosts(self): + # type: () -> List[orchestrator.HostSpec] + """ + Return a list of hosts managed by the orchestrator. + + Notes: + - skip async: manager reads from cache. + """ + return list(self.inventory.all_specs()) + + @handle_orch_error + def get_facts(self, hostname: Optional[str] = None) -> List[Dict[str, Any]]: + """ + Return a list of hosts metadata(gather_facts) managed by the orchestrator. + + Notes: + - skip async: manager reads from cache. 
+ """ + if hostname: + return [self.cache.get_facts(hostname)] + + return [self.cache.get_facts(hostname) for hostname in self.cache.get_hosts()] + + @handle_orch_error + def add_host_label(self, host: str, label: str) -> str: + self.inventory.add_label(host, label) + self.log.info('Added label %s to host %s' % (label, host)) + self._kick_serve_loop() + return 'Added label %s to host %s' % (label, host) + + @handle_orch_error + def remove_host_label(self, host: str, label: str, force: bool = False) -> str: + # if we remove the _admin label from the only host that has it we could end up + # removing the only instance of the config and keyring and cause issues + if not force and label == '_admin': + p = PlacementSpec(label='_admin') + admin_hosts = p.filter_matching_hostspecs(self.inventory.all_specs()) + if len(admin_hosts) == 1 and admin_hosts[0] == host: + raise OrchestratorValidationError(f"Host {host} is the last host with the '_admin'" + " label.\nRemoving the _admin label from this host could cause the removal" + " of the last cluster config/keyring managed by cephadm.\n" + "It is recommended to add the _admin label to another host" + " before completing this operation.\nIf you're certain this is" + " what you want rerun this command with --force.") + self.inventory.rm_label(host, label) + self.log.info('Removed label %s to host %s' % (label, host)) + self._kick_serve_loop() + return 'Removed label %s from host %s' % (label, host) + + def _host_ok_to_stop(self, hostname: str, force: bool = False) -> Tuple[int, str]: + self.log.debug("running host-ok-to-stop checks") + daemons = self.cache.get_daemons() + daemon_map: Dict[str, List[str]] = defaultdict(lambda: []) + for dd in daemons: + assert dd.hostname is not None + assert dd.daemon_type is not None + assert dd.daemon_id is not None + if dd.hostname == hostname: + daemon_map[dd.daemon_type].append(dd.daemon_id) + + notifications: List[str] = [] + error_notifications: List[str] = [] + okay: bool = True + for 
daemon_type, daemon_ids in daemon_map.items(): + r = self.cephadm_services[daemon_type_to_service( + daemon_type)].ok_to_stop(daemon_ids, force=force) + if r.retval: + okay = False + # collect error notifications so user can see every daemon causing host + # to not be okay to stop + error_notifications.append(r.stderr) + if r.stdout: + # if extra notifications to print for user, add them to notifications list + notifications.append(r.stdout) + + if not okay: + # at least one daemon is not okay to stop + return 1, '\n'.join(error_notifications) + + if notifications: + return 0, (f'It is presumed safe to stop host {hostname}. ' + + 'Note the following:\n\n' + '\n'.join(notifications)) + return 0, f'It is presumed safe to stop host {hostname}' + + @handle_orch_error + def host_ok_to_stop(self, hostname: str) -> str: + if hostname not in self.cache.get_hosts(): + raise OrchestratorError(f'Cannot find host "{hostname}"') + + rc, msg = self._host_ok_to_stop(hostname) + if rc: + raise OrchestratorError(msg, errno=rc) + + self.log.info(msg) + return msg + + def _set_maintenance_healthcheck(self) -> None: + """Raise/update or clear the maintenance health check as needed""" + + in_maintenance = self.inventory.get_host_with_state("maintenance") + if not in_maintenance: + self.remove_health_warning('HOST_IN_MAINTENANCE') + else: + s = "host is" if len(in_maintenance) == 1 else "hosts are" + self.set_health_warning("HOST_IN_MAINTENANCE", f"{len(in_maintenance)} {s} in maintenance mode", 1, [ + f"{h} is in maintenance" for h in in_maintenance]) + + @handle_orch_error + @host_exists() + def enter_host_maintenance(self, hostname: str, force: bool = False) -> str: + """ Attempt to place a cluster host in maintenance + + Placing a host into maintenance disables the cluster's ceph target in systemd + and stops all ceph daemons. If the host is an osd host we apply the noout flag + for the host subtree in crush to prevent data movement during a host maintenance + window. 
+ + :param hostname: (str) name of the host (must match an inventory hostname) + + :raises OrchestratorError: Hostname is invalid, host is already in maintenance + """ + if len(self.cache.get_hosts()) == 1: + raise OrchestratorError("Maintenance feature is not supported on single node clusters") + + # if upgrade is active, deny + if self.upgrade.upgrade_state: + raise OrchestratorError( + f"Unable to place {hostname} in maintenance with upgrade active/paused") + + tgt_host = self.inventory._inventory[hostname] + if tgt_host.get("status", "").lower() == "maintenance": + raise OrchestratorError(f"Host {hostname} is already in maintenance") + + host_daemons = self.cache.get_daemon_types(hostname) + self.log.debug("daemons on host {}".format(','.join(host_daemons))) + if host_daemons: + # daemons on this host, so check the daemons can be stopped + # and if so, place the host into maintenance by disabling the target + rc, msg = self._host_ok_to_stop(hostname, force) + if rc: + raise OrchestratorError( + msg + '\nNote: Warnings can be bypassed with the --force flag', errno=rc) + + # call the host-maintenance function + _out, _err, _code = CephadmServe(self)._run_cephadm(hostname, cephadmNoImage, "host-maintenance", + ["enter"], + error_ok=True) + returned_msg = _err[0].split('\n')[-1] + if returned_msg.startswith('failed') or returned_msg.startswith('ERROR'): + raise OrchestratorError( + f"Failed to place {hostname} into maintenance for cluster {self._cluster_fsid}") + + if "osd" in host_daemons: + crush_node = hostname if '.' 
not in hostname else hostname.split('.')[0] + rc, out, err = self.mon_command({ + 'prefix': 'osd set-group', + 'flags': 'noout', + 'who': [crush_node], + 'format': 'json' + }) + if rc: + self.log.warning( + f"maintenance mode request for {hostname} failed to SET the noout group (rc={rc})") + raise OrchestratorError( + f"Unable to set the osds on {hostname} to noout (rc={rc})") + else: + self.log.info( + f"maintenance mode request for {hostname} has SET the noout group") + + # update the host status in the inventory + tgt_host["status"] = "maintenance" + self.inventory._inventory[hostname] = tgt_host + self.inventory.save() + + self._set_maintenance_healthcheck() + return f'Daemons for Ceph cluster {self._cluster_fsid} stopped on host {hostname}. Host {hostname} moved to maintenance mode' + + @handle_orch_error + @host_exists() + def exit_host_maintenance(self, hostname: str) -> str: + """Exit maintenance mode and return a host to an operational state + + Returning from maintnenance will enable the clusters systemd target and + start it, and remove any noout that has been added for the host if the + host has osd daemons + + :param hostname: (str) host name + + :raises OrchestratorError: Unable to return from maintenance, or unset the + noout flag + """ + tgt_host = self.inventory._inventory[hostname] + if tgt_host['status'] != "maintenance": + raise OrchestratorError(f"Host {hostname} is not in maintenance mode") + + outs, errs, _code = CephadmServe(self)._run_cephadm(hostname, cephadmNoImage, 'host-maintenance', + ['exit'], + error_ok=True) + returned_msg = errs[0].split('\n')[-1] + if returned_msg.startswith('failed') or returned_msg.startswith('ERROR'): + raise OrchestratorError( + f"Failed to exit maintenance state for host {hostname}, cluster {self._cluster_fsid}") + + if "osd" in self.cache.get_daemon_types(hostname): + crush_node = hostname if '.' 
not in hostname else hostname.split('.')[0] + rc, _out, _err = self.mon_command({ + 'prefix': 'osd unset-group', + 'flags': 'noout', + 'who': [crush_node], + 'format': 'json' + }) + if rc: + self.log.warning( + f"exit maintenance request failed to UNSET the noout group for {hostname}, (rc={rc})") + raise OrchestratorError(f"Unable to set the osds on {hostname} to noout (rc={rc})") + else: + self.log.info( + f"exit maintenance request has UNSET for the noout group on host {hostname}") + + # update the host record status + tgt_host['status'] = "" + self.inventory._inventory[hostname] = tgt_host + self.inventory.save() + + self._set_maintenance_healthcheck() + + return f"Ceph cluster {self._cluster_fsid} on {hostname} has exited maintenance mode" + + @handle_orch_error + @host_exists() + def rescan_host(self, hostname: str) -> str: + """Use cephadm to issue a disk rescan on each HBA + + Some HBAs and external enclosures don't automatically register + device insertion with the kernel, so for these scenarios we need + to manually rescan + + :param hostname: (str) host name + """ + self.log.info(f'disk rescan request sent to host "{hostname}"') + _out, _err, _code = CephadmServe(self)._run_cephadm(hostname, cephadmNoImage, "disk-rescan", + [], no_fsid=True, error_ok=True) + if not _err: + raise OrchestratorError('Unexpected response from cephadm disk-rescan call') + + msg = _err[0].split('\n')[-1] + log_msg = f'disk rescan: {msg}' + if msg.upper().startswith('OK'): + self.log.info(log_msg) + else: + self.log.warning(log_msg) + + return f'{msg}' + + def get_minimal_ceph_conf(self) -> str: + _, config, _ = self.check_mon_command({ + "prefix": "config generate-minimal-conf", + }) + extra = self.extra_ceph_conf().conf + if extra: + config += '\n\n' + extra.strip() + '\n' + return config + + def _invalidate_daemons_and_kick_serve(self, filter_host: Optional[str] = None) -> None: + if filter_host: + self.cache.invalidate_host_daemons(filter_host) + else: + for h in 
self.cache.get_hosts(): + # Also discover daemons deployed manually + self.cache.invalidate_host_daemons(h) + + self._kick_serve_loop() + + @handle_orch_error + def describe_service(self, service_type: Optional[str] = None, service_name: Optional[str] = None, + refresh: bool = False) -> List[orchestrator.ServiceDescription]: + if refresh: + self._invalidate_daemons_and_kick_serve() + self.log.debug('Kicked serve() loop to refresh all services') + + sm: Dict[str, orchestrator.ServiceDescription] = {} + + # known services + for nm, spec in self.spec_store.all_specs.items(): + if service_type is not None and service_type != spec.service_type: + continue + if service_name is not None and service_name != nm: + continue + + if spec.service_type != 'osd': + size = spec.placement.get_target_count(self._schedulable_hosts()) + else: + # osd counting is special + size = 0 + + sm[nm] = orchestrator.ServiceDescription( + spec=spec, + size=size, + running=0, + events=self.events.get_for_service(spec.service_name()), + created=self.spec_store.spec_created[nm], + deleted=self.spec_store.spec_deleted.get(nm, None), + virtual_ip=spec.get_virtual_ip(), + ports=spec.get_port_start(), + ) + if spec.service_type == 'ingress': + # ingress has 2 daemons running per host + sm[nm].size *= 2 + + # factor daemons into status + for h, dm in self.cache.get_daemons_with_volatile_status(): + for name, dd in dm.items(): + assert dd.hostname is not None, f'no hostname for {dd!r}' + assert dd.daemon_type is not None, f'no daemon_type for {dd!r}' + + n: str = dd.service_name() + + if ( + service_type + and service_type != daemon_type_to_service(dd.daemon_type) + ): + continue + if service_name and service_name != n: + continue + + if n not in sm: + # new unmanaged service + spec = ServiceSpec( + unmanaged=True, + service_type=daemon_type_to_service(dd.daemon_type), + service_id=dd.service_id(), + ) + sm[n] = orchestrator.ServiceDescription( + last_refresh=dd.last_refresh, + 
container_image_id=dd.container_image_id, + container_image_name=dd.container_image_name, + spec=spec, + size=0, + ) + + if dd.status == DaemonDescriptionStatus.running: + sm[n].running += 1 + if dd.daemon_type == 'osd': + # The osd count can't be determined by the Placement spec. + # Showing an actual/expected representation cannot be determined + # here. So we're setting running = size for now. + sm[n].size += 1 + if ( + not sm[n].last_refresh + or not dd.last_refresh + or dd.last_refresh < sm[n].last_refresh # type: ignore + ): + sm[n].last_refresh = dd.last_refresh + + return list(sm.values()) + + @handle_orch_error + def list_daemons(self, + service_name: Optional[str] = None, + daemon_type: Optional[str] = None, + daemon_id: Optional[str] = None, + host: Optional[str] = None, + refresh: bool = False) -> List[orchestrator.DaemonDescription]: + if refresh: + self._invalidate_daemons_and_kick_serve(host) + self.log.debug('Kicked serve() loop to refresh all daemons') + + result = [] + for h, dm in self.cache.get_daemons_with_volatile_status(): + if host and h != host: + continue + for name, dd in dm.items(): + if daemon_type is not None and daemon_type != dd.daemon_type: + continue + if daemon_id is not None and daemon_id != dd.daemon_id: + continue + if service_name is not None and service_name != dd.service_name(): + continue + if not dd.memory_request and dd.daemon_type in ['osd', 'mon']: + dd.memory_request = cast(Optional[int], self.get_foreign_ceph_option( + dd.name(), + f"{dd.daemon_type}_memory_target" + )) + result.append(dd) + return result + + @handle_orch_error + def service_action(self, action: str, service_name: str) -> List[str]: + if service_name not in self.spec_store.all_specs.keys(): + raise OrchestratorError(f'Invalid service name "{service_name}".' 
+ + ' View currently running services using "ceph orch ls"') + dds: List[DaemonDescription] = self.cache.get_daemons_by_service(service_name) + if not dds: + raise OrchestratorError(f'No daemons exist under service name "{service_name}".' + + ' View currently running services using "ceph orch ls"') + if action == 'stop' and service_name.split('.')[0].lower() in ['mgr', 'mon', 'osd']: + return [f'Stopping entire {service_name} service is prohibited.'] + self.log.info('%s service %s' % (action.capitalize(), service_name)) + return [ + self._schedule_daemon_action(dd.name(), action) + for dd in dds + ] + + def _daemon_action(self, + daemon_spec: CephadmDaemonDeploySpec, + action: str, + image: Optional[str] = None) -> str: + self._daemon_action_set_image(action, image, daemon_spec.daemon_type, + daemon_spec.daemon_id) + + if (action == 'redeploy' or action == 'restart') and self.daemon_is_self(daemon_spec.daemon_type, + daemon_spec.daemon_id): + self.mgr_service.fail_over() + return '' # unreachable + + if action == 'redeploy' or action == 'reconfig': + if daemon_spec.daemon_type != 'osd': + daemon_spec = self.cephadm_services[daemon_type_to_service( + daemon_spec.daemon_type)].prepare_create(daemon_spec) + else: + # for OSDs, we still need to update config, just not carry out the full + # prepare_create function + daemon_spec.final_config, daemon_spec.deps = self.osd_service.generate_config(daemon_spec) + return CephadmServe(self)._create_daemon(daemon_spec, reconfig=(action == 'reconfig')) + + actions = { + 'start': ['reset-failed', 'start'], + 'stop': ['stop'], + 'restart': ['reset-failed', 'restart'], + } + name = daemon_spec.name() + for a in actions[action]: + try: + out, err, code = CephadmServe(self)._run_cephadm( + daemon_spec.host, name, 'unit', + ['--name', name, a]) + except Exception: + self.log.exception(f'`{daemon_spec.host}: cephadm unit {name} {a}` failed') + self.cache.invalidate_host_daemons(daemon_spec.host) + msg = "{} {} from host 
'{}'".format(action, name, daemon_spec.host) + self.events.for_daemon(name, 'INFO', msg) + return msg + + def _daemon_action_set_image(self, action: str, image: Optional[str], daemon_type: str, daemon_id: str) -> None: + if image is not None: + if action != 'redeploy': + raise OrchestratorError( + f'Cannot execute {action} with new image. `action` needs to be `redeploy`') + if daemon_type not in CEPH_IMAGE_TYPES: + raise OrchestratorError( + f'Cannot redeploy {daemon_type}.{daemon_id} with a new image: Supported ' + f'types are: {", ".join(CEPH_IMAGE_TYPES)}') + + self.check_mon_command({ + 'prefix': 'config set', + 'name': 'container_image', + 'value': image, + 'who': utils.name_to_config_section(daemon_type + '.' + daemon_id), + }) + + @handle_orch_error + def daemon_action(self, action: str, daemon_name: str, image: Optional[str] = None) -> str: + d = self.cache.get_daemon(daemon_name) + assert d.daemon_type is not None + assert d.daemon_id is not None + + if (action == 'redeploy' or action == 'restart') and self.daemon_is_self(d.daemon_type, d.daemon_id) \ + and not self.mgr_service.mgr_map_has_standby(): + raise OrchestratorError( + f'Unable to schedule redeploy for {daemon_name}: No standby MGRs') + + self._daemon_action_set_image(action, image, d.daemon_type, d.daemon_id) + + self.log.info(f'Schedule {action} daemon {daemon_name}') + return self._schedule_daemon_action(daemon_name, action) + + def daemon_is_self(self, daemon_type: str, daemon_id: str) -> bool: + return daemon_type == 'mgr' and daemon_id == self.get_mgr_id() + + def get_active_mgr_digests(self) -> List[str]: + digests = self.mgr_service.get_active_daemon( + self.cache.get_daemons_by_type('mgr')).container_image_digests + return digests if digests else [] + + def _schedule_daemon_action(self, daemon_name: str, action: str) -> str: + dd = self.cache.get_daemon(daemon_name) + assert dd.daemon_type is not None + assert dd.daemon_id is not None + assert dd.hostname is not None + if (action == 
'redeploy' or action == 'restart') and self.daemon_is_self(dd.daemon_type, dd.daemon_id) \ + and not self.mgr_service.mgr_map_has_standby(): + raise OrchestratorError( + f'Unable to schedule redeploy for {daemon_name}: No standby MGRs') + self.cache.schedule_daemon_action(dd.hostname, dd.name(), action) + msg = "Scheduled to {} {} on host '{}'".format(action, daemon_name, dd.hostname) + self._kick_serve_loop() + return msg + + @handle_orch_error + def remove_daemons(self, names): + # type: (List[str]) -> List[str] + args = [] + for host, dm in self.cache.daemons.items(): + for name in names: + if name in dm: + args.append((name, host)) + if not args: + raise OrchestratorError('Unable to find daemon(s) %s' % (names)) + self.log.info('Remove daemons %s' % ' '.join([a[0] for a in args])) + return self._remove_daemons(args) + + @handle_orch_error + def remove_service(self, service_name: str, force: bool = False) -> str: + self.log.info('Remove service %s' % service_name) + self._trigger_preview_refresh(service_name=service_name) + if service_name in self.spec_store: + if self.spec_store[service_name].spec.service_type in ('mon', 'mgr'): + return f'Unable to remove {service_name} service.\n' \ + f'Note, you might want to mark the {service_name} service as "unmanaged"' + else: + return f"Invalid service '{service_name}'. Use 'ceph orch ls' to list available services.\n" + + # Report list of affected OSDs? 
+ if not force and service_name.startswith('osd.'): + osds_msg = {} + for h, dm in self.cache.get_daemons_with_volatile_status(): + osds_to_remove = [] + for name, dd in dm.items(): + if dd.daemon_type == 'osd' and dd.service_name() == service_name: + osds_to_remove.append(str(dd.daemon_id)) + if osds_to_remove: + osds_msg[h] = osds_to_remove + if osds_msg: + msg = '' + for h, ls in osds_msg.items(): + msg += f'\thost {h}: {" ".join([f"osd.{id}" for id in ls])}' + raise OrchestratorError(f'If {service_name} is removed then the following OSDs will remain, --force to proceed anyway\n{msg}') + + found = self.spec_store.rm(service_name) + if found and service_name.startswith('osd.'): + self.spec_store.finally_rm(service_name) + self._kick_serve_loop() + return f'Removed service {service_name}' + + @handle_orch_error + def get_inventory(self, host_filter: Optional[orchestrator.InventoryFilter] = None, refresh: bool = False) -> List[orchestrator.InventoryHost]: + """ + Return the storage inventory of hosts matching the given filter. + + :param host_filter: host filter + + TODO: + - add filtering by label + """ + if refresh: + if host_filter and host_filter.hosts: + for h in host_filter.hosts: + self.log.debug(f'will refresh {h} devs') + self.cache.invalidate_host_devices(h) + else: + for h in self.cache.get_hosts(): + self.log.debug(f'will refresh {h} devs') + self.cache.invalidate_host_devices(h) + + self.event.set() + self.log.debug('Kicked serve() loop to refresh devices') + + result = [] + for host, dls in self.cache.devices.items(): + if host_filter and host_filter.hosts and host not in host_filter.hosts: + continue + result.append(orchestrator.InventoryHost(host, + inventory.Devices(dls))) + return result + + @handle_orch_error + def zap_device(self, host: str, path: str) -> str: + """Zap a device on a managed host. 
+ + Use ceph-volume zap to return a device to an unused/free state + + Args: + host (str): hostname of the cluster host + path (str): device path + + Raises: + OrchestratorError: host is not a cluster host + OrchestratorError: host is in maintenance and therefore unavailable + OrchestratorError: device path not found on the host + OrchestratorError: device is known to a different ceph cluster + OrchestratorError: device holds active osd + OrchestratorError: device cache hasn't been populated yet.. + + Returns: + str: output from the zap command + """ + + self.log.info('Zap device %s:%s' % (host, path)) + + if host not in self.inventory.keys(): + raise OrchestratorError( + f"Host '{host}' is not a member of the cluster") + + host_info = self.inventory._inventory.get(host, {}) + if host_info.get('status', '').lower() == 'maintenance': + raise OrchestratorError( + f"Host '{host}' is in maintenance mode, which prevents any actions against it.") + + if host not in self.cache.devices: + raise OrchestratorError( + f"Host '{host} hasn't been scanned yet to determine it's inventory. 
Please try again later.") + + host_devices = self.cache.devices[host] + path_found = False + osd_id_list: List[str] = [] + + for dev in host_devices: + if dev.path == path: + # match, so look a little deeper + if dev.lvs: + for lv in cast(List[Dict[str, str]], dev.lvs): + if lv.get('osd_id', ''): + lv_fsid = lv.get('cluster_fsid') + if lv_fsid != self._cluster_fsid: + raise OrchestratorError( + f"device {path} has lv's from a different Ceph cluster ({lv_fsid})") + osd_id_list.append(lv.get('osd_id', '')) + path_found = True + break + if not path_found: + raise OrchestratorError( + f"Device path '{path}' not found on host '{host}'") + + if osd_id_list: + dev_name = os.path.basename(path) + active_osds: List[str] = [] + for osd_id in osd_id_list: + metadata = self.get_metadata('osd', str(osd_id)) + if metadata: + if metadata.get('hostname', '') == host and dev_name in metadata.get('devices', '').split(','): + active_osds.append("osd." + osd_id) + if active_osds: + raise OrchestratorError( + f"Unable to zap: device '{path}' on {host} has {len(active_osds)} active " + f"OSD{'s' if len(active_osds) > 1 else ''}" + f" ({', '.join(active_osds)}). Use 'ceph orch osd rm' first.") + + out, err, code = CephadmServe(self)._run_cephadm( + host, 'osd', 'ceph-volume', + ['--', 'lvm', 'zap', '--destroy', path], + error_ok=True) + + self.cache.invalidate_host_devices(host) + if code: + raise OrchestratorError('Zap failed: %s' % '\n'.join(out + err)) + msg = f'zap successful for {path} on {host}' + self.log.info(msg) + + return msg + '\n' + + @handle_orch_error + def blink_device_light(self, ident_fault: str, on: bool, locs: List[orchestrator.DeviceLightLoc]) -> List[str]: + """ + Blink a device light. 
Calling something like:: + + lsmcli local-disk-ident-led-on --path $path + + If you must, you can customize this via:: + + ceph config-key set mgr/cephadm/blink_device_light_cmd '<my jinja2 template>' + ceph config-key set mgr/cephadm/<host>/blink_device_light_cmd '<my jinja2 template>' + + See templates/blink_device_light_cmd.j2 + """ + @forall_hosts + def blink(host: str, dev: str, path: str) -> str: + cmd_line = self.template.render('blink_device_light_cmd.j2', + { + 'on': on, + 'ident_fault': ident_fault, + 'dev': dev, + 'path': path + }, + host=host) + cmd_args = shlex.split(cmd_line) + + out, err, code = CephadmServe(self)._run_cephadm( + host, 'osd', 'shell', ['--'] + cmd_args, + error_ok=True) + if code: + raise OrchestratorError( + 'Unable to affect %s light for %s:%s. Command: %s' % ( + ident_fault, host, dev, ' '.join(cmd_args))) + self.log.info('Set %s light for %s:%s %s' % ( + ident_fault, host, dev, 'on' if on else 'off')) + return "Set %s light for %s:%s %s" % ( + ident_fault, host, dev, 'on' if on else 'off') + + return blink(locs) + + def get_osd_uuid_map(self, only_up=False): + # type: (bool) -> Dict[str, str] + osd_map = self.get('osd_map') + r = {} + for o in osd_map['osds']: + # only include OSDs that have ever started in this map. this way + # an interrupted osd create can be repeated and succeed the second + # time around. 
+ osd_id = o.get('osd') + if osd_id is None: + raise OrchestratorError("Could not retrieve osd_id from osd_map") + if not only_up: + r[str(osd_id)] = o.get('uuid', '') + return r + + def get_osd_by_id(self, osd_id: int) -> Optional[Dict[str, Any]]: + osd = [x for x in self.get('osd_map')['osds'] + if x['osd'] == osd_id] + + if len(osd) != 1: + return None + + return osd[0] + + def _trigger_preview_refresh(self, + specs: Optional[List[DriveGroupSpec]] = None, + service_name: Optional[str] = None, + ) -> None: + # Only trigger a refresh when a spec has changed + trigger_specs = [] + if specs: + for spec in specs: + preview_spec = self.spec_store.spec_preview.get(spec.service_name()) + # the to-be-preview spec != the actual spec, this means we need to + # trigger a refresh, if the spec has been removed (==None) we need to + # refresh as well. + if not preview_spec or spec != preview_spec: + trigger_specs.append(spec) + if service_name: + trigger_specs = [cast(DriveGroupSpec, self.spec_store.spec_preview.get(service_name))] + if not any(trigger_specs): + return None + + refresh_hosts = self.osd_service.resolve_hosts_for_osdspecs(specs=trigger_specs) + for host in refresh_hosts: + self.log.info(f"Marking host: {host} for OSDSpec preview refresh.") + self.cache.osdspec_previews_refresh_queue.append(host) + + @handle_orch_error + def apply_drivegroups(self, specs: List[DriveGroupSpec]) -> List[str]: + """ + Deprecated. Please use `apply()` instead. + + Keeping this around to be compapatible to mgr/dashboard + """ + return [self._apply(spec) for spec in specs] + + @handle_orch_error + def create_osds(self, drive_group: DriveGroupSpec) -> str: + hosts: List[HostSpec] = self.inventory.all_specs() + filtered_hosts: List[str] = drive_group.placement.filter_matching_hostspecs(hosts) + if not filtered_hosts: + return "Invalid 'host:device' spec: host not found in cluster. 
Please check 'ceph orch host ls' for available hosts" + return self.osd_service.create_from_spec(drive_group) + + def _preview_osdspecs(self, + osdspecs: Optional[List[DriveGroupSpec]] = None + ) -> dict: + if not osdspecs: + return {'n/a': [{'error': True, + 'message': 'No OSDSpec or matching hosts found.'}]} + matching_hosts = self.osd_service.resolve_hosts_for_osdspecs(specs=osdspecs) + if not matching_hosts: + return {'n/a': [{'error': True, + 'message': 'No OSDSpec or matching hosts found.'}]} + # Is any host still loading previews or still in the queue to be previewed + pending_hosts = {h for h in self.cache.loading_osdspec_preview if h in matching_hosts} + if pending_hosts or any(item in self.cache.osdspec_previews_refresh_queue for item in matching_hosts): + # Report 'pending' when any of the matching hosts is still loading previews (flag is True) + return {'n/a': [{'error': True, + 'message': 'Preview data is being generated.. ' + 'Please re-run this command in a bit.'}]} + # drop all keys that are not in search_hosts and only select reports that match the requested osdspecs + previews_for_specs = {} + for host, raw_reports in self.cache.osdspec_previews.items(): + if host not in matching_hosts: + continue + osd_reports = [] + for osd_report in raw_reports: + if osd_report.get('osdspec') in [x.service_id for x in osdspecs]: + osd_reports.append(osd_report) + previews_for_specs.update({host: osd_reports}) + return previews_for_specs + + def _calc_daemon_deps(self, + spec: Optional[ServiceSpec], + daemon_type: str, + daemon_id: str) -> List[str]: + deps = [] + if daemon_type == 'haproxy': + # because cephadm creates new daemon instances whenever + # port or ip changes, identifying daemons by name is + # sufficient to detect changes. 
+ if not spec: + return [] + ingress_spec = cast(IngressSpec, spec) + assert ingress_spec.backend_service + daemons = self.cache.get_daemons_by_service(ingress_spec.backend_service) + deps = [d.name() for d in daemons] + elif daemon_type == 'keepalived': + # because cephadm creates new daemon instances whenever + # port or ip changes, identifying daemons by name is + # sufficient to detect changes. + if not spec: + return [] + daemons = self.cache.get_daemons_by_service(spec.service_name()) + deps = [d.name() for d in daemons if d.daemon_type == 'haproxy'] + elif daemon_type == 'iscsi': + if spec: + iscsi_spec = cast(IscsiServiceSpec, spec) + deps = [self.iscsi_service.get_trusted_ips(iscsi_spec)] + else: + deps = [self.get_mgr_ip()] + else: + need = { + 'prometheus': ['mgr', 'alertmanager', 'node-exporter', 'ingress'], + 'grafana': ['prometheus'], + 'alertmanager': ['mgr', 'alertmanager', 'snmp-gateway'], + } + for dep_type in need.get(daemon_type, []): + for dd in self.cache.get_daemons_by_type(dep_type): + deps.append(dd.name()) + if daemon_type == 'prometheus': + deps.append(str(self.get_module_option_ex('prometheus', 'server_port', 9283))) + return sorted(deps) + + @forall_hosts + def _remove_daemons(self, name: str, host: str) -> str: + return CephadmServe(self)._remove_daemon(name, host) + + def _check_pool_exists(self, pool: str, service_name: str) -> None: + logger.info(f'Checking pool "{pool}" exists for service {service_name}') + if not self.rados.pool_exists(pool): + raise OrchestratorError(f'Cannot find pool "{pool}" for ' + f'service {service_name}') + + def _add_daemon(self, + daemon_type: str, + spec: ServiceSpec) -> List[str]: + """ + Add (and place) a daemon. Require explicit host placement. Do not + schedule, and do not apply the related scheduling limitations. 
+ """ + if spec.service_name() not in self.spec_store: + raise OrchestratorError('Unable to add a Daemon without Service.\n' + 'Please use `ceph orch apply ...` to create a Service.\n' + 'Note, you might want to create the service with "unmanaged=true"') + + self.log.debug('_add_daemon %s spec %s' % (daemon_type, spec.placement)) + if not spec.placement.hosts: + raise OrchestratorError('must specify host(s) to deploy on') + count = spec.placement.count or len(spec.placement.hosts) + daemons = self.cache.get_daemons_by_service(spec.service_name()) + return self._create_daemons(daemon_type, spec, daemons, + spec.placement.hosts, count) + + def _create_daemons(self, + daemon_type: str, + spec: ServiceSpec, + daemons: List[DaemonDescription], + hosts: List[HostPlacementSpec], + count: int) -> List[str]: + if count > len(hosts): + raise OrchestratorError('too few hosts: want %d, have %s' % ( + count, hosts)) + + did_config = False + service_type = daemon_type_to_service(daemon_type) + + args = [] # type: List[CephadmDaemonDeploySpec] + for host, network, name in hosts: + daemon_id = self.get_unique_name(daemon_type, host, daemons, + prefix=spec.service_id, + forcename=name) + + if not did_config: + self.cephadm_services[service_type].config(spec) + did_config = True + + daemon_spec = self.cephadm_services[service_type].make_daemon_spec( + host, daemon_id, network, spec, + # NOTE: this does not consider port conflicts! 
+ ports=spec.get_port_start()) + self.log.debug('Placing %s.%s on host %s' % ( + daemon_type, daemon_id, host)) + args.append(daemon_spec) + + # add to daemon list so next name(s) will also be unique + sd = orchestrator.DaemonDescription( + hostname=host, + daemon_type=daemon_type, + daemon_id=daemon_id, + ) + daemons.append(sd) + + @ forall_hosts + def create_func_map(*args: Any) -> str: + daemon_spec = self.cephadm_services[daemon_type].prepare_create(*args) + return CephadmServe(self)._create_daemon(daemon_spec) + + return create_func_map(args) + + @handle_orch_error + def add_daemon(self, spec: ServiceSpec) -> List[str]: + ret: List[str] = [] + try: + with orchestrator.set_exception_subject('service', spec.service_name(), overwrite=True): + for d_type in service_to_daemon_types(spec.service_type): + ret.extend(self._add_daemon(d_type, spec)) + return ret + except OrchestratorError as e: + self.events.from_orch_error(e) + raise + + @handle_orch_error + def apply_mon(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + def _apply(self, spec: GenericSpec) -> str: + if spec.service_type == 'host': + return self._add_host(cast(HostSpec, spec)) + + if spec.service_type == 'osd': + # _trigger preview refresh needs to be smart and + # should only refresh if a change has been detected + self._trigger_preview_refresh(specs=[cast(DriveGroupSpec, spec)]) + + return self._apply_service_spec(cast(ServiceSpec, spec)) + + def set_health_warning(self, name: str, summary: str, count: int, detail: List[str]) -> None: + self.health_checks[name] = { + 'severity': 'warning', + 'summary': summary, + 'count': count, + 'detail': detail, + } + self.set_health_checks(self.health_checks) + + def remove_health_warning(self, name: str) -> None: + if name in self.health_checks: + del self.health_checks[name] + self.set_health_checks(self.health_checks) + + def _plan(self, spec: ServiceSpec) -> dict: + if spec.service_type == 'osd': + return {'service_name': spec.service_name(), + 
    def _plan(self, spec: ServiceSpec) -> dict:
        """Dry-run a single spec: report what applying it would change.

        For OSD specs this returns the cached drive-group preview data;
        for everything else it runs the real scheduler (HostAssignment)
        without persisting anything and reports the add/remove delta.

        :param spec: the service spec to plan (not saved)
        :return: dict with 'service_name', 'service_type' and either
                 'data' (osd preview) or 'add'/'remove' host/daemon lists
        """
        if spec.service_type == 'osd':
            return {'service_name': spec.service_name(),
                    'service_type': spec.service_type,
                    'data': self._preview_osdspecs(osdspecs=[cast(DriveGroupSpec, spec)])}

        svc = self.cephadm_services[spec.service_type]
        # Build the same HostAssignment the serve loop would use, so the
        # preview matches what apply would actually do.
        ha = HostAssignment(
            spec=spec,
            hosts=self._schedulable_hosts(),
            unreachable_hosts=self._unreachable_hosts(),
            networks=self.cache.networks,
            daemons=self.cache.get_daemons_by_service(spec.service_name()),
            allow_colo=svc.allow_colo(),
            # rank_map only applies to ranked services (e.g. nfs)
            rank_map=self.spec_store[spec.service_name()].rank_map if svc.ranked() else None
        )
        ha.validate()
        hosts, to_add, to_remove = ha.place()

        return {
            'service_name': spec.service_name(),
            'service_type': spec.service_type,
            'add': [hs.hostname for hs in to_add],
            'remove': [d.name() for d in to_remove]
        }
Previews that include Host Specifications are not supported, yet.'}] + for spec in specs: + results.append(self._plan(cast(ServiceSpec, spec))) + return results + + def _apply_service_spec(self, spec: ServiceSpec) -> str: + if spec.placement.is_empty(): + # fill in default placement + defaults = { + 'mon': PlacementSpec(count=5), + 'mgr': PlacementSpec(count=2), + 'mds': PlacementSpec(count=2), + 'rgw': PlacementSpec(count=2), + 'ingress': PlacementSpec(count=2), + 'iscsi': PlacementSpec(count=1), + 'rbd-mirror': PlacementSpec(count=2), + 'cephfs-mirror': PlacementSpec(count=1), + 'nfs': PlacementSpec(count=1), + 'grafana': PlacementSpec(count=1), + 'alertmanager': PlacementSpec(count=1), + 'prometheus': PlacementSpec(count=1), + 'node-exporter': PlacementSpec(host_pattern='*'), + 'crash': PlacementSpec(host_pattern='*'), + 'container': PlacementSpec(count=1), + 'cephadm-exporter': PlacementSpec(host_pattern='*'), + 'snmp-gateway': PlacementSpec(count=1), + } + spec.placement = defaults[spec.service_type] + elif spec.service_type in ['mon', 'mgr'] and \ + spec.placement.count is not None and \ + spec.placement.count < 1: + raise OrchestratorError('cannot scale %s service below 1' % ( + spec.service_type)) + + host_count = len(self.inventory.keys()) + max_count = self.max_count_per_host + + if spec.placement.count is not None: + if spec.service_type in ['mon', 'mgr']: + if spec.placement.count > max(5, host_count): + raise OrchestratorError( + (f'The maximum number of {spec.service_type} daemons allowed with {host_count} hosts is {max(5, host_count)}.')) + elif spec.service_type != 'osd': + if spec.placement.count > (max_count * host_count): + raise OrchestratorError((f'The maximum number of {spec.service_type} daemons allowed with {host_count} hosts is {host_count*max_count} ({host_count}x{max_count}).' 
+ + ' This limit can be adjusted by changing the mgr/cephadm/max_count_per_host config option')) + + if spec.placement.count_per_host is not None and spec.placement.count_per_host > max_count and spec.service_type != 'osd': + raise OrchestratorError((f'The maximum count_per_host allowed is {max_count}.' + + ' This limit can be adjusted by changing the mgr/cephadm/max_count_per_host config option')) + + HostAssignment( + spec=spec, + hosts=self.inventory.all_specs(), # All hosts, even those without daemon refresh + unreachable_hosts=self._unreachable_hosts(), + networks=self.cache.networks, + daemons=self.cache.get_daemons_by_service(spec.service_name()), + allow_colo=self.cephadm_services[spec.service_type].allow_colo(), + ).validate() + + self.log.info('Saving service %s spec with placement %s' % ( + spec.service_name(), spec.placement.pretty_str())) + self.spec_store.save(spec) + self._kick_serve_loop() + return "Scheduled %s update..." % spec.service_name() + + @handle_orch_error + def apply(self, specs: Sequence[GenericSpec], no_overwrite: bool = False) -> List[str]: + results = [] + for spec in specs: + if no_overwrite: + if spec.service_type == 'host' and cast(HostSpec, spec).hostname in self.inventory: + results.append('Skipped %s host spec. To change %s spec omit --no-overwrite flag' + % (cast(HostSpec, spec).hostname, spec.service_type)) + continue + elif cast(ServiceSpec, spec).service_name() in self.spec_store: + results.append('Skipped %s service spec. 
To change %s spec omit --no-overwrite flag' + % (cast(ServiceSpec, spec).service_name(), cast(ServiceSpec, spec).service_name())) + continue + results.append(self._apply(spec)) + return results + + @handle_orch_error + def apply_mgr(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_mds(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_rgw(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_ingress(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_iscsi(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_rbd_mirror(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_nfs(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + def _get_dashboard_url(self): + # type: () -> str + return self.get('mgr_map').get('services', {}).get('dashboard', '') + + @handle_orch_error + def apply_prometheus(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_node_exporter(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_crash(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_grafana(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_alertmanager(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_container(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_snmp_gateway(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def apply_cephadm_exporter(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error + def upgrade_check(self, image: str, version: str) -> str: + if 
self.inventory.get_host_with_state("maintenance"): + raise OrchestratorError("check aborted - you have hosts in maintenance state") + + if version: + target_name = self.container_image_base + ':v' + version + elif image: + target_name = image + else: + raise OrchestratorError('must specify either image or version') + + image_info = CephadmServe(self)._get_container_image_info(target_name) + + ceph_image_version = image_info.ceph_version + if not ceph_image_version: + return f'Unable to extract ceph version from {target_name}.' + if ceph_image_version.startswith('ceph version '): + ceph_image_version = ceph_image_version.split(' ')[2] + version_error = self.upgrade._check_target_version(ceph_image_version) + if version_error: + return f'Incompatible upgrade: {version_error}' + + self.log.debug(f'image info {image} -> {image_info}') + r: dict = { + 'target_name': target_name, + 'target_id': image_info.image_id, + 'target_version': image_info.ceph_version, + 'needs_update': dict(), + 'up_to_date': list(), + 'non_ceph_image_daemons': list() + } + for host, dm in self.cache.daemons.items(): + for name, dd in dm.items(): + if image_info.image_id == dd.container_image_id: + r['up_to_date'].append(dd.name()) + elif dd.daemon_type in CEPH_IMAGE_TYPES: + r['needs_update'][dd.name()] = { + 'current_name': dd.container_image_name, + 'current_id': dd.container_image_id, + 'current_version': dd.version, + } + else: + r['non_ceph_image_daemons'].append(dd.name()) + if self.use_repo_digest and image_info.repo_digests: + # FIXME: we assume the first digest is the best one to use + r['target_digest'] = image_info.repo_digests[0] + + return json.dumps(r, indent=4, sort_keys=True) + + @handle_orch_error + def upgrade_status(self) -> orchestrator.UpgradeStatusSpec: + return self.upgrade.upgrade_status() + + @handle_orch_error + def upgrade_ls(self, image: Optional[str], tags: bool) -> Dict[Any, Any]: + return self.upgrade.upgrade_ls(image, tags) + + @handle_orch_error + def 
upgrade_start(self, image: str, version: str, daemon_types: Optional[List[str]] = None, host_placement: Optional[str] = None, + services: Optional[List[str]] = None, limit: Optional[int] = None) -> str: + if self.inventory.get_host_with_state("maintenance"): + raise OrchestratorError("upgrade aborted - you have host(s) in maintenance state") + if daemon_types is not None and services is not None: + raise OrchestratorError('--daemon-types and --services are mutually exclusive') + if daemon_types is not None: + for dtype in daemon_types: + if dtype not in CEPH_UPGRADE_ORDER: + raise OrchestratorError(f'Upgrade aborted - Got unexpected daemon type "{dtype}".\n' + f'Viable daemon types for this command are: {utils.CEPH_TYPES + utils.GATEWAY_TYPES}') + if services is not None: + for service in services: + if service not in self.spec_store: + raise OrchestratorError(f'Upgrade aborted - Got unknown service name "{service}".\n' + f'Known services are: {self.spec_store.all_specs.keys()}') + hosts: Optional[List[str]] = None + if host_placement is not None: + all_hosts = list(self.inventory.all_specs()) + placement = PlacementSpec.from_string(host_placement) + hosts = placement.filter_matching_hostspecs(all_hosts) + if not hosts: + raise OrchestratorError( + f'Upgrade aborted - hosts parameter "{host_placement}" provided did not match any hosts') + + if limit is not None: + if limit < 1: + raise OrchestratorError(f'Upgrade aborted - --limit arg must be a positive integer, not {limit}') + + return self.upgrade.upgrade_start(image, version, daemon_types, hosts, services, limit) + + @handle_orch_error + def upgrade_pause(self) -> str: + return self.upgrade.upgrade_pause() + + @handle_orch_error + def upgrade_resume(self) -> str: + return self.upgrade.upgrade_resume() + + @handle_orch_error + def upgrade_stop(self) -> str: + return self.upgrade.upgrade_stop() + + @handle_orch_error + def remove_osds(self, osd_ids: List[str], + replace: bool = False, + force: bool = False, + zap: 
bool = False) -> str: + """ + Takes a list of OSDs and schedules them for removal. + The function that takes care of the actual removal is + process_removal_queue(). + """ + + daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_type('osd') + to_remove_daemons = list() + for daemon in daemons: + if daemon.daemon_id in osd_ids: + to_remove_daemons.append(daemon) + if not to_remove_daemons: + return f"Unable to find OSDs: {osd_ids}" + + for daemon in to_remove_daemons: + assert daemon.daemon_id is not None + try: + self.to_remove_osds.enqueue(OSD(osd_id=int(daemon.daemon_id), + replace=replace, + force=force, + zap=zap, + hostname=daemon.hostname, + process_started_at=datetime_now(), + remove_util=self.to_remove_osds.rm_util)) + except NotFoundError: + return f"Unable to find OSDs: {osd_ids}" + + # trigger the serve loop to initiate the removal + self._kick_serve_loop() + return "Scheduled OSD(s) for removal" + + @handle_orch_error + def stop_remove_osds(self, osd_ids: List[str]) -> str: + """ + Stops a `removal` process for a List of OSDs. + This will revert their weight and remove it from the osds_to_remove queue + """ + for osd_id in osd_ids: + try: + self.to_remove_osds.rm(OSD(osd_id=int(osd_id), + remove_util=self.to_remove_osds.rm_util)) + except (NotFoundError, KeyError, ValueError): + return f'Unable to find OSD in the queue: {osd_id}' + + # trigger the serve loop to halt the removal + self._kick_serve_loop() + return "Stopped OSD(s) removal" + + @handle_orch_error + def remove_osds_status(self) -> List[OSD]: + """ + The CLI call to retrieve an osd removal report + """ + return self.to_remove_osds.all_osds() + + @handle_orch_error + def drain_host(self, hostname, force=False): + # type: (str, bool) -> str + """ + Drain all daemons from a host. 
+ :param host: host name + """ + + # if we drain the last admin host we could end up removing the only instance + # of the config and keyring and cause issues + if not force: + p = PlacementSpec(label='_admin') + admin_hosts = p.filter_matching_hostspecs(self.inventory.all_specs()) + if len(admin_hosts) == 1 and admin_hosts[0] == hostname: + raise OrchestratorValidationError(f"Host {hostname} is the last host with the '_admin'" + " label.\nDraining this host could cause the removal" + " of the last cluster config/keyring managed by cephadm.\n" + "It is recommended to add the _admin label to another host" + " before completing this operation.\nIf you're certain this is" + " what you want rerun this command with --force.") + + self.add_host_label(hostname, '_no_schedule') + + daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_host(hostname) + + osds_to_remove = [d.daemon_id for d in daemons if d.daemon_type == 'osd'] + self.remove_osds(osds_to_remove) + + daemons_table = "" + daemons_table += "{:<20} {:<15}\n".format("type", "id") + daemons_table += "{:<20} {:<15}\n".format("-" * 20, "-" * 15) + for d in daemons: + daemons_table += "{:<20} {:<15}\n".format(d.daemon_type, d.daemon_id) + + return "Scheduled to remove the following daemons from host '{}'\n{}".format(hostname, daemons_table) + + def trigger_connect_dashboard_rgw(self) -> None: + self.need_connect_dashboard_rgw = True + self.event.set() diff --git a/src/pybind/mgr/cephadm/offline_watcher.py b/src/pybind/mgr/cephadm/offline_watcher.py new file mode 100644 index 000000000..006156fc7 --- /dev/null +++ b/src/pybind/mgr/cephadm/offline_watcher.py @@ -0,0 +1,70 @@ +import logging +from typing import List, Optional, TYPE_CHECKING + +import multiprocessing as mp +import threading + +from cephadm.serve import CephadmServe + +try: + import remoto +except ImportError: + remoto = None + + +if TYPE_CHECKING: + from cephadm.module import CephadmOrchestrator + +logger = logging.getLogger(__name__) 
class OfflineHostWatcher(threading.Thread):
    """Background thread that periodically probes hosts for liveness.

    Every ~20s (or immediately after wakeup()) it runs a trivial command
    on each host via the cephadm SSH connection; a failure marks the host
    as a candidate for offline handling by kicking the mgr serve loop.
    """

    def __init__(self, mgr: "CephadmOrchestrator") -> None:
        self.mgr = mgr
        # hosts currently being probed; None until first set_hosts()
        self.hosts: Optional[List[str]] = None
        # staging slot: set_hosts() writes here, run() swaps it in at the
        # top of the next iteration (avoids mutating self.hosts mid-scan)
        self.new_hosts: Optional[List[str]] = None
        # cooperative shutdown flag, set by shutdown()
        self.stop = False
        self.event = threading.Event()
        super(OfflineHostWatcher, self).__init__(target=self.run)

    def run(self) -> None:
        # probe up to 10 hosts concurrently
        self.thread_pool = mp.pool.ThreadPool(10)
        while not self.stop:
            # only need to take action if we have hosts to check
            if self.hosts or self.new_hosts:
                if self.new_hosts:
                    self.hosts = self.new_hosts
                    self.new_hosts = None
                logger.debug(f'OfflineHostDetector: Checking if hosts: {self.hosts} are offline.')
                assert self.hosts is not None
                self.thread_pool.map(self.check_host, self.hosts)
            # sleep ~20s between sweeps; wakeup()/shutdown() set the event
            # to cut the wait short
            self.event.wait(20)
            self.event.clear()
        self.thread_pool.close()
        self.thread_pool.join()

    def check_host(self, host: str) -> None:
        # skip hosts cephadm already considers offline
        if host not in self.mgr.offline_hosts:
            try:
                # run a no-op command purely to test connectivity
                with CephadmServe(self.mgr)._remote_connection(host) as tpl:
                    conn, connr = tpl
                    out, err, code = remoto.process.check(conn, ['true'])
            except Exception:
                logger.debug(f'OfflineHostDetector: detected {host} to be offline')
                # kick serve loop in case corrective action must be taken for offline host
                self.mgr._kick_serve_loop()

    def set_hosts(self, hosts: List[str]) -> None:
        # sorted so the inequality check below is order-insensitive
        hosts.sort()
        if (not self.hosts or self.hosts != hosts) and hosts:
            self.new_hosts = hosts
            logger.debug(
                f'OfflineHostDetector: Hosts to check if offline swapped to: {self.new_hosts}.')
            self.wakeup()

    def wakeup(self) -> None:
        # interrupt the 20s wait in run() so changes take effect promptly
        self.event.set()

    def shutdown(self) -> None:
        self.stop = True
        self.wakeup()
class Registry:
    """Minimal client for the container registry HTTP API v2.

    Only implements what cephadm needs: anonymous bearer-token auth and
    paginated tag listing for a single image.
    """

    def __init__(self, url: str):
        # registry hostname as given by the user, e.g. 'docker.io' or 'quay.io'
        self._url: str = url

    @property
    def api_domain(self) -> str:
        # 'docker.io' is an alias; the real API endpoint for Docker Hub is
        # registry-1.docker.io
        if self._url == 'docker.io':
            return 'registry-1.docker.io'
        return self._url

    def get_token(self, response: Response) -> str:
        """Fetch a bearer token from the auth realm advertised in a 401 reply.

        :param response: the 401 response carrying the Www-Authenticate header
        :raises ValueError: if the token endpoint returns neither
            'access_token' nor 'token'
        """
        realm, params = self.parse_www_authenticate(response.headers['Www-Authenticate'])
        r = requests.get(realm, params=params)
        r.raise_for_status()
        ret = r.json()
        # different registries use different key names for the same thing
        if 'access_token' in ret:
            return ret['access_token']
        if 'token' in ret:
            return ret['token']
        raise ValueError(f'Unknown token reply {ret}')

    def parse_www_authenticate(self, text: str) -> Tuple[str, Dict[str, str]]:
        """Split a Www-Authenticate header into (realm URL, query params).

        NOTE(review): naive comma/equals split — assumes no commas or '='
        inside quoted parameter values, which holds for the registries this
        targets; verify before reusing elsewhere.
        """
        # 'Www-Authenticate': 'Bearer realm="https://auth.docker.io/token",service="registry.docker.io",scope="repository:ceph/ceph:pull"'
        r: Dict[str, str] = {}
        for token in text.split(','):
            key, value = token.split('=', 1)
            r[key] = value.strip('"')
        # first key arrives as 'Bearer realm'; the rest become query params
        realm = r.pop('Bearer realm')
        return realm, r

    def get_tags(self, image: str) -> List[str]:
        """Return all tags for *image*, following Link-header pagination.

        Retries once with a bearer token after a 401; any other HTTP error
        is raised via raise_for_status().
        :raises ValueError: on connection failure or failed authentication
        """
        tags = []
        headers = {'Accept': 'application/json'}
        url = f'https://{self.api_domain}/v2/{image}/tags/list'
        while True:
            try:
                r = requests.get(url, headers=headers)
            except requests.exceptions.ConnectionError as e:
                msg = f"Cannot get tags from url '{url}': {e}"
                raise ValueError(msg) from e
            if r.status_code == 401:
                # a second 401 after we already sent a token means the
                # credentials are simply not accepted
                if 'Authorization' in headers:
                    raise ValueError('failed authentication')
                token = self.get_token(r)
                headers['Authorization'] = f'Bearer {token}'
                continue
            r.raise_for_status()

            new_tags = r.json()['tags']
            tags.extend(new_tags)

            if 'Link' not in r.headers:
                break

            # strip < > brackets off and prepend the domain
            url = f'https://{self.api_domain}' + r.headers['Link'].split(';')[0][1:-1]
            continue

        return tags
# candidate interpreter names, in order of preference
PYTHONS = ['python3', 'python2', 'python']
# directories searched on the remote host, in order
PATH = [
    '/usr/bin',
    '/usr/local/bin',
    '/bin',
    '/usr/sbin',
    '/usr/local/sbin',
    '/sbin',
]


def choose_python():
    # type: () -> Optional[str]
    """Return the first python interpreter found on the remote PATH.

    Prefers python3 over python2 over plain python; returns None when
    no interpreter exists in any of the searched directories.
    """
    for interpreter in PYTHONS:
        for directory in PATH:
            candidate = os.path.join(directory, interpreter)
            if os.path.exists(candidate):
                return candidate
    return None


def write_file(path: str, content: bytes, mode: int, uid: int, gid: int,
               mkdir_p: bool = True) -> Optional[str]:
    """Atomically write *content* to *path* with the given ownership/mode.

    The data is staged in a sibling '<path>.new' file, chowned/chmodded,
    fsynced, and then renamed over the destination so readers never see a
    partial file.

    :return: None on success, or the stringified exception on failure.
    """
    try:
        if mkdir_p:
            parent = os.path.dirname(path)
            if not os.path.exists(parent):
                os.makedirs(parent)
        staging = path + '.new'
        with open(staging, 'wb') as fh:
            fd = fh.fileno()
            os.fchown(fd, uid, gid)
            os.fchmod(fd, mode)
            fh.write(content)
            os.fsync(fd)
        os.rename(staging, path)
    except Exception as exc:
        return str(exc)
    return None


if __name__ == '__channelexec__':
    # remoto/execnet entry point: evaluate commands sent over the channel
    for item in channel:  # type: ignore # noqa: F821
        channel.send(eval(item))  # type: ignore # noqa: F821
    def renumber_ports(self, n: int) -> 'DaemonPlacement':
        """Return a copy of this placement with every port shifted by *n*.

        Used when colocating several daemons of one service on a host:
        each extra slot takes the next port offset.
        """
        return DaemonPlacement(
            self.daemon_type,
            self.hostname,
            self.network,
            self.name,
            self.ip,
            [p + n for p in self.ports],
            self.rank,
            self.rank_generation,
        )

    def assign_rank(self, rank: int, gen: int) -> 'DaemonPlacement':
        """Return a copy carrying the given (rank, rank_generation)."""
        return DaemonPlacement(
            self.daemon_type,
            self.hostname,
            self.network,
            self.name,
            self.ip,
            self.ports,
            rank,
            gen,
        )

    def assign_name(self, name: str) -> 'DaemonPlacement':
        """Return a copy with an explicit daemon name filled in."""
        return DaemonPlacement(
            self.daemon_type,
            self.hostname,
            self.network,
            name,
            self.ip,
            self.ports,
            self.rank,
            self.rank_generation,
        )

    def assign_rank_generation(
            self,
            rank: int,
            rank_map: Dict[int, Dict[int, Optional[str]]]
    ) -> 'DaemonPlacement':
        """Allocate the next generation for *rank* and return a copy with it.

        Mutates *rank_map*: the new generation is recorded with daemon_id
        None (the id is filled in once the daemon is actually created).
        """
        if rank not in rank_map:
            rank_map[rank] = {}
            gen = 0
        else:
            # generations are monotonically increasing per rank
            gen = max(rank_map[rank].keys()) + 1
        rank_map[rank][gen] = None
        return DaemonPlacement(
            self.daemon_type,
            self.hostname,
            self.network,
            self.name,
            self.ip,
            self.ports,
            rank,
            gen,
        )
+ if self.name and self.name != dd.daemon_id: + return False + if self.ports: + if self.ports != dd.ports and dd.ports: + return False + if self.ip != dd.ip and dd.ip: + return False + return True + + def matches_rank_map( + self, + dd: DaemonDescription, + rank_map: Optional[Dict[int, Dict[int, Optional[str]]]], + ranks: List[int] + ) -> bool: + if rank_map is None: + # daemon should have no rank + return dd.rank is None + + if dd.rank is None: + return False + + if dd.rank not in rank_map: + return False + if dd.rank not in ranks: + return False + + # must be the highest/newest rank_generation + if dd.rank_generation != max(rank_map[dd.rank].keys()): + return False + + # must be *this* daemon + return rank_map[dd.rank][dd.rank_generation] == dd.daemon_id + + +class HostAssignment(object): + + def __init__(self, + spec: ServiceSpec, + hosts: List[orchestrator.HostSpec], + unreachable_hosts: List[orchestrator.HostSpec], + daemons: List[orchestrator.DaemonDescription], + networks: Dict[str, Dict[str, Dict[str, List[str]]]] = {}, + filter_new_host: Optional[Callable[[str], bool]] = None, + allow_colo: bool = False, + primary_daemon_type: Optional[str] = None, + per_host_daemon_type: Optional[str] = None, + rank_map: Optional[Dict[int, Dict[int, Optional[str]]]] = None, + ): + assert spec + self.spec = spec # type: ServiceSpec + self.primary_daemon_type = primary_daemon_type or spec.service_type + self.hosts: List[orchestrator.HostSpec] = hosts + self.unreachable_hosts: List[orchestrator.HostSpec] = unreachable_hosts + self.filter_new_host = filter_new_host + self.service_name = spec.service_name() + self.daemons = daemons + self.networks = networks + self.allow_colo = allow_colo + self.per_host_daemon_type = per_host_daemon_type + self.ports_start = spec.get_port_start() + self.rank_map = rank_map + + def hosts_by_label(self, label: str) -> List[orchestrator.HostSpec]: + return [h for h in self.hosts if label in h.labels] + + def get_hostnames(self) -> List[str]: + 
    def validate(self) -> None:
        """Sanity-check the spec against the known hosts before placement.

        :raises OrchestratorValidationError: if count == 0, colocation is
            requested for a service that does not allow it, explicitly
            named hosts are unknown, or a host pattern / label matches
            no host at all.
        """
        self.spec.validate()

        if self.spec.placement.count == 0:
            raise OrchestratorValidationError(
                f'<count> can not be 0 for {self.spec.one_line_str()}')

        if (
                self.spec.placement.count_per_host is not None
                and self.spec.placement.count_per_host > 1
                and not self.allow_colo
        ):
            raise OrchestratorValidationError(
                f'Cannot place more than one {self.spec.service_type} per host'
            )

        if self.spec.placement.hosts:
            # explicit host list: every named host must be known to cephadm
            explicit_hostnames = {h.hostname for h in self.spec.placement.hosts}
            unknown_hosts = explicit_hostnames.difference(set(self.get_hostnames()))
            if unknown_hosts:
                raise OrchestratorValidationError(
                    f'Cannot place {self.spec.one_line_str()} on {", ".join(sorted(unknown_hosts))}: Unknown hosts')

        if self.spec.placement.host_pattern:
            # a pattern that matches nothing is almost certainly a typo
            pattern_hostnames = self.spec.placement.filter_matching_hostspecs(self.hosts)
            if not pattern_hostnames:
                raise OrchestratorValidationError(
                    f'Cannot place {self.spec.one_line_str()}: No matching hosts')

        if self.spec.placement.label:
            label_hosts = self.hosts_by_label(self.spec.placement.label)
            if not label_hosts:
                raise OrchestratorValidationError(
                    f'Cannot place {self.spec.one_line_str()}: No matching '
                    f'hosts for label {self.spec.placement.label}')
break + if not found: + to_remove.append(dd) + to_add += host_slots + + to_remove = [d for d in to_remove if d.hostname not in [ + h.hostname for h in self.unreachable_hosts]] + + return slots, to_add, to_remove + + def place(self): + # type: () -> Tuple[List[DaemonPlacement], List[DaemonPlacement], List[orchestrator.DaemonDescription]] + """ + Generate a list of HostPlacementSpec taking into account: + + * all known hosts + * hosts with existing daemons + * placement spec + * self.filter_new_host + """ + + self.validate() + + count = self.spec.placement.count + + # get candidate hosts based on [hosts, label, host_pattern] + candidates = self.get_candidates() # type: List[DaemonPlacement] + if self.primary_daemon_type in RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES: + # remove unreachable hosts that are not in maintenance so daemons + # on these hosts will be rescheduled + candidates = self.remove_non_maintenance_unreachable_candidates(candidates) + + def expand_candidates(ls: List[DaemonPlacement], num: int) -> List[DaemonPlacement]: + r = [] + for offset in range(num): + r.extend([dp.renumber_ports(offset) for dp in ls]) + return r + + # consider enough slots to fulfill target count-per-host or count + if count is None: + if self.spec.placement.count_per_host: + per_host = self.spec.placement.count_per_host + else: + per_host = 1 + candidates = expand_candidates(candidates, per_host) + elif self.allow_colo and candidates: + per_host = 1 + ((count - 1) // len(candidates)) + candidates = expand_candidates(candidates, per_host) + + # consider (preserve) existing daemons in a particular order... 
+ daemons = sorted( + [ + d for d in self.daemons if d.daemon_type == self.primary_daemon_type + ], + key=lambda d: ( + not d.is_active, # active before standby + d.rank is not None, # ranked first, then non-ranked + d.rank, # low ranks + 0 - (d.rank_generation or 0), # newer generations first + ) + ) + + # sort candidates into existing/used slots that already have a + # daemon, and others (the rest) + existing_active: List[orchestrator.DaemonDescription] = [] + existing_standby: List[orchestrator.DaemonDescription] = [] + existing_slots: List[DaemonPlacement] = [] + to_add: List[DaemonPlacement] = [] + to_remove: List[orchestrator.DaemonDescription] = [] + ranks: List[int] = list(range(len(candidates))) + others: List[DaemonPlacement] = candidates.copy() + for dd in daemons: + found = False + for p in others: + if p.matches_daemon(dd) and p.matches_rank_map(dd, self.rank_map, ranks): + others.remove(p) + if dd.is_active: + existing_active.append(dd) + else: + existing_standby.append(dd) + if dd.rank is not None: + assert dd.rank_generation is not None + p = p.assign_rank(dd.rank, dd.rank_generation) + ranks.remove(dd.rank) + existing_slots.append(p) + found = True + break + if not found: + to_remove.append(dd) + + # TODO: At some point we want to deploy daemons that are on offline hosts + # at what point we do this differs per daemon type. Stateless daemons we could + # do quickly to improve availability. 
Steful daemons we might want to wait longer + # to see if the host comes back online + + existing = existing_active + existing_standby + + # build to_add + if not count: + to_add = [dd for dd in others if dd.hostname not in [ + h.hostname for h in self.unreachable_hosts]] + else: + # The number of new slots that need to be selected in order to fulfill count + need = count - len(existing) + + # we don't need any additional placements + if need <= 0: + to_remove.extend(existing[count:]) + del existing_slots[count:] + return self.place_per_host_daemons(existing_slots, [], to_remove) + + for dp in others: + if need <= 0: + break + if dp.hostname not in [h.hostname for h in self.unreachable_hosts]: + to_add.append(dp) + need -= 1 # this is last use of need in this function so it can work as a counter + + if self.rank_map is not None: + # assign unused ranks (and rank_generations) to to_add + assert len(ranks) >= len(to_add) + for i in range(len(to_add)): + to_add[i] = to_add[i].assign_rank_generation(ranks[i], self.rank_map) + + logger.debug('Combine hosts with existing daemons %s + new hosts %s' % (existing, to_add)) + return self.place_per_host_daemons(existing_slots + to_add, to_add, to_remove) + + def find_ip_on_host(self, hostname: str, subnets: List[str]) -> Optional[str]: + for subnet in subnets: + ips: List[str] = [] + for iface, iface_ips in self.networks.get(hostname, {}).get(subnet, {}).items(): + ips.extend(iface_ips) + if ips: + return sorted(ips)[0] + return None + + def get_candidates(self) -> List[DaemonPlacement]: + if self.spec.placement.hosts: + ls = [ + DaemonPlacement(daemon_type=self.primary_daemon_type, + hostname=h.hostname, network=h.network, name=h.name, + ports=self.ports_start) + for h in self.spec.placement.hosts + ] + elif self.spec.placement.label: + ls = [ + DaemonPlacement(daemon_type=self.primary_daemon_type, + hostname=x.hostname, ports=self.ports_start) + for x in self.hosts_by_label(self.spec.placement.label) + ] + elif 
self.spec.placement.host_pattern: + ls = [ + DaemonPlacement(daemon_type=self.primary_daemon_type, + hostname=x, ports=self.ports_start) + for x in self.spec.placement.filter_matching_hostspecs(self.hosts) + ] + elif ( + self.spec.placement.count is not None + or self.spec.placement.count_per_host is not None + ): + ls = [ + DaemonPlacement(daemon_type=self.primary_daemon_type, + hostname=x.hostname, ports=self.ports_start) + for x in self.hosts + ] + else: + raise OrchestratorValidationError( + "placement spec is empty: no hosts, no label, no pattern, no count") + + # allocate an IP? + if self.spec.networks: + orig = ls.copy() + ls = [] + for p in orig: + ip = self.find_ip_on_host(p.hostname, self.spec.networks) + if ip: + ls.append(DaemonPlacement(daemon_type=self.primary_daemon_type, + hostname=p.hostname, network=p.network, + name=p.name, ports=p.ports, ip=ip)) + else: + logger.debug( + f'Skipping {p.hostname} with no IP in network(s) {self.spec.networks}' + ) + + if self.filter_new_host: + old = ls.copy() + ls = [] + for h in old: + if self.filter_new_host(h.hostname): + ls.append(h) + if len(old) > len(ls): + logger.debug('Filtered %s down to %s' % (old, ls)) + + # now that we have the list of nodes candidates based on the configured + # placement, let's shuffle the list for node pseudo-random selection. For this, + # we generate a seed from the service name and we use to shuffle the candidates. + # This makes shuffling deterministic for the same service name. 
+ seed = int( + hashlib.sha1(self.spec.service_name().encode('utf-8')).hexdigest(), + 16 + ) % (2 ** 32) # truncate result to 32 bits + final = sorted(ls) + random.Random(seed).shuffle(final) + return final + + def remove_non_maintenance_unreachable_candidates(self, candidates: List[DaemonPlacement]) -> List[DaemonPlacement]: + in_maintenance: Dict[str, bool] = {} + for h in self.hosts: + if h.status.lower() == 'maintenance': + in_maintenance[h.hostname] = True + continue + in_maintenance[h.hostname] = False + unreachable_hosts = [h.hostname for h in self.unreachable_hosts] + candidates = [ + c for c in candidates if c.hostname not in unreachable_hosts or in_maintenance[c.hostname]] + return candidates diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py new file mode 100644 index 000000000..7ac6fee88 --- /dev/null +++ b/src/pybind/mgr/cephadm/serve.py @@ -0,0 +1,1487 @@ +import hashlib +import json +import logging +import uuid +from collections import defaultdict +from contextlib import contextmanager +from typing import TYPE_CHECKING, Optional, List, cast, Dict, Any, Union, Tuple, Iterator, \ + DefaultDict + +from cephadm import remotes + +try: + import remoto + import execnet.gateway_bootstrap +except ImportError: + remoto = None + +from ceph.deployment import inventory +from ceph.deployment.drive_group import DriveGroupSpec +from ceph.deployment.service_spec import ServiceSpec, CustomContainerSpec, PlacementSpec +from ceph.utils import str_to_datetime, datetime_now + +import orchestrator +from orchestrator import OrchestratorError, set_exception_subject, OrchestratorEvent, \ + DaemonDescriptionStatus, daemon_type_to_service +from cephadm.services.cephadmservice import CephadmDaemonDeploySpec +from cephadm.schedule import HostAssignment +from cephadm.autotune import MemoryAutotuner +from cephadm.utils import forall_hosts, cephadmNoImage, is_repo_digest, \ + CephadmNoImage, CEPH_TYPES, ContainerInspectInfo +from mgr_module import 
MonCommandFailed +from mgr_util import format_bytes + +from . import utils + +if TYPE_CHECKING: + from cephadm.module import CephadmOrchestrator + from remoto.backends import BaseConnection + +logger = logging.getLogger(__name__) + +REQUIRES_POST_ACTIONS = ['grafana', 'iscsi', 'prometheus', 'alertmanager', 'rgw'] + + +class CephadmServe: + """ + This module contains functions that are executed in the + serve() thread. Thus they don't block the CLI. + + Please see the `Note regarding network calls from CLI handlers` + chapter in the cephadm developer guide. + + On the other hand, These function should *not* be called form + CLI handlers, to avoid blocking the CLI + """ + + def __init__(self, mgr: "CephadmOrchestrator"): + self.mgr: "CephadmOrchestrator" = mgr + self.log = logger + + def serve(self) -> None: + """ + The main loop of cephadm. + + A command handler will typically change the declarative state + of cephadm. This loop will then attempt to apply this new state. + """ + self.log.debug("serve starting") + self.mgr.config_checker.load_network_config() + + while self.mgr.run: + self.log.debug("serve loop start") + + try: + + self.convert_tags_to_repo_digest() + + # refresh daemons + self.log.debug('refreshing hosts and daemons') + self._refresh_hosts_and_daemons() + + self._check_for_strays() + + self._update_paused_health() + + if self.mgr.need_connect_dashboard_rgw and self.mgr.config_dashboard: + self.mgr.need_connect_dashboard_rgw = False + if 'dashboard' in self.mgr.get('mgr_map')['modules']: + self.log.info('Checking dashboard <-> RGW credentials') + self.mgr.remote('dashboard', 'set_rgw_credentials') + + if not self.mgr.paused: + self.mgr.to_remove_osds.process_removal_queue() + + self.mgr.migration.migrate() + if self.mgr.migration.is_migration_ongoing(): + continue + + if self._apply_all_services(): + continue # did something, refresh + + self._check_daemons() + + self._purge_deleted_services() + + self._check_for_moved_osds() + + if 
self.mgr.upgrade.continue_upgrade(): + continue + + except OrchestratorError as e: + if e.event_subject: + self.mgr.events.from_orch_error(e) + + self.log.debug("serve loop sleep") + self._serve_sleep() + self.log.debug("serve loop wake") + self.log.debug("serve exit") + + def _serve_sleep(self) -> None: + sleep_interval = max( + 30, + min( + self.mgr.host_check_interval, + self.mgr.facts_cache_timeout, + self.mgr.daemon_cache_timeout, + self.mgr.device_cache_timeout, + ) + ) + self.log.debug('Sleeping for %d seconds', sleep_interval) + self.mgr.event.wait(sleep_interval) + self.mgr.event.clear() + + def _update_paused_health(self) -> None: + self.log.debug('_update_paused_health') + if self.mgr.paused: + self.mgr.set_health_warning('CEPHADM_PAUSED', 'cephadm background work is paused', 1, ["'ceph orch resume' to resume"]) + else: + self.mgr.remove_health_warning('CEPHADM_PAUSED') + + def _autotune_host_memory(self, host: str) -> None: + total_mem = self.mgr.cache.get_facts(host).get('memory_total_kb', 0) + if not total_mem: + val = None + else: + total_mem *= 1024 # kb -> bytes + total_mem *= self.mgr.autotune_memory_target_ratio + a = MemoryAutotuner( + daemons=self.mgr.cache.get_daemons_by_host(host), + config_get=self.mgr.get_foreign_ceph_option, + total_mem=total_mem, + ) + val, osds = a.tune() + any_changed = False + for o in osds: + if self.mgr.get_foreign_ceph_option(o, 'osd_memory_target') != val: + self.mgr.check_mon_command({ + 'prefix': 'config rm', + 'who': o, + 'name': 'osd_memory_target', + }) + any_changed = True + if val is not None: + if any_changed: + self.mgr.log.info( + f'Adjusting osd_memory_target on {host} to {format_bytes(val, 6)}' + ) + ret, out, err = self.mgr.mon_command({ + 'prefix': 'config set', + 'who': f'osd/host:{host.split(".")[0]}', + 'name': 'osd_memory_target', + 'value': str(val), + }) + if ret: + self.log.warning( + f'Unable to set osd_memory_target on {host} to {val}: {err}' + ) + else: + # if osd memory autotuning is off, 
we don't want to remove these config + # options as users may be using them. Since there is no way to set autotuning + # on/off at a host level, best we can do is check if it is globally on. + if self.mgr.get_foreign_ceph_option('osd', 'osd_memory_target_autotune'): + self.mgr.check_mon_command({ + 'prefix': 'config rm', + 'who': f'osd/host:{host.split(".")[0]}', + 'name': 'osd_memory_target', + }) + self.mgr.cache.update_autotune(host) + + def _refresh_hosts_and_daemons(self) -> None: + self.log.debug('_refresh_hosts_and_daemons') + bad_hosts = [] + failures = [] + + if self.mgr.manage_etc_ceph_ceph_conf or self.mgr.keys.keys: + client_files = self._calc_client_files() + else: + client_files = {} + + @forall_hosts + def refresh(host: str) -> None: + + # skip hosts that are in maintenance - they could be powered off + if self.mgr.inventory._inventory[host].get("status", "").lower() == "maintenance": + return + + if self.mgr.cache.host_needs_check(host): + r = self._check_host(host) + if r is not None: + bad_hosts.append(r) + if self.mgr.cache.host_needs_daemon_refresh(host): + self.log.debug('refreshing %s daemons' % host) + r = self._refresh_host_daemons(host) + if r: + failures.append(r) + + if self.mgr.cache.host_needs_registry_login(host) and self.mgr.get_store('registry_credentials'): + self.log.debug(f"Logging `{host}` into custom registry") + r = self._registry_login( + host, json.loads(str(self.mgr.get_store('registry_credentials')))) + if r: + bad_hosts.append(r) + + if self.mgr.cache.host_needs_device_refresh(host): + self.log.debug('refreshing %s devices' % host) + r = self._refresh_host_devices(host) + if r: + failures.append(r) + + if self.mgr.cache.host_needs_facts_refresh(host): + self.log.debug(('Refreshing %s facts' % host)) + r = self._refresh_facts(host) + if r: + failures.append(r) + + if self.mgr.cache.host_needs_osdspec_preview_refresh(host): + self.log.debug(f"refreshing OSDSpec previews for {host}") + r = 
self._refresh_host_osdspec_previews(host) + if r: + failures.append(r) + + if ( + self.mgr.cache.host_needs_autotune_memory(host) + and not self.mgr.inventory.has_label(host, '_no_autotune_memory') + ): + self.log.debug(f"autotuning memory for {host}") + self._autotune_host_memory(host) + + self._write_client_files(client_files, host) + + refresh(self.mgr.cache.get_hosts()) + + self.mgr.config_checker.run_checks() + + for k in [ + 'CEPHADM_HOST_CHECK_FAILED', + 'CEPHADM_FAILED_DAEMON', + 'CEPHADM_REFRESH_FAILED', + ]: + self.mgr.remove_health_warning(k) + if bad_hosts: + self.mgr.set_health_warning('CEPHADM_HOST_CHECK_FAILED', f'{len(bad_hosts)} hosts fail cephadm check', len(bad_hosts), bad_hosts) + if failures: + self.mgr.set_health_warning('CEPHADM_REFRESH_FAILED', 'failed to probe daemons or devices', len(failures), failures) + failed_daemons = [] + for dd in self.mgr.cache.get_daemons(): + if dd.status is not None and dd.status == DaemonDescriptionStatus.error: + failed_daemons.append('daemon %s on %s is in %s state' % ( + dd.name(), dd.hostname, dd.status_desc + )) + if failed_daemons: + self.mgr.set_health_warning('CEPHADM_FAILED_DAEMON', f'{len(failed_daemons)} failed cephadm daemon(s)', len(failed_daemons), failed_daemons) + + def _check_host(self, host: str) -> Optional[str]: + if host not in self.mgr.inventory: + return None + self.log.debug(' checking %s' % host) + try: + addr = self.mgr.inventory.get_addr(host) if host in self.mgr.inventory else host + out, err, code = self._run_cephadm( + host, cephadmNoImage, 'check-host', [], + error_ok=True, no_fsid=True) + self.mgr.cache.update_last_host_check(host) + self.mgr.cache.save_host(host) + if code: + self.log.debug(' host %s (%s) failed check' % (host, addr)) + if self.mgr.warn_on_failed_host_check: + return 'host %s (%s) failed check: %s' % (host, addr, err) + else: + self.log.debug(' host %s (%s) ok' % (host, addr)) + except Exception as e: + self.log.debug(' host %s (%s) failed check' % (host, addr)) 
    def _refresh_host_daemons(self, host: str) -> Optional[str]:
        """Run `cephadm ls` on *host* and rebuild the daemon cache for it.

        Returns an error string on failure, else None.
        """
        try:
            ls = self._run_cephadm_json(host, 'mon', 'ls', [], no_fsid=True)
        except OrchestratorError as e:
            return str(e)
        dm = {}
        for d in ls:
            # only daemons deployed by cephadm, belonging to this cluster,
            # with a '<type>.<id>' name
            if not d['style'].startswith('cephadm'):
                continue
            if d['fsid'] != self.mgr._cluster_fsid:
                continue
            if '.' not in d['name']:
                continue
            sd = orchestrator.DaemonDescription()
            sd.last_refresh = datetime_now()
            for k in ['created', 'started', 'last_configured', 'last_deployed']:
                v = d.get(k, None)
                if v:
                    setattr(sd, k, str_to_datetime(d[k]))
            sd.daemon_type = d['name'].split('.')[0]
            if sd.daemon_type not in orchestrator.KNOWN_DAEMON_TYPES:
                logger.warning(f"Found unknown daemon type {sd.daemon_type} on host {host}")
                continue

            sd.daemon_id = '.'.join(d['name'].split('.')[1:])
            sd.hostname = host
            sd.container_id = d.get('container_id')
            if sd.container_id:
                # shorten the hash
                sd.container_id = sd.container_id[0:12]
            sd.container_image_name = d.get('container_image_name')
            sd.container_image_id = d.get('container_image_id')
            sd.container_image_digests = d.get('container_image_digests')
            sd.memory_usage = d.get('memory_usage')
            sd.memory_request = d.get('memory_request')
            sd.memory_limit = d.get('memory_limit')
            sd.cpu_percentage = d.get('cpu_percentage')
            sd._service_name = d.get('service_name')
            sd.deployed_by = d.get('deployed_by')
            sd.version = d.get('version')
            sd.ports = d.get('ports')
            sd.ip = d.get('ip')
            sd.rank = int(d['rank']) if d.get('rank') is not None else None
            sd.rank_generation = int(d['rank_generation']) if d.get(
                'rank_generation') is not None else None
            sd.extra_container_args = d.get('extra_container_args')
            if 'state' in d:
                sd.status_desc = d['state']
                sd.status = {
                    'running': DaemonDescriptionStatus.running,
                    'stopped': DaemonDescriptionStatus.stopped,
                    'error': DaemonDescriptionStatus.error,
                    'unknown': DaemonDescriptionStatus.error,
                }[d['state']]
            else:
                sd.status_desc = 'unknown'
                sd.status = None
            dm[sd.name()] = sd
        self.log.debug('Refreshed host %s daemons (%d)' % (host, len(dm)))
        self.mgr.cache.update_host_daemons(host, dm)
        self.mgr.cache.save_host(host)
        return None

    def _refresh_facts(self, host: str) -> Optional[str]:
        """Gather host facts via `cephadm gather-facts` into the cache.
        Returns an error string on failure, else None."""
        try:
            val = self._run_cephadm_json(host, cephadmNoImage, 'gather-facts', [], no_fsid=True)
        except OrchestratorError as e:
            return str(e)

        self.mgr.cache.update_host_facts(host, val)

        return None

    def _refresh_host_devices(self, host: str) -> Optional[str]:
        """Refresh device inventory and network info for *host*.
        Returns an error string on failure, else None."""
        with_lsm = self.mgr.device_enhanced_scan
        inventory_args = ['--', 'inventory',
                          '--format=json-pretty',
                          '--filter-for-batch']
        if with_lsm:
            inventory_args.insert(-1, "--with-lsm")

        try:
            try:
                devices = self._run_cephadm_json(host, 'osd', 'ceph-volume',
                                                 inventory_args)
            except OrchestratorError as e:
                # older ceph-volume doesn't know --filter-for-batch; retry
                # without it
                if 'unrecognized arguments: --filter-for-batch' in str(e):
                    rerun_args = inventory_args.copy()
                    rerun_args.remove('--filter-for-batch')
                    devices = self._run_cephadm_json(host, 'osd', 'ceph-volume',
                                                     rerun_args)
                else:
                    raise

            networks = self._run_cephadm_json(host, 'mon', 'list-networks', [], no_fsid=True)
        except OrchestratorError as e:
            return str(e)

        self.log.debug('Refreshed host %s devices (%d) networks (%s)' % (
            host, len(devices), len(networks)))
        ret = inventory.Devices.from_json(devices)
        self.mgr.cache.update_host_devices_networks(host, ret.devices, networks)
        self.update_osdspec_previews(host)
        self.mgr.cache.save_host(host)
        return None

    def _refresh_host_osdspec_previews(self, host: str) -> Optional[str]:
        """Recompute the cached OSDSpec previews for *host*."""
        self.update_osdspec_previews(host)
        self.mgr.cache.save_host(host)
        self.log.debug(f'Refreshed OSDSpec previews for host <{host}>')
        return None

    def update_osdspec_previews(self, search_host: str = '') -> None:
        """Regenerate OSDSpec previews for *search_host* in the HostCache."""
        # Set global 'pending' flag for host
        self.mgr.cache.loading_osdspec_preview.add(search_host)
        previews = []
        # query OSDSpecs for host <search host> and generate/get the preview
        # There can be multiple previews for one host due to multiple OSDSpecs.
        previews.extend(self.mgr.osd_service.get_previews(search_host))
        self.log.debug(f'Loading OSDSpec previews to HostCache for host <{search_host}>')
        self.mgr.cache.osdspec_previews[search_host] = previews
        # Unset global 'pending' flag for host
        self.mgr.cache.loading_osdspec_preview.remove(search_host)
    def _check_for_strays(self) -> None:
        """Compare the servicemap against cephadm-managed daemons and raise
        CEPHADM_STRAY_HOST / CEPHADM_STRAY_DAEMON health warnings for
        anything running outside cephadm's control."""
        self.log.debug('_check_for_strays')
        for k in ['CEPHADM_STRAY_HOST',
                  'CEPHADM_STRAY_DAEMON']:
            self.mgr.remove_health_warning(k)
        if self.mgr.warn_on_stray_hosts or self.mgr.warn_on_stray_daemons:
            ls = self.mgr.list_servers()
            self.log.debug(ls)
            managed = self.mgr.cache.get_daemon_names()
            host_detail = []  # type: List[str]
            host_num_daemons = 0
            daemon_detail = []  # type: List[str]
            for item in ls:
                host = item.get('hostname')
                assert isinstance(host, str)
                daemons = item.get('services')  # misnomer!
                assert isinstance(daemons, list)
                missing_names = []
                for s in daemons:
                    daemon_id = s.get('id')
                    assert daemon_id
                    name = '%s.%s' % (s.get('type'), daemon_id)
                    if s.get('type') in ['rbd-mirror', 'cephfs-mirror', 'rgw', 'rgw-nfs']:
                        # these daemon types report a gid-like id in the
                        # servicemap; map it back via their metadata
                        metadata = self.mgr.get_metadata(
                            cast(str, s.get('type')), daemon_id, {})
                        assert metadata is not None
                        try:
                            if s.get('type') == 'rgw-nfs':
                                # https://tracker.ceph.com/issues/49573
                                name = metadata['id'][:-4]
                            else:
                                name = '%s.%s' % (s.get('type'), metadata['id'])
                        except (KeyError, TypeError):
                            self.log.debug(
                                "Failed to find daemon id for %s service %s" % (
                                    s.get('type'), s.get('id')
                                )
                            )
                    if s.get('type') == 'tcmu-runner':
                        # because we don't track tcmu-runner daemons in the host cache
                        # and don't have a way to check if the daemon is part of iscsi service
                        # we assume that all tcmu-runner daemons are managed by cephadm
                        managed.append(name)
                    if host not in self.mgr.inventory:
                        missing_names.append(name)
                        host_num_daemons += 1
                    if name not in managed:
                        daemon_detail.append(
                            'stray daemon %s on host %s not managed by cephadm' % (name, host))
                if missing_names:
                    host_detail.append(
                        'stray host %s has %d stray daemons: %s' % (
                            host, len(missing_names), missing_names))
            if self.mgr.warn_on_stray_hosts and host_detail:
                self.mgr.set_health_warning(
                    'CEPHADM_STRAY_HOST', f'{len(host_detail)} stray host(s) with {host_num_daemons} daemon(s) not managed by cephadm', len(host_detail), host_detail)
            if self.mgr.warn_on_stray_daemons and daemon_detail:
                self.mgr.set_health_warning(
                    'CEPHADM_STRAY_DAEMON', f'{len(daemon_detail)} stray daemon(s) not managed by cephadm', len(daemon_detail), daemon_detail)
for osd_id, dds in all_osds.items(): + if len(dds) <= 1: + continue + running = [dd for dd in dds if dd.status == DaemonDescriptionStatus.running] + error = [dd for dd in dds if dd.status == DaemonDescriptionStatus.error] + msg = f'Found duplicate OSDs: {", ".join(str(dd) for dd in dds)}' + logger.info(msg) + if len(running) != 1: + continue + osd = self.mgr.get_osd_by_id(osd_id) + if not osd or not osd['up']: + continue + for e in error: + assert e.hostname + try: + self._remove_daemon(e.name(), e.hostname, no_post_remove=True) + self.mgr.events.for_daemon( + e.name(), 'INFO', f"Removed duplicated daemon on host '{e.hostname}'") + except OrchestratorError as ex: + self.mgr.events.from_orch_error(ex) + logger.exception(f'failed to remove duplicated daemon {e}') + + def _apply_all_services(self) -> bool: + self.log.debug('_apply_all_services') + r = False + specs = [] # type: List[ServiceSpec] + for sn, spec in self.mgr.spec_store.active_specs.items(): + specs.append(spec) + for name in ['CEPHADM_APPLY_SPEC_FAIL', 'CEPHADM_DAEMON_PLACE_FAIL']: + self.mgr.remove_health_warning(name) + self.mgr.apply_spec_fails = [] + for spec in specs: + try: + if self._apply_service(spec): + r = True + except Exception as e: + msg = f'Failed to apply {spec.service_name()} spec {spec}: {str(e)}' + self.log.exception(msg) + self.mgr.events.for_service(spec, 'ERROR', 'Failed to apply: ' + str(e)) + self.mgr.apply_spec_fails.append((spec.service_name(), str(e))) + warnings = [] + for x in self.mgr.apply_spec_fails: + warnings.append(f'{x[0]}: {x[1]}') + self.mgr.set_health_warning('CEPHADM_APPLY_SPEC_FAIL', + f"Failed to apply {len(self.mgr.apply_spec_fails)} service(s): {','.join(x[0] for x in self.mgr.apply_spec_fails)}", + len(self.mgr.apply_spec_fails), + warnings) + self.mgr.update_watched_hosts() + return r + + def _apply_service_config(self, spec: ServiceSpec) -> None: + if spec.config: + section = utils.name_to_config_section(spec.service_name()) + for name in 
['CEPHADM_INVALID_CONFIG_OPTION', 'CEPHADM_FAILED_SET_OPTION']: + self.mgr.remove_health_warning(name) + invalid_config_options = [] + options_failed_to_set = [] + for k, v in spec.config.items(): + try: + current = self.mgr.get_foreign_ceph_option(section, k) + except KeyError: + msg = f'Ignoring invalid {spec.service_name()} config option {k}' + self.log.warning(msg) + self.mgr.events.for_service( + spec, OrchestratorEvent.ERROR, f'Invalid config option {k}' + ) + invalid_config_options.append(msg) + continue + if current != v: + self.log.debug(f'setting [{section}] {k} = {v}') + try: + self.mgr.check_mon_command({ + 'prefix': 'config set', + 'name': k, + 'value': str(v), + 'who': section, + }) + except MonCommandFailed as e: + msg = f'Failed to set {spec.service_name()} option {k}: {e}' + self.log.warning(msg) + options_failed_to_set.append(msg) + + if invalid_config_options: + self.mgr.set_health_warning('CEPHADM_INVALID_CONFIG_OPTION', f'Ignoring {len(invalid_config_options)} invalid config option(s)', len(invalid_config_options), invalid_config_options) + if options_failed_to_set: + self.mgr.set_health_warning('CEPHADM_FAILED_SET_OPTION', f'Failed to set {len(options_failed_to_set)} option(s)', len(options_failed_to_set), options_failed_to_set) + + def _apply_service(self, spec: ServiceSpec) -> bool: + """ + Schedule a service. Deploy new daemons or remove old ones, depending + on the target label and count specified in the placement. 
+ """ + self.mgr.migration.verify_no_migration() + + service_type = spec.service_type + service_name = spec.service_name() + if spec.unmanaged: + self.log.debug('Skipping unmanaged service %s' % service_name) + return False + if spec.preview_only: + self.log.debug('Skipping preview_only service %s' % service_name) + return False + self.log.debug('Applying service %s spec' % service_name) + + self._apply_service_config(spec) + + if service_type == 'osd': + self.mgr.osd_service.create_from_spec(cast(DriveGroupSpec, spec)) + # TODO: return True would result in a busy loop + # can't know if daemon count changed; create_from_spec doesn't + # return a solid indication + return False + + svc = self.mgr.cephadm_services[service_type] + daemons = self.mgr.cache.get_daemons_by_service(service_name) + + public_networks: List[str] = [] + if service_type == 'mon': + out = str(self.mgr.get_foreign_ceph_option('mon', 'public_network')) + if '/' in out: + public_networks = [x.strip() for x in out.split(',')] + self.log.debug('mon public_network(s) is %s' % public_networks) + + def matches_network(host): + # type: (str) -> bool + # make sure we have 1 or more IPs for any of those networks on that + # host + for network in public_networks: + if len(self.mgr.cache.networks[host].get(network, [])) > 0: + return True + self.log.info( + f"Filtered out host {host}: does not belong to mon public_network" + f" ({','.join(public_networks)})" + ) + return False + + rank_map = None + if svc.ranked(): + rank_map = self.mgr.spec_store[spec.service_name()].rank_map or {} + ha = HostAssignment( + spec=spec, + hosts=self.mgr._schedulable_hosts(), + unreachable_hosts=self.mgr._unreachable_hosts(), + daemons=daemons, + networks=self.mgr.cache.networks, + filter_new_host=( + matches_network if service_type == 'mon' + else None + ), + allow_colo=svc.allow_colo(), + primary_daemon_type=svc.primary_daemon_type(), + per_host_daemon_type=svc.per_host_daemon_type(), + rank_map=rank_map, + ) + + try: + 
all_slots, slots_to_add, daemons_to_remove = ha.place() + daemons_to_remove = [d for d in daemons_to_remove if (d.hostname and self.mgr.inventory._inventory[d.hostname].get( + 'status', '').lower() not in ['maintenance', 'offline'] and d.hostname not in self.mgr.offline_hosts)] + self.log.debug('Add %s, remove %s' % (slots_to_add, daemons_to_remove)) + except OrchestratorError as e: + msg = f'Failed to apply {spec.service_name()} spec {spec}: {str(e)}' + self.log.error(msg) + self.mgr.events.for_service(spec, 'ERROR', 'Failed to apply: ' + str(e)) + self.mgr.apply_spec_fails.append((spec.service_name(), str(e))) + warnings = [] + for x in self.mgr.apply_spec_fails: + warnings.append(f'{x[0]}: {x[1]}') + self.mgr.set_health_warning('CEPHADM_APPLY_SPEC_FAIL', + f"Failed to apply {len(self.mgr.apply_spec_fails)} service(s): {','.join(x[0] for x in self.mgr.apply_spec_fails)}", + len(self.mgr.apply_spec_fails), + warnings) + return False + + r = None + + # sanity check + final_count = len(daemons) + len(slots_to_add) - len(daemons_to_remove) + if service_type in ['mon', 'mgr'] and final_count < 1: + self.log.debug('cannot scale mon|mgr below 1)') + return False + + # progress + progress_id = str(uuid.uuid4()) + delta: List[str] = [] + if slots_to_add: + delta += [f'+{len(slots_to_add)}'] + if daemons_to_remove: + delta += [f'-{len(daemons_to_remove)}'] + progress_title = f'Updating {spec.service_name()} deployment ({" ".join(delta)} -> {len(all_slots)})' + progress_total = len(slots_to_add) + len(daemons_to_remove) + progress_done = 0 + + def update_progress() -> None: + self.mgr.remote( + 'progress', 'update', progress_id, + ev_msg=progress_title, + ev_progress=(progress_done / progress_total), + add_to_ceph_s=True, + ) + + if progress_total: + update_progress() + + # add any? 
+ did_config = False + + self.log.debug('Hosts that will receive new daemons: %s' % slots_to_add) + self.log.debug('Daemons that will be removed: %s' % daemons_to_remove) + + try: + # assign names + for i in range(len(slots_to_add)): + slot = slots_to_add[i] + slot = slot.assign_name(self.mgr.get_unique_name( + slot.daemon_type, + slot.hostname, + [d for d in daemons if d not in daemons_to_remove], + prefix=spec.service_id, + forcename=slot.name, + rank=slot.rank, + rank_generation=slot.rank_generation, + )) + slots_to_add[i] = slot + if rank_map is not None: + assert slot.rank is not None + assert slot.rank_generation is not None + assert rank_map[slot.rank][slot.rank_generation] is None + rank_map[slot.rank][slot.rank_generation] = slot.name + + if rank_map: + # record the rank_map before we make changes so that if we fail the + # next mgr will clean up. + self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map) + + # remove daemons now, since we are going to fence them anyway + for d in daemons_to_remove: + assert d.hostname is not None + self._remove_daemon(d.name(), d.hostname) + daemons_to_remove = [] + + # fence them + svc.fence_old_ranks(spec, rank_map, len(all_slots)) + + # create daemons + daemon_place_fails = [] + for slot in slots_to_add: + # first remove daemon with conflicting port or name? + if slot.ports or slot.name in [d.name() for d in daemons_to_remove]: + for d in daemons_to_remove: + if ( + d.hostname != slot.hostname + or not (set(d.ports or []) & set(slot.ports)) + or (d.ip and slot.ip and d.ip != slot.ip) + and d.name() != slot.name + ): + continue + if d.name() != slot.name: + self.log.info( + f'Removing {d.name()} before deploying to {slot} to avoid a port or conflict' + ) + # NOTE: we don't check ok-to-stop here to avoid starvation if + # there is only 1 gateway. 
+ self._remove_daemon(d.name(), d.hostname) + daemons_to_remove.remove(d) + progress_done += 1 + break + + # deploy new daemon + daemon_id = slot.name + if not did_config: + svc.config(spec) + did_config = True + + daemon_spec = svc.make_daemon_spec( + slot.hostname, daemon_id, slot.network, spec, + daemon_type=slot.daemon_type, + ports=slot.ports, + ip=slot.ip, + rank=slot.rank, + rank_generation=slot.rank_generation, + ) + self.log.debug('Placing %s.%s on host %s' % ( + slot.daemon_type, daemon_id, slot.hostname)) + + try: + daemon_spec = svc.prepare_create(daemon_spec) + self._create_daemon(daemon_spec) + r = True + progress_done += 1 + update_progress() + except (RuntimeError, OrchestratorError) as e: + msg = (f"Failed while placing {slot.daemon_type}.{daemon_id} " + f"on {slot.hostname}: {e}") + self.mgr.events.for_service(spec, 'ERROR', msg) + self.mgr.log.error(msg) + daemon_place_fails.append(msg) + # only return "no change" if no one else has already succeeded. + # later successes will also change to True + if r is None: + r = False + progress_done += 1 + update_progress() + continue + + # add to daemon list so next name(s) will also be unique + sd = orchestrator.DaemonDescription( + hostname=slot.hostname, + daemon_type=slot.daemon_type, + daemon_id=daemon_id, + ) + daemons.append(sd) + + if daemon_place_fails: + self.mgr.set_health_warning('CEPHADM_DAEMON_PLACE_FAIL', f'Failed to place {len(daemon_place_fails)} daemon(s)', len(daemon_place_fails), daemon_place_fails) + + if service_type == 'mgr': + active_mgr = svc.get_active_daemon(self.mgr.cache.get_daemons_by_type('mgr')) + if active_mgr.daemon_id in [d.daemon_id for d in daemons_to_remove]: + # We can't just remove the active mgr like any other daemon. + # Need to fail over later so it can be removed on next pass. + # This can be accomplished by scheduling a restart of the active mgr. + self.mgr._schedule_daemon_action(active_mgr.name(), 'restart') + + # remove any? 
    def _check_daemons(self) -> None:
        """Reconcile running daemons against their specs: remove orphans,
        reconfig/redeploy daemons whose deps, monmap, extra config, or
        container args changed, and run per-type post actions."""
        self.log.debug('_check_daemons')
        daemons = self.mgr.cache.get_daemons()
        daemons_post: Dict[str, List[orchestrator.DaemonDescription]] = defaultdict(list)
        for dd in daemons:
            # orphan?
            spec = self.mgr.spec_store.active_specs.get(dd.service_name(), None)
            assert dd.hostname is not None
            assert dd.daemon_type is not None
            assert dd.daemon_id is not None
            if not spec and dd.daemon_type not in ['mon', 'mgr', 'osd']:
                # (mon and mgr specs should always exist; osds aren't matched
                # to a service spec)
                self.log.info('Removing orphan daemon %s...' % dd.name())
                self._remove_daemon(dd.name(), dd.hostname)

            # ignore unmanaged services
            if spec and spec.unmanaged:
                continue

            # ignore daemons for deleted services
            if dd.service_name() in self.mgr.spec_store.spec_deleted:
                continue

            # These daemon types require additional configs after creation
            if dd.daemon_type in REQUIRES_POST_ACTIONS:
                daemons_post[dd.daemon_type].append(dd)

            if self.mgr.cephadm_services[daemon_type_to_service(dd.daemon_type)].get_active_daemon(
                    self.mgr.cache.get_daemons_by_service(dd.service_name())).daemon_id == dd.daemon_id:
                dd.is_active = True
            else:
                dd.is_active = False

            deps = self.mgr._calc_daemon_deps(spec, dd.daemon_type, dd.daemon_id)
            last_deps, last_config = self.mgr.cache.get_daemon_last_config_deps(
                dd.hostname, dd.name())
            if last_deps is None:
                last_deps = []
            action = self.mgr.cache.get_scheduled_daemon_action(dd.hostname, dd.name())
            if not last_config:
                self.log.info('Reconfiguring %s (unknown last config time)...' % (
                    dd.name()))
                action = 'reconfig'
            elif last_deps != deps:
                self.log.debug('%s deps %s -> %s' % (dd.name(), last_deps,
                                                     deps))
                self.log.info('Reconfiguring %s (dependencies changed)...' % (
                    dd.name()))
                action = 'reconfig'
            elif spec is not None and hasattr(spec, 'extra_container_args') and dd.extra_container_args != spec.extra_container_args:
                self.log.debug(
                    f'{dd.name()} container cli args {dd.extra_container_args} -> {spec.extra_container_args}')
                self.log.info(f'Redeploying {dd.name()}, (container cli args changed) . . .')
                dd.extra_container_args = spec.extra_container_args
                action = 'redeploy'
            elif self.mgr.last_monmap and \
                    self.mgr.last_monmap > last_config and \
                    dd.daemon_type in CEPH_TYPES:
                self.log.info('Reconfiguring %s (monmap changed)...' % dd.name())
                action = 'reconfig'
            elif self.mgr.extra_ceph_conf_is_newer(last_config) and \
                    dd.daemon_type in CEPH_TYPES:
                self.log.info('Reconfiguring %s (extra config changed)...' % dd.name())
                action = 'reconfig'
            if action:
                # a scheduled redeploy supersedes a computed reconfig
                if self.mgr.cache.get_scheduled_daemon_action(dd.hostname, dd.name()) == 'redeploy' \
                        and action == 'reconfig':
                    action = 'redeploy'
                try:
                    daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(dd)
                    self.mgr._daemon_action(daemon_spec, action=action)
                    if self.mgr.cache.rm_scheduled_daemon_action(dd.hostname, dd.name()):
                        self.mgr.cache.save_host(dd.hostname)
                except OrchestratorError as e:
                    self.mgr.events.from_orch_error(e)
                    if dd.daemon_type in daemons_post:
                        # don't run post actions for a type whose action failed
                        del daemons_post[dd.daemon_type]
                    # continue...
                except Exception as e:
                    self.mgr.events.for_daemon_from_exception(dd.name(), e)
                    if dd.daemon_type in daemons_post:
                        del daemons_post[dd.daemon_type]
                    # continue...

        # do daemon post actions
        for daemon_type, daemon_descs in daemons_post.items():
            run_post = False
            for d in daemon_descs:
                if d.name() in self.mgr.requires_post_actions:
                    self.mgr.requires_post_actions.remove(d.name())
                    run_post = True
            if run_post:
                self.mgr._get_cephadm_service(daemon_type_to_service(
                    daemon_type)).daemon_check_post(daemon_descs)
% dd.name()) + action = 'reconfig' + if action: + if self.mgr.cache.get_scheduled_daemon_action(dd.hostname, dd.name()) == 'redeploy' \ + and action == 'reconfig': + action = 'redeploy' + try: + daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(dd) + self.mgr._daemon_action(daemon_spec, action=action) + if self.mgr.cache.rm_scheduled_daemon_action(dd.hostname, dd.name()): + self.mgr.cache.save_host(dd.hostname) + except OrchestratorError as e: + self.mgr.events.from_orch_error(e) + if dd.daemon_type in daemons_post: + del daemons_post[dd.daemon_type] + # continue... + except Exception as e: + self.mgr.events.for_daemon_from_exception(dd.name(), e) + if dd.daemon_type in daemons_post: + del daemons_post[dd.daemon_type] + # continue... + + # do daemon post actions + for daemon_type, daemon_descs in daemons_post.items(): + run_post = False + for d in daemon_descs: + if d.name() in self.mgr.requires_post_actions: + self.mgr.requires_post_actions.remove(d.name()) + run_post = True + if run_post: + self.mgr._get_cephadm_service(daemon_type_to_service( + daemon_type)).daemon_check_post(daemon_descs) + + def _purge_deleted_services(self) -> None: + self.log.debug('_purge_deleted_services') + existing_services = self.mgr.spec_store.all_specs.items() + for service_name, spec in list(existing_services): + if service_name not in self.mgr.spec_store.spec_deleted: + continue + if self.mgr.cache.get_daemons_by_service(service_name): + continue + if spec.service_type in ['mon', 'mgr']: + continue + + logger.info(f'Purge service {service_name}') + + self.mgr.cephadm_services[spec.service_type].purge(service_name) + self.mgr.spec_store.finally_rm(service_name) + + def convert_tags_to_repo_digest(self) -> None: + if not self.mgr.use_repo_digest: + return + settings = self.mgr.upgrade.get_distinct_container_image_settings() + digests: Dict[str, ContainerInspectInfo] = {} + for container_image_ref in set(settings.values()): + if not is_repo_digest(container_image_ref): + 
image_info = self._get_container_image_info(container_image_ref) + if image_info.repo_digests: + # FIXME: we assume the first digest here is the best + assert is_repo_digest(image_info.repo_digests[0]), image_info + digests[container_image_ref] = image_info + + for entity, container_image_ref in settings.items(): + if not is_repo_digest(container_image_ref): + image_info = digests[container_image_ref] + if image_info.repo_digests: + # FIXME: we assume the first digest here is the best + self.mgr.set_container_image(entity, image_info.repo_digests[0]) + + def _calc_client_files(self) -> Dict[str, Dict[str, Tuple[int, int, int, bytes, str]]]: + # host -> path -> (mode, uid, gid, content, digest) + client_files: Dict[str, Dict[str, Tuple[int, int, int, bytes, str]]] = {} + + # ceph.conf + config = self.mgr.get_minimal_ceph_conf().encode('utf-8') + config_digest = ''.join('%02x' % c for c in hashlib.sha256(config).digest()) + + if self.mgr.manage_etc_ceph_ceph_conf: + try: + pspec = PlacementSpec.from_string(self.mgr.manage_etc_ceph_ceph_conf_hosts) + ha = HostAssignment( + spec=ServiceSpec('mon', placement=pspec), + hosts=self.mgr._schedulable_hosts(), + unreachable_hosts=self.mgr._unreachable_hosts(), + daemons=[], + networks=self.mgr.cache.networks, + ) + all_slots, _, _ = ha.place() + for host in {s.hostname for s in all_slots}: + if host not in client_files: + client_files[host] = {} + client_files[host]['/etc/ceph/ceph.conf'] = ( + 0o644, 0, 0, bytes(config), str(config_digest) + ) + except Exception as e: + self.mgr.log.warning( + f'unable to calc conf hosts: {self.mgr.manage_etc_ceph_ceph_conf_hosts}: {e}') + + # client keyrings + for ks in self.mgr.keys.keys.values(): + try: + ret, keyring, err = self.mgr.mon_command({ + 'prefix': 'auth get', + 'entity': ks.entity, + }) + if ret: + self.log.warning(f'unable to fetch keyring for {ks.entity}') + continue + digest = ''.join('%02x' % c for c in hashlib.sha256( + keyring.encode('utf-8')).digest()) + ha = 
HostAssignment( + spec=ServiceSpec('mon', placement=ks.placement), + hosts=self.mgr._schedulable_hosts(), + unreachable_hosts=self.mgr._unreachable_hosts(), + daemons=[], + networks=self.mgr.cache.networks, + ) + all_slots, _, _ = ha.place() + for host in {s.hostname for s in all_slots}: + if host not in client_files: + client_files[host] = {} + client_files[host]['/etc/ceph/ceph.conf'] = ( + 0o644, 0, 0, bytes(config), str(config_digest) + ) + client_files[host][ks.path] = ( + ks.mode, ks.uid, ks.gid, keyring.encode('utf-8'), digest + ) + except Exception as e: + self.log.warning( + f'unable to calc client keyring {ks.entity} placement {ks.placement}: {e}') + return client_files + + def _write_client_files(self, + client_files: Dict[str, Dict[str, Tuple[int, int, int, bytes, str]]], + host: str) -> None: + updated_files = False + old_files = self.mgr.cache.get_host_client_files(host).copy() + for path, m in client_files.get(host, {}).items(): + mode, uid, gid, content, digest = m + if path in old_files: + match = old_files[path] == (digest, mode, uid, gid) + del old_files[path] + if match: + continue + self.log.info(f'Updating {host}:{path}') + self._write_remote_file(host, path, content, mode, uid, gid) + self.mgr.cache.update_client_file(host, path, digest, mode, uid, gid) + updated_files = True + for path in old_files.keys(): + if path == '/etc/ceph/ceph.conf': + continue + self.log.info(f'Removing {host}:{path}') + with self._remote_connection(host) as tpl: + conn, connr = tpl + out, err, code = remoto.process.check( + conn, + ['rm', '-f', path]) + updated_files = True + self.mgr.cache.removed_client_file(host, path) + if updated_files: + self.mgr.cache.save_host(host) + + def _create_daemon(self, + daemon_spec: CephadmDaemonDeploySpec, + reconfig: bool = False, + osd_uuid_map: Optional[Dict[str, Any]] = None, + ) -> str: + + with set_exception_subject('service', orchestrator.DaemonDescription( + daemon_type=daemon_spec.daemon_type, + 
daemon_id=daemon_spec.daemon_id, + hostname=daemon_spec.host, + ).service_id(), overwrite=True): + + try: + image = '' + start_time = datetime_now() + ports: List[int] = daemon_spec.ports if daemon_spec.ports else [] + + if daemon_spec.daemon_type == 'container': + spec = cast(CustomContainerSpec, + self.mgr.spec_store[daemon_spec.service_name].spec) + image = spec.image + if spec.ports: + ports.extend(spec.ports) + + if daemon_spec.daemon_type == 'cephadm-exporter': + if not reconfig: + assert daemon_spec.host + self._deploy_cephadm_binary(daemon_spec.host) + + # TCP port to open in the host firewall + if len(ports) > 0: + daemon_spec.extra_args.extend([ + '--tcp-ports', ' '.join(map(str, ports)) + ]) + + # osd deployments needs an --osd-uuid arg + if daemon_spec.daemon_type == 'osd': + if not osd_uuid_map: + osd_uuid_map = self.mgr.get_osd_uuid_map() + osd_uuid = osd_uuid_map.get(daemon_spec.daemon_id) + if not osd_uuid: + raise OrchestratorError('osd.%s not in osdmap' % daemon_spec.daemon_id) + daemon_spec.extra_args.extend(['--osd-fsid', osd_uuid]) + + if reconfig: + daemon_spec.extra_args.append('--reconfig') + if self.mgr.allow_ptrace: + daemon_spec.extra_args.append('--allow-ptrace') + + try: + eca = daemon_spec.extra_container_args + if eca: + for a in eca: + daemon_spec.extra_args.append(f'--extra-container-args={a}') + except AttributeError: + eca = None + + if self.mgr.cache.host_needs_registry_login(daemon_spec.host) and self.mgr.registry_url: + self._registry_login(daemon_spec.host, + json.loads(str(self.mgr.get_store('registry_credentials')))) + + self.log.info('%s daemon %s on %s' % ( + 'Reconfiguring' if reconfig else 'Deploying', + daemon_spec.name(), daemon_spec.host)) + + out, err, code = self._run_cephadm( + daemon_spec.host, daemon_spec.name(), 'deploy', + [ + '--name', daemon_spec.name(), + '--meta-json', json.dumps({ + 'service_name': daemon_spec.service_name, + 'ports': daemon_spec.ports, + 'ip': daemon_spec.ip, + 'deployed_by': 
self.mgr.get_active_mgr_digests(), + 'rank': daemon_spec.rank, + 'rank_generation': daemon_spec.rank_generation, + 'extra_container_args': eca + }), + '--config-json', '-', + ] + daemon_spec.extra_args, + stdin=json.dumps(daemon_spec.final_config), + image=image, + ) + + # refresh daemon state? (ceph daemon reconfig does not need it) + if not reconfig or daemon_spec.daemon_type not in CEPH_TYPES: + if not code and daemon_spec.host in self.mgr.cache.daemons: + # prime cached service state with what we (should have) + # just created + sd = daemon_spec.to_daemon_description( + DaemonDescriptionStatus.starting, 'starting') + self.mgr.cache.add_daemon(daemon_spec.host, sd) + if daemon_spec.daemon_type in REQUIRES_POST_ACTIONS: + self.mgr.requires_post_actions.add(daemon_spec.name()) + self.mgr.cache.invalidate_host_daemons(daemon_spec.host) + + self.mgr.cache.update_daemon_config_deps( + daemon_spec.host, daemon_spec.name(), daemon_spec.deps, start_time) + self.mgr.cache.save_host(daemon_spec.host) + msg = "{} {} on host '{}'".format( + 'Reconfigured' if reconfig else 'Deployed', daemon_spec.name(), daemon_spec.host) + if not code: + self.mgr.events.for_daemon(daemon_spec.name(), OrchestratorEvent.INFO, msg) + else: + what = 'reconfigure' if reconfig else 'deploy' + self.mgr.events.for_daemon( + daemon_spec.name(), OrchestratorEvent.ERROR, f'Failed to {what}: {err}') + return msg + except OrchestratorError: + redeploy = daemon_spec.name() in self.mgr.cache.get_daemon_names() + if not reconfig and not redeploy: + # we have to clean up the daemon. E.g. keyrings. 
+ servict_type = daemon_type_to_service(daemon_spec.daemon_type) + dd = daemon_spec.to_daemon_description(DaemonDescriptionStatus.error, 'failed') + self.mgr.cephadm_services[servict_type].post_remove(dd, is_failed_deploy=True) + raise + + def _remove_daemon(self, name: str, host: str, no_post_remove: bool = False) -> str: + """ + Remove a daemon + """ + (daemon_type, daemon_id) = name.split('.', 1) + daemon = orchestrator.DaemonDescription( + daemon_type=daemon_type, + daemon_id=daemon_id, + hostname=host) + + with set_exception_subject('service', daemon.service_id(), overwrite=True): + + self.mgr.cephadm_services[daemon_type_to_service(daemon_type)].pre_remove(daemon) + # NOTE: we are passing the 'force' flag here, which means + # we can delete a mon instances data. + dd = self.mgr.cache.get_daemon(daemon.daemon_name) + if dd.ports: + args = ['--name', name, '--force', '--tcp-ports', ' '.join(map(str, dd.ports))] + else: + args = ['--name', name, '--force'] + + self.log.info('Removing daemon %s from %s -- ports %s' % (name, host, dd.ports)) + out, err, code = self._run_cephadm( + host, name, 'rm-daemon', args) + if not code: + # remove item from cache + self.mgr.cache.rm_daemon(host, name) + self.mgr.cache.invalidate_host_daemons(host) + + if not no_post_remove: + self.mgr.cephadm_services[daemon_type_to_service( + daemon_type)].post_remove(daemon, is_failed_deploy=False) + + return "Removed {} from host '{}'".format(name, host) + + def _run_cephadm_json(self, + host: str, + entity: Union[CephadmNoImage, str], + command: str, + args: List[str], + no_fsid: Optional[bool] = False, + image: Optional[str] = "", + ) -> Any: + try: + out, err, code = self._run_cephadm( + host, entity, command, args, no_fsid=no_fsid, image=image) + if code: + raise OrchestratorError(f'host {host} `cephadm {command}` returned {code}: {err}') + except Exception as e: + raise OrchestratorError(f'host {host} `cephadm {command}` failed: {e}') + try: + return json.loads(''.join(out)) + except 
(ValueError, KeyError): + msg = f'host {host} `cephadm {command}` failed: Cannot decode JSON' + self.log.exception(f'{msg}: {"".join(out)}') + raise OrchestratorError(msg) + + def _run_cephadm(self, + host: str, + entity: Union[CephadmNoImage, str], + command: str, + args: List[str], + addr: Optional[str] = "", + stdin: Optional[str] = "", + no_fsid: Optional[bool] = False, + error_ok: Optional[bool] = False, + image: Optional[str] = "", + env_vars: Optional[List[str]] = None, + ) -> Tuple[List[str], List[str], int]: + """ + Run cephadm on the remote host with the given command + args + + Important: You probably don't want to run _run_cephadm from CLI handlers + + :env_vars: in format -> [KEY=VALUE, ..] + """ + self.log.debug(f"_run_cephadm : command = {command}") + self.log.debug(f"_run_cephadm : args = {args}") + + bypass_image = ('cephadm-exporter',) + + with self._remote_connection(host, addr) as tpl: + conn, connr = tpl + assert image or entity + # Skip the image check for daemons deployed that are not ceph containers + if not str(entity).startswith(bypass_image): + if not image and entity is not cephadmNoImage: + image = self.mgr._get_container_image(entity) + + final_args = [] + + # global args + if env_vars: + for env_var_pair in env_vars: + final_args.extend(['--env', env_var_pair]) + + if image: + final_args.extend(['--image', image]) + + if not self.mgr.container_init: + final_args += ['--no-container-init'] + + # subcommand + final_args.append(command) + + # subcommand args + if not no_fsid: + final_args += ['--fsid', self.mgr._cluster_fsid] + + final_args += args + + # exec + self.log.debug('args: %s' % (' '.join(final_args))) + if self.mgr.mode == 'root': + if stdin: + self.log.debug('stdin: %s' % stdin) + + try: + # if host has gone offline this is likely where we'll fail first + python = connr.choose_python() + except RuntimeError as e: + self.mgr.offline_hosts.add(host) + self.mgr._reset_con(host) + if error_ok: + return [], [str(e)], 1 + raise + 
if not python: + raise RuntimeError( + 'unable to find python on %s (tried %s in %s)' % ( + host, remotes.PYTHONS, remotes.PATH)) + try: + out, err, code = remoto.process.check( + conn, + [python, self.mgr.cephadm_binary_path] + final_args, + stdin=stdin.encode('utf-8') if stdin is not None else None) + if code == 2: + out_ls, err_ls, code_ls = remoto.process.check( + conn, ['ls', self.mgr.cephadm_binary_path]) + if code_ls == 2: + self._deploy_cephadm_binary_conn(conn, host) + out, err, code = remoto.process.check( + conn, + [python, self.mgr.cephadm_binary_path] + final_args, + stdin=stdin.encode('utf-8') if stdin is not None else None) + + except RuntimeError as e: + self.mgr._reset_con(host) + if error_ok: + return [], [str(e)], 1 + raise + + elif self.mgr.mode == 'cephadm-package': + try: + out, err, code = remoto.process.check( + conn, + ['sudo', '/usr/bin/cephadm'] + final_args, + stdin=stdin) + except RuntimeError as e: + self.mgr._reset_con(host) + if error_ok: + return [], [str(e)], 1 + raise + else: + assert False, 'unsupported mode' + + self.log.debug('code: %d' % code) + if out: + self.log.debug('out: %s' % '\n'.join(out)) + if err: + self.log.debug('err: %s' % '\n'.join(err)) + if code and not error_ok: + raise OrchestratorError( + 'cephadm exited with an error code: %d, stderr:%s' % ( + code, '\n'.join(err))) + return out, err, code + + def _get_container_image_info(self, image_name: str) -> ContainerInspectInfo: + # pick a random host... 
+ host = None + for host_name in self.mgr.inventory.keys(): + host = host_name + break + if not host: + raise OrchestratorError('no hosts defined') + if self.mgr.cache.host_needs_registry_login(host) and self.mgr.registry_url: + self._registry_login(host, + json.loads(str(self.mgr.get_store('registry_credentials')))) + + pullargs: List[str] = [] + if self.mgr.registry_insecure: + pullargs.append("--insecure") + + j = self._run_cephadm_json(host, '', 'pull', pullargs, image=image_name, no_fsid=True) + + r = ContainerInspectInfo( + j['image_id'], + j.get('ceph_version'), + j.get('repo_digests') + ) + self.log.debug(f'image {image_name} -> {r}') + return r + + # function responsible for logging single host into custom registry + def _registry_login(self, host: str, registry_json: Dict[str, str]) -> Optional[str]: + self.log.debug( + f"Attempting to log host {host} into custom registry @ {registry_json['url']}") + # want to pass info over stdin rather than through normal list of args + out, err, code = self._run_cephadm( + host, 'mon', 'registry-login', + ['--registry-json', '-'], stdin=json.dumps(registry_json), error_ok=True) + if code: + return f"Host {host} failed to login to {registry_json['url']} as {registry_json['username']} with given password" + return None + + def _deploy_cephadm_binary(self, host: str) -> None: + # Use tee (from coreutils) to create a copy of cephadm on the target machine + self.log.info(f"Deploying cephadm binary to {host}") + with self._remote_connection(host) as tpl: + conn, _connr = tpl + return self._deploy_cephadm_binary_conn(conn, host) + + def _deploy_cephadm_binary_conn(self, conn: "BaseConnection", host: str) -> None: + _out, _err, code = remoto.process.check( + conn, + ['mkdir', '-p', f'/var/lib/ceph/{self.mgr._cluster_fsid}']) + if code: + msg = f"Unable to deploy the cephadm binary to {host}: {_err}" + self.log.warning(msg) + raise OrchestratorError(msg) + _out, _err, code = remoto.process.check( + conn, + ['tee', '-', 
self.mgr.cephadm_binary_path], + stdin=self.mgr._cephadm.encode('utf-8')) + if code: + msg = f"Unable to deploy the cephadm binary to {host}: {_err}" + self.log.warning(msg) + raise OrchestratorError(msg) + + def _write_remote_file(self, + host: str, + path: str, + content: bytes, + mode: int, + uid: int, + gid: int) -> None: + with self._remote_connection(host) as tpl: + conn, connr = tpl + try: + errmsg = connr.write_file(path, content, mode, uid, gid) + if errmsg is not None: + raise OrchestratorError(errmsg) + except Exception as e: + msg = f"Unable to write {host}:{path}: {e}" + self.log.warning(msg) + raise OrchestratorError(msg) + + @contextmanager + def _remote_connection(self, + host: str, + addr: Optional[str] = None, + ) -> Iterator[Tuple["BaseConnection", Any]]: + if not addr and host in self.mgr.inventory: + addr = self.mgr.inventory.get_addr(host) + + self.mgr.offline_hosts_remove(host) + + try: + try: + if not addr: + raise OrchestratorError("host address is empty") + conn, connr = self.mgr._get_connection(addr) + except OSError as e: + self.mgr._reset_con(host) + msg = f"Can't communicate with remote host `{addr}`, possibly because python3 is not installed there: {str(e)}" + raise execnet.gateway_bootstrap.HostNotFound(msg) + + yield (conn, connr) + + except execnet.gateway_bootstrap.HostNotFound as e: + # this is a misleading exception as it seems to be thrown for + # any sort of connection failure, even those having nothing to + # do with "host not found" (e.g., ssh key permission denied). + self.mgr.offline_hosts.add(host) + self.mgr._reset_con(host) + + user = self.mgr.ssh_user if self.mgr.mode == 'root' else 'cephadm' + if str(e).startswith("Can't communicate"): + msg = str(e) + else: + msg = f'''Failed to connect to {host} ({addr}). 
+Please make sure that the host is reachable and accepts connections using the cephadm SSH key + +To add the cephadm SSH key to the host: +> ceph cephadm get-pub-key > ~/ceph.pub +> ssh-copy-id -f -i ~/ceph.pub {user}@{addr} + +To check that the host is reachable open a new shell with the --no-hosts flag: +> cephadm shell --no-hosts + +Then run the following: +> ceph cephadm get-ssh-config > ssh_config +> ceph config-key get mgr/cephadm/ssh_identity_key > ~/cephadm_private_key +> chmod 0600 ~/cephadm_private_key +> ssh -F ssh_config -i ~/cephadm_private_key {user}@{addr}''' + raise OrchestratorError(msg) from e + except Exception as ex: + self.log.exception(ex) + raise diff --git a/src/pybind/mgr/cephadm/services/__init__.py b/src/pybind/mgr/cephadm/services/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/pybind/mgr/cephadm/services/__init__.py diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py new file mode 100644 index 000000000..92d293fcd --- /dev/null +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -0,0 +1,1043 @@ +import errno +import json +import logging +import re +import socket +import time +from abc import ABCMeta, abstractmethod +from typing import TYPE_CHECKING, List, Callable, TypeVar, \ + Optional, Dict, Any, Tuple, NewType, cast + +from mgr_module import HandleCommandResult, MonCommandFailed + +from ceph.deployment.service_spec import ServiceSpec, RGWSpec +from ceph.deployment.utils import is_ipv6, unwrap_ipv6 +from mgr_util import build_url +from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus +from orchestrator._interface import daemon_type_to_service +from cephadm import utils + +if TYPE_CHECKING: + from cephadm.module import CephadmOrchestrator + +logger = logging.getLogger(__name__) + +ServiceSpecs = TypeVar('ServiceSpecs', bound=ServiceSpec) +AuthEntity = NewType('AuthEntity', str) + + +class 
CephadmDaemonDeploySpec: + # typing.NamedTuple + Generic is broken in py36 + def __init__(self, host: str, daemon_id: str, + service_name: str, + network: Optional[str] = None, + keyring: Optional[str] = None, + extra_args: Optional[List[str]] = None, + ceph_conf: str = '', + extra_files: Optional[Dict[str, Any]] = None, + daemon_type: Optional[str] = None, + ip: Optional[str] = None, + ports: Optional[List[int]] = None, + rank: Optional[int] = None, + rank_generation: Optional[int] = None, + extra_container_args: Optional[List[str]] = None): + """ + A data struction to encapsulate `cephadm deploy ... + """ + self.host: str = host + self.daemon_id = daemon_id + self.service_name = service_name + daemon_type = daemon_type or (service_name.split('.')[0]) + assert daemon_type is not None + self.daemon_type: str = daemon_type + + # mons + self.network = network + + # for run_cephadm. + self.keyring: Optional[str] = keyring + + # For run_cephadm. Would be great to have more expressive names. 
+ self.extra_args: List[str] = extra_args or [] + + self.ceph_conf = ceph_conf + self.extra_files = extra_files or {} + + # TCP ports used by the daemon + self.ports: List[int] = ports or [] + self.ip: Optional[str] = ip + + # values to be populated during generate_config calls + # and then used in _run_cephadm + self.final_config: Dict[str, Any] = {} + self.deps: List[str] = [] + + self.rank: Optional[int] = rank + self.rank_generation: Optional[int] = rank_generation + + self.extra_container_args = extra_container_args + + def name(self) -> str: + return '%s.%s' % (self.daemon_type, self.daemon_id) + + def config_get_files(self) -> Dict[str, Any]: + files = self.extra_files + if self.ceph_conf: + files['config'] = self.ceph_conf + + return files + + @staticmethod + def from_daemon_description(dd: DaemonDescription) -> 'CephadmDaemonDeploySpec': + assert dd.hostname + assert dd.daemon_id + assert dd.daemon_type + return CephadmDaemonDeploySpec( + host=dd.hostname, + daemon_id=dd.daemon_id, + daemon_type=dd.daemon_type, + service_name=dd.service_name(), + ip=dd.ip, + ports=dd.ports, + rank=dd.rank, + rank_generation=dd.rank_generation, + extra_container_args=dd.extra_container_args, + ) + + def to_daemon_description(self, status: DaemonDescriptionStatus, status_desc: str) -> DaemonDescription: + return DaemonDescription( + daemon_type=self.daemon_type, + daemon_id=self.daemon_id, + service_name=self.service_name, + hostname=self.host, + status=status, + status_desc=status_desc, + ip=self.ip, + ports=self.ports, + rank=self.rank, + rank_generation=self.rank_generation, + extra_container_args=self.extra_container_args, + ) + + +class CephadmService(metaclass=ABCMeta): + """ + Base class for service types. Often providing a create() and config() fn. 
+ """ + + @property + @abstractmethod + def TYPE(self) -> str: + pass + + def __init__(self, mgr: "CephadmOrchestrator"): + self.mgr: "CephadmOrchestrator" = mgr + + def allow_colo(self) -> bool: + """ + Return True if multiple daemons of the same type can colocate on + the same host. + """ + return False + + def primary_daemon_type(self) -> str: + """ + This is the type of the primary (usually only) daemon to be deployed. + """ + return self.TYPE + + def per_host_daemon_type(self) -> Optional[str]: + """ + If defined, this type of daemon will be deployed once for each host + containing one or more daemons of the primary type. + """ + return None + + def ranked(self) -> bool: + """ + If True, we will assign a stable rank (0, 1, ...) and monotonically increasing + generation (0, 1, ...) to each daemon we create/deploy. + """ + return False + + def fence_old_ranks(self, + spec: ServiceSpec, + rank_map: Dict[int, Dict[int, Optional[str]]], + num_ranks: int) -> None: + assert False + + def make_daemon_spec( + self, + host: str, + daemon_id: str, + network: str, + spec: ServiceSpecs, + daemon_type: Optional[str] = None, + ports: Optional[List[int]] = None, + ip: Optional[str] = None, + rank: Optional[int] = None, + rank_generation: Optional[int] = None, + ) -> CephadmDaemonDeploySpec: + try: + eca = spec.extra_container_args + except AttributeError: + eca = None + return CephadmDaemonDeploySpec( + host=host, + daemon_id=daemon_id, + service_name=spec.service_name(), + network=network, + daemon_type=daemon_type, + ports=ports, + ip=ip, + rank=rank, + rank_generation=rank_generation, + extra_container_args=eca, + ) + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + raise NotImplementedError() + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + raise NotImplementedError() + + def config(self, spec: ServiceSpec) -> None: + """ + Configure the cluster for this service. 
Only called *once* per + service apply. Not for every daemon. + """ + pass + + def daemon_check_post(self, daemon_descrs: List[DaemonDescription]) -> None: + """The post actions needed to be done after daemons are checked""" + if self.mgr.config_dashboard: + if 'dashboard' in self.mgr.get('mgr_map')['modules']: + self.config_dashboard(daemon_descrs) + else: + logger.debug('Dashboard is not enabled. Skip configuration.') + + def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: + """Config dashboard settings.""" + raise NotImplementedError() + + def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: + # if this is called for a service type where it hasn't explcitly been + # defined, return empty Daemon Desc + return DaemonDescription() + + def get_keyring_with_caps(self, entity: AuthEntity, caps: List[str]) -> str: + ret, keyring, err = self.mgr.mon_command({ + 'prefix': 'auth get-or-create', + 'entity': entity, + 'caps': caps, + }) + if err: + ret, out, err = self.mgr.mon_command({ + 'prefix': 'auth caps', + 'entity': entity, + 'caps': caps, + }) + if err: + self.mgr.log.warning(f"Unable to update caps for {entity}") + return keyring + + def _inventory_get_fqdn(self, hostname: str) -> str: + """Get a host's FQDN with its hostname. + + If the FQDN can't be resolved, the address from the inventory will + be returned instead. + """ + addr = self.mgr.inventory.get_addr(hostname) + return socket.getfqdn(addr) + + def _set_service_url_on_dashboard(self, + service_name: str, + get_mon_cmd: str, + set_mon_cmd: str, + service_url: str) -> None: + """A helper to get and set service_url via Dashboard's MON command. + + If result of get_mon_cmd differs from service_url, set_mon_cmd will + be sent to set the service_url. 
+ """ + def get_set_cmd_dicts(out: str) -> List[dict]: + cmd_dict = { + 'prefix': set_mon_cmd, + 'value': service_url + } + return [cmd_dict] if service_url != out else [] + + self._check_and_set_dashboard( + service_name=service_name, + get_cmd=get_mon_cmd, + get_set_cmd_dicts=get_set_cmd_dicts + ) + + def _check_and_set_dashboard(self, + service_name: str, + get_cmd: str, + get_set_cmd_dicts: Callable[[str], List[dict]]) -> None: + """A helper to set configs in the Dashboard. + + The method is useful for the pattern: + - Getting a config from Dashboard by using a Dashboard command. e.g. current iSCSI + gateways. + - Parse or deserialize previous output. e.g. Dashboard command returns a JSON string. + - Determine if the config need to be update. NOTE: This step is important because if a + Dashboard command modified Ceph config, cephadm's config_notify() is called. Which + kicks the serve() loop and the logic using this method is likely to be called again. + A config should be updated only when needed. + - Update a config in Dashboard by using a Dashboard command. + + :param service_name: the service name to be used for logging + :type service_name: str + :param get_cmd: Dashboard command prefix to get config. e.g. dashboard get-grafana-api-url + :type get_cmd: str + :param get_set_cmd_dicts: function to create a list, and each item is a command dictionary. + e.g. + [ + { + 'prefix': 'dashboard iscsi-gateway-add', + 'service_url': 'http://admin:admin@aaa:5000', + 'name': 'aaa' + }, + { + 'prefix': 'dashboard iscsi-gateway-add', + 'service_url': 'http://admin:admin@bbb:5000', + 'name': 'bbb' + } + ] + The function should return empty list if no command need to be sent. 
+ :type get_set_cmd_dicts: Callable[[str], List[dict]] + """ + + try: + _, out, _ = self.mgr.check_mon_command({ + 'prefix': get_cmd + }) + except MonCommandFailed as e: + logger.warning('Failed to get Dashboard config for %s: %s', service_name, e) + return + cmd_dicts = get_set_cmd_dicts(out.strip()) + for cmd_dict in list(cmd_dicts): + try: + inbuf = cmd_dict.pop('inbuf', None) + _, out, _ = self.mgr.check_mon_command(cmd_dict, inbuf) + except MonCommandFailed as e: + logger.warning('Failed to set Dashboard config for %s: %s', service_name, e) + + def ok_to_stop_osd( + self, + osds: List[str], + known: Optional[List[str]] = None, # output argument + force: bool = False) -> HandleCommandResult: + r = HandleCommandResult(*self.mgr.mon_command({ + 'prefix': "osd ok-to-stop", + 'ids': osds, + 'max': 16, + })) + j = None + try: + j = json.loads(r.stdout) + except json.decoder.JSONDecodeError: + self.mgr.log.warning("osd ok-to-stop didn't return structured result") + raise + if r.retval: + return r + if known is not None and j and j.get('ok_to_stop'): + self.mgr.log.debug(f"got {j}") + known.extend([f'osd.{x}' for x in j.get('osds', [])]) + return HandleCommandResult( + 0, + f'{",".join(["osd.%s" % o for o in osds])} {"is" if len(osds) == 1 else "are"} safe to restart', + '' + ) + + def ok_to_stop( + self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None # output argument + ) -> HandleCommandResult: + names = [f'{self.TYPE}.{d_id}' for d_id in daemon_ids] + out = f'It appears safe to stop {",".join(names)}' + err = f'It is NOT safe to stop {",".join(names)} at this time' + + if self.TYPE not in ['mon', 'osd', 'mds']: + logger.debug(out) + return HandleCommandResult(0, out) + + if self.TYPE == 'osd': + return self.ok_to_stop_osd(daemon_ids, known, force) + + r = HandleCommandResult(*self.mgr.mon_command({ + 'prefix': f'{self.TYPE} ok-to-stop', + 'ids': daemon_ids, + })) + + if r.retval: + err = f'{err}: {r.stderr}' if r.stderr else err 
+ logger.debug(err) + return HandleCommandResult(r.retval, r.stdout, err) + + out = f'{out}: {r.stdout}' if r.stdout else out + logger.debug(out) + return HandleCommandResult(r.retval, out, r.stderr) + + def _enough_daemons_to_stop(self, daemon_type: str, daemon_ids: List[str], service: str, low_limit: int, alert: bool = False) -> Tuple[bool, str]: + # Provides a warning about if it possible or not to stop <n> daemons in a service + names = [f'{daemon_type}.{d_id}' for d_id in daemon_ids] + number_of_running_daemons = len( + [daemon + for daemon in self.mgr.cache.get_daemons_by_type(daemon_type) + if daemon.status == DaemonDescriptionStatus.running]) + if (number_of_running_daemons - len(daemon_ids)) >= low_limit: + return False, f'It is presumed safe to stop {names}' + + num_daemons_left = number_of_running_daemons - len(daemon_ids) + + def plural(count: int) -> str: + return 'daemon' if count == 1 else 'daemons' + + left_count = "no" if num_daemons_left == 0 else num_daemons_left + + if alert: + out = (f'ALERT: Cannot stop {names} in {service} service. ' + f'Not enough remaining {service} daemons. ' + f'Please deploy at least {low_limit + 1} {service} daemons before stopping {names}. ') + else: + out = (f'WARNING: Stopping {len(daemon_ids)} out of {number_of_running_daemons} daemons in {service} service. ' + f'Service will not be operational with {left_count} {plural(num_daemons_left)} left. ' + f'At least {low_limit} {plural(low_limit)} must be running to guarantee service. ') + return True, out + + def pre_remove(self, daemon: DaemonDescription) -> None: + """ + Called before the daemon is removed. + """ + assert daemon.daemon_type is not None + assert self.TYPE == daemon_type_to_service(daemon.daemon_type) + logger.debug(f'Pre remove daemon {self.TYPE}.{daemon.daemon_id}') + + def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None: + """ + Called after the daemon is removed. 
+ """ + assert daemon.daemon_type is not None + assert self.TYPE == daemon_type_to_service(daemon.daemon_type) + logger.debug(f'Post remove daemon {self.TYPE}.{daemon.daemon_id}') + + def purge(self, service_name: str) -> None: + """Called to carry out any purge tasks following service removal""" + logger.debug(f'Purge called for {self.TYPE} - no action taken') + + +class CephService(CephadmService): + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + # Ceph.daemons (mon, mgr, mds, osd, etc) + cephadm_config = self.get_config_and_keyring( + daemon_spec.daemon_type, + daemon_spec.daemon_id, + host=daemon_spec.host, + keyring=daemon_spec.keyring, + extra_ceph_config=daemon_spec.ceph_conf) + + if daemon_spec.config_get_files(): + cephadm_config.update({'files': daemon_spec.config_get_files()}) + + return cephadm_config, [] + + def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None: + super().post_remove(daemon, is_failed_deploy=is_failed_deploy) + self.remove_keyring(daemon) + + def get_auth_entity(self, daemon_id: str, host: str = "") -> AuthEntity: + """ + Map the daemon id to a cephx keyring entity name + """ + # despite this mapping entity names to daemons, self.TYPE within + # the CephService class refers to service types, not daemon types + if self.TYPE in ['rgw', 'rbd-mirror', 'cephfs-mirror', 'nfs', "iscsi", 'ingress']: + return AuthEntity(f'client.{self.TYPE}.{daemon_id}') + elif self.TYPE == 'crash': + if host == "": + raise OrchestratorError("Host not provided to generate <crash> auth entity name") + return AuthEntity(f'client.{self.TYPE}.{host}') + elif self.TYPE == 'mon': + return AuthEntity('mon.') + elif self.TYPE in ['mgr', 'osd', 'mds']: + return AuthEntity(f'{self.TYPE}.{daemon_id}') + else: + raise OrchestratorError("unknown daemon type") + + def get_config_and_keyring(self, + daemon_type: str, + daemon_id: str, + host: str, + keyring: Optional[str] = None, + 
extra_ceph_config: Optional[str] = None + ) -> Dict[str, Any]: + # keyring + if not keyring: + entity: AuthEntity = self.get_auth_entity(daemon_id, host=host) + ret, keyring, err = self.mgr.check_mon_command({ + 'prefix': 'auth get', + 'entity': entity, + }) + + config = self.mgr.get_minimal_ceph_conf() + + if extra_ceph_config: + config += extra_ceph_config + + return { + 'config': config, + 'keyring': keyring, + } + + def remove_keyring(self, daemon: DaemonDescription) -> None: + assert daemon.daemon_id is not None + assert daemon.hostname is not None + daemon_id: str = daemon.daemon_id + host: str = daemon.hostname + + assert daemon.daemon_type != 'mon' + + entity = self.get_auth_entity(daemon_id, host=host) + + logger.info(f'Removing key for {entity}') + ret, out, err = self.mgr.mon_command({ + 'prefix': 'auth rm', + 'entity': entity, + }) + + +class MonService(CephService): + TYPE = 'mon' + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + """ + Create a new monitor on the given host. + """ + assert self.TYPE == daemon_spec.daemon_type + name, _, network = daemon_spec.daemon_id, daemon_spec.host, daemon_spec.network + + # get mon. 
key + ret, keyring, err = self.mgr.check_mon_command({ + 'prefix': 'auth get', + 'entity': self.get_auth_entity(name), + }) + + extra_config = '[mon.%s]\n' % name + if network: + # infer whether this is a CIDR network, addrvec, or plain IP + if '/' in network: + extra_config += 'public network = %s\n' % network + elif network.startswith('[v') and network.endswith(']'): + extra_config += 'public addrv = %s\n' % network + elif is_ipv6(network): + extra_config += 'public addr = %s\n' % unwrap_ipv6(network) + elif ':' not in network: + extra_config += 'public addr = %s\n' % network + else: + raise OrchestratorError( + 'Must specify a CIDR network, ceph addrvec, or plain IP: \'%s\'' % network) + else: + # try to get the public_network from the config + ret, network, err = self.mgr.check_mon_command({ + 'prefix': 'config get', + 'who': 'mon', + 'key': 'public_network', + }) + network = network.strip() if network else network + if not network: + raise OrchestratorError( + 'Must set public_network config option or specify a CIDR network, ceph addrvec, or plain IP') + if '/' not in network: + raise OrchestratorError( + 'public_network is set but does not look like a CIDR network: \'%s\'' % network) + extra_config += 'public network = %s\n' % network + + daemon_spec.ceph_conf = extra_config + daemon_spec.keyring = keyring + + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + + return daemon_spec + + def _check_safe_to_destroy(self, mon_id: str) -> None: + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'quorum_status', + }) + try: + j = json.loads(out) + except Exception: + raise OrchestratorError('failed to parse quorum status') + + mons = [m['name'] for m in j['monmap']['mons']] + if mon_id not in mons: + logger.info('Safe to remove mon.%s: not in monmap (%s)' % ( + mon_id, mons)) + return + new_mons = [m for m in mons if m != mon_id] + new_quorum = [m for m in j['quorum_names'] if m != mon_id] + if len(new_quorum) > len(new_mons) / 
2: + logger.info('Safe to remove mon.%s: new quorum should be %s (from %s)' % + (mon_id, new_quorum, new_mons)) + return + raise OrchestratorError( + 'Removing %s would break mon quorum (new quorum %s, new mons %s)' % (mon_id, new_quorum, new_mons)) + + def pre_remove(self, daemon: DaemonDescription) -> None: + super().pre_remove(daemon) + + assert daemon.daemon_id is not None + daemon_id: str = daemon.daemon_id + self._check_safe_to_destroy(daemon_id) + + # remove mon from quorum before we destroy the daemon + logger.info('Removing monitor %s from monmap...' % daemon_id) + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'mon rm', + 'name': daemon_id, + }) + + def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None: + # Do not remove the mon keyring. + # super().post_remove(daemon) + pass + + +class MgrService(CephService): + TYPE = 'mgr' + + def allow_colo(self) -> bool: + if self.mgr.get_ceph_option('mgr_standby_modules'): + # traditional mgr mode: standby daemons' modules listen on + # ports and redirect to the primary. we must not schedule + # multiple mgrs on the same host or else ports will + # conflict. + return False + else: + # standby daemons do nothing, and therefore port conflicts + # are not a concern. + return True + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + """ + Create a new manager instance on a host. + """ + assert self.TYPE == daemon_spec.daemon_type + mgr_id, _ = daemon_spec.daemon_id, daemon_spec.host + + # get mgr. 
key + keyring = self.get_keyring_with_caps(self.get_auth_entity(mgr_id), + ['mon', 'profile mgr', + 'osd', 'allow *', + 'mds', 'allow *']) + + # Retrieve ports used by manager modules + # In the case of the dashboard port and with several manager daemons + # running in different hosts, it exists the possibility that the + # user has decided to use different dashboard ports in each server + # If this is the case then the dashboard port opened will be only the used + # as default. + ports = [] + ret, mgr_services, err = self.mgr.check_mon_command({ + 'prefix': 'mgr services', + }) + if mgr_services: + mgr_endpoints = json.loads(mgr_services) + for end_point in mgr_endpoints.values(): + port = re.search(r'\:\d+\/', end_point) + if port: + ports.append(int(port[0][1:-1])) + + if ports: + daemon_spec.ports = ports + + daemon_spec.keyring = keyring + + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + + return daemon_spec + + def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: + for daemon in daemon_descrs: + assert daemon.daemon_type is not None + assert daemon.daemon_id is not None + if self.mgr.daemon_is_self(daemon.daemon_type, daemon.daemon_id): + return daemon + # if no active mgr found, return empty Daemon Desc + return DaemonDescription() + + def fail_over(self) -> None: + # this has been seen to sometimes transiently fail even when there are multiple + # mgr daemons. As long as there are multiple known mgr daemons, we should retry. 
+ class NoStandbyError(OrchestratorError): + pass + no_standby_exc = NoStandbyError('Need standby mgr daemon', event_kind_subject=( + 'daemon', 'mgr' + self.mgr.get_mgr_id())) + for sleep_secs in [2, 8, 15]: + try: + if not self.mgr_map_has_standby(): + raise no_standby_exc + self.mgr.events.for_daemon('mgr' + self.mgr.get_mgr_id(), + 'INFO', 'Failing over to other MGR') + logger.info('Failing over to other MGR') + + # fail over + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'mgr fail', + 'who': self.mgr.get_mgr_id(), + }) + return + except NoStandbyError: + logger.info( + f'Failed to find standby mgr for failover. Retrying in {sleep_secs} seconds') + time.sleep(sleep_secs) + raise no_standby_exc + + def mgr_map_has_standby(self) -> bool: + """ + This is a bit safer than asking our inventory. If the mgr joined the mgr map, + we know it joined the cluster + """ + mgr_map = self.mgr.get('mgr_map') + num = len(mgr_map.get('standbys')) + return bool(num) + + def ok_to_stop( + self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None # output argument + ) -> HandleCommandResult: + # ok to stop if there is more than 1 mgr and not trying to stop the active mgr + + warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Mgr', 1, True) + if warn: + return HandleCommandResult(-errno.EBUSY, '', warn_message) + + mgr_daemons = self.mgr.cache.get_daemons_by_type(self.TYPE) + active = self.get_active_daemon(mgr_daemons).daemon_id + if active in daemon_ids: + warn_message = 'ALERT: Cannot stop active Mgr daemon, Please switch active Mgrs with \'ceph mgr fail %s\'' % active + return HandleCommandResult(-errno.EBUSY, '', warn_message) + + return HandleCommandResult(0, warn_message, '') + + +class MdsService(CephService): + TYPE = 'mds' + + def allow_colo(self) -> bool: + return True + + def config(self, spec: ServiceSpec) -> None: + assert self.TYPE == spec.service_type + assert spec.service_id + + # ensure mds_join_fs 
is set for these daemons + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config set', + 'who': 'mds.' + spec.service_id, + 'name': 'mds_join_fs', + 'value': spec.service_id, + }) + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + mds_id, _ = daemon_spec.daemon_id, daemon_spec.host + + # get mds. key + keyring = self.get_keyring_with_caps(self.get_auth_entity(mds_id), + ['mon', 'profile mds', + 'osd', 'allow rw tag cephfs *=*', + 'mds', 'allow']) + daemon_spec.keyring = keyring + + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + + return daemon_spec + + def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: + active_mds_strs = list() + for fs in self.mgr.get('fs_map')['filesystems']: + mds_map = fs['mdsmap'] + if mds_map is not None: + for mds_id, mds_status in mds_map['info'].items(): + if mds_status['state'] == 'up:active': + active_mds_strs.append(mds_status['name']) + if len(active_mds_strs) != 0: + for daemon in daemon_descrs: + if daemon.daemon_id in active_mds_strs: + return daemon + # if no mds found, return empty Daemon Desc + return DaemonDescription() + + def purge(self, service_name: str) -> None: + self.mgr.check_mon_command({ + 'prefix': 'config rm', + 'who': service_name, + 'name': 'mds_join_fs', + }) + + +class RgwService(CephService): + TYPE = 'rgw' + + def allow_colo(self) -> bool: + return True + + def config(self, spec: RGWSpec) -> None: # type: ignore + assert self.TYPE == spec.service_type + + # set rgw_realm and rgw_zone, if present + if spec.rgw_realm: + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config set', + 'who': f"{utils.name_to_config_section('rgw')}.{spec.service_id}", + 'name': 'rgw_realm', + 'value': spec.rgw_realm, + }) + if spec.rgw_zone: + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config set', + 'who': 
f"{utils.name_to_config_section('rgw')}.{spec.service_id}", + 'name': 'rgw_zone', + 'value': spec.rgw_zone, + }) + + if spec.rgw_frontend_ssl_certificate: + if isinstance(spec.rgw_frontend_ssl_certificate, list): + cert_data = '\n'.join(spec.rgw_frontend_ssl_certificate) + elif isinstance(spec.rgw_frontend_ssl_certificate, str): + cert_data = spec.rgw_frontend_ssl_certificate + else: + raise OrchestratorError( + 'Invalid rgw_frontend_ssl_certificate: %s' + % spec.rgw_frontend_ssl_certificate) + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config-key set', + 'key': f'rgw/cert/{spec.service_name()}', + 'val': cert_data, + }) + + # TODO: fail, if we don't have a spec + logger.info('Saving service %s spec with placement %s' % ( + spec.service_name(), spec.placement.pretty_str())) + self.mgr.spec_store.save(spec) + self.mgr.trigger_connect_dashboard_rgw() + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + rgw_id, _ = daemon_spec.daemon_id, daemon_spec.host + spec = cast(RGWSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + + keyring = self.get_keyring(rgw_id) + + if daemon_spec.ports: + port = daemon_spec.ports[0] + else: + # this is a redeploy of older instance that doesn't have an explicitly + # assigned port, in which case we can assume there is only 1 per host + # and it matches the spec. 
+ port = spec.get_port() + + # configure frontend + args = [] + ftype = spec.rgw_frontend_type or "beast" + if ftype == 'beast': + if spec.ssl: + if daemon_spec.ip: + args.append( + f"ssl_endpoint={build_url(host=daemon_spec.ip, port=port).lstrip('/')}") + else: + args.append(f"ssl_port={port}") + args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}") + else: + if daemon_spec.ip: + args.append(f"endpoint={build_url(host=daemon_spec.ip, port=port).lstrip('/')}") + else: + args.append(f"port={port}") + elif ftype == 'civetweb': + if spec.ssl: + if daemon_spec.ip: + # note the 's' suffix on port + args.append(f"port={build_url(host=daemon_spec.ip, port=port).lstrip('/')}s") + else: + args.append(f"port={port}s") # note the 's' suffix on port + args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}") + else: + if daemon_spec.ip: + args.append(f"port={build_url(host=daemon_spec.ip, port=port).lstrip('/')}") + else: + args.append(f"port={port}") + frontend = f'{ftype} {" ".join(args)}' + + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config set', + 'who': utils.name_to_config_section(daemon_spec.name()), + 'name': 'rgw_frontends', + 'value': frontend + }) + + daemon_spec.keyring = keyring + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + + return daemon_spec + + def get_keyring(self, rgw_id: str) -> str: + keyring = self.get_keyring_with_caps(self.get_auth_entity(rgw_id), + ['mon', 'allow *', + 'mgr', 'allow rw', + 'osd', 'allow rwx tag rgw *=*']) + return keyring + + def purge(self, service_name: str) -> None: + self.mgr.check_mon_command({ + 'prefix': 'config rm', + 'who': utils.name_to_config_section(service_name), + 'name': 'rgw_realm', + }) + self.mgr.check_mon_command({ + 'prefix': 'config rm', + 'who': utils.name_to_config_section(service_name), + 'name': 'rgw_zone', + }) + self.mgr.check_mon_command({ + 'prefix': 'config-key rm', + 'key': f'rgw/cert/{service_name}', + }) + 
self.mgr.trigger_connect_dashboard_rgw() + + def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None: + super().post_remove(daemon, is_failed_deploy=is_failed_deploy) + self.mgr.check_mon_command({ + 'prefix': 'config rm', + 'who': utils.name_to_config_section(daemon.name()), + 'name': 'rgw_frontends', + }) + + def ok_to_stop( + self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None # output argument + ) -> HandleCommandResult: + # if load balancer (ingress) is present block if only 1 daemon up otherwise ok + # if no load balancer, warn if > 1 daemon, block if only 1 daemon + def ingress_present() -> bool: + running_ingress_daemons = [ + daemon for daemon in self.mgr.cache.get_daemons_by_type('ingress') if daemon.status == 1] + running_haproxy_daemons = [ + daemon for daemon in running_ingress_daemons if daemon.daemon_type == 'haproxy'] + running_keepalived_daemons = [ + daemon for daemon in running_ingress_daemons if daemon.daemon_type == 'keepalived'] + # check that there is at least one haproxy and keepalived daemon running + if running_haproxy_daemons and running_keepalived_daemons: + return True + return False + + # if only 1 rgw, alert user (this is not passable with --force) + warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'RGW', 1, True) + if warn: + return HandleCommandResult(-errno.EBUSY, '', warn_message) + + # if reached here, there is > 1 rgw daemon. + # Say okay if load balancer present or force flag set + if ingress_present() or force: + return HandleCommandResult(0, warn_message, '') + + # if reached here, > 1 RGW daemon, no load balancer and no force flag. + # Provide warning + warn_message = "WARNING: Removing RGW daemons can cause clients to lose connectivity. 
" + return HandleCommandResult(-errno.EBUSY, '', warn_message) + + def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: + self.mgr.trigger_connect_dashboard_rgw() + + +class RbdMirrorService(CephService): + TYPE = 'rbd-mirror' + + def allow_colo(self) -> bool: + return True + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_id, _ = daemon_spec.daemon_id, daemon_spec.host + + keyring = self.get_keyring_with_caps(self.get_auth_entity(daemon_id), + ['mon', 'profile rbd-mirror', + 'osd', 'profile rbd']) + + daemon_spec.keyring = keyring + + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + + return daemon_spec + + def ok_to_stop( + self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None # output argument + ) -> HandleCommandResult: + # if only 1 rbd-mirror, alert user (this is not passable with --force) + warn, warn_message = self._enough_daemons_to_stop( + self.TYPE, daemon_ids, 'Rbdmirror', 1, True) + if warn: + return HandleCommandResult(-errno.EBUSY, '', warn_message) + return HandleCommandResult(0, warn_message, '') + + +class CrashService(CephService): + TYPE = 'crash' + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_id, host = daemon_spec.daemon_id, daemon_spec.host + + keyring = self.get_keyring_with_caps(self.get_auth_entity(daemon_id, host=host), + ['mon', 'profile crash', + 'mgr', 'profile crash']) + + daemon_spec.keyring = keyring + + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + + return daemon_spec + + +class CephfsMirrorService(CephService): + TYPE = 'cephfs-mirror' + + def config(self, spec: ServiceSpec) -> None: + # make sure mirroring module is enabled + mgr_map = self.mgr.get('mgr_map') + mod_name = 'mirroring' + if mod_name not in 
mgr_map.get('services', {}): + self.mgr.check_mon_command({ + 'prefix': 'mgr module enable', + 'module': mod_name + }) + # we shouldn't get here (mon will tell the mgr to respawn), but no + # harm done if we do. + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + + ret, keyring, err = self.mgr.check_mon_command({ + 'prefix': 'auth get-or-create', + 'entity': self.get_auth_entity(daemon_spec.daemon_id), + 'caps': ['mon', 'profile cephfs-mirror', + 'mds', 'allow r', + 'osd', 'allow rw tag cephfs metadata=*, allow r tag cephfs data=*', + 'mgr', 'allow r'], + }) + + daemon_spec.keyring = keyring + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec diff --git a/src/pybind/mgr/cephadm/services/container.py b/src/pybind/mgr/cephadm/services/container.py new file mode 100644 index 000000000..b9cdfad5e --- /dev/null +++ b/src/pybind/mgr/cephadm/services/container.py @@ -0,0 +1,29 @@ +import logging +from typing import List, Any, Tuple, Dict, cast + +from ceph.deployment.service_spec import CustomContainerSpec + +from .cephadmservice import CephadmService, CephadmDaemonDeploySpec + +logger = logging.getLogger(__name__) + + +class CustomContainerService(CephadmService): + TYPE = 'container' + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) \ + -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) \ + -> Tuple[Dict[str, Any], List[str]]: + assert self.TYPE == daemon_spec.daemon_type + deps: List[str] = [] + spec = cast(CustomContainerSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + config: Dict[str, Any] = spec.config_json() + logger.debug( + 'Generated configuration for \'%s\' service: config-json=%s, dependencies=%s' % + 
class CephadmExporterConfig:
    """Holder and validator for the cephadm-exporter settings.

    The exporter needs a TLS cert/key pair, an auth token and a TCP port;
    instances can be filled from the mon K/V store or from a JSON blob.
    """

    required_keys = ['crt', 'key', 'token', 'port']
    DEFAULT_PORT = '9443'

    def __init__(self, mgr: "CephadmOrchestrator", crt: str = "", key: str = "",
                 token: str = "", port: str = "") -> None:
        self.mgr = mgr
        self.crt = crt
        self.key = key
        self.token = token
        self.port = port

    @property
    def ready(self) -> bool:
        """True once every required setting is non-empty."""
        return all((self.crt, self.key, self.token, self.port))

    def load_from_store(self) -> None:
        """Populate the settings from the mgr's exporter config store."""
        cfg = self.mgr._get_exporter_config()

        assert isinstance(cfg, dict)
        for attr in self.required_keys:
            setattr(self, attr, cfg.get(attr, ""))

    def load_from_json(self, json_str: str) -> Tuple[int, str]:
        """Populate the settings from a JSON string.

        :returns: (0, "") on success, (1, reason) on malformed/incomplete input
        """
        try:
            cfg = json.loads(json_str)
        except ValueError:
            return 1, "Invalid JSON provided - unable to load"

        if not all(k in cfg for k in CephadmExporterConfig.required_keys):
            return 1, "JSON file must contain crt, key, token and port"

        self.crt = cfg.get('crt')
        self.key = cfg.get('key')
        self.token = cfg.get('token')
        self.port = cfg.get('port')

        return 0, ""

    def validate_config(self) -> Tuple[int, str]:
        """Run all validators; returns (0, "") when the config is usable."""
        if not self.ready:
            return 1, "Incomplete configuration. cephadm-exporter needs crt, key, token and port to be set"

        for check in (self._validate_tls, self._validate_token, self._validate_port):
            rc, reason = check()
            if rc:
                return 1, reason

        return 0, ""

    def _validate_tls(self) -> Tuple[int, str]:
        # delegate cert/key sanity checking to mgr_util.verify_tls
        try:
            verify_tls(self.crt, self.key)
        except ServerConfigException as e:
            return 1, str(e)

        return 0, ""

    def _validate_token(self) -> Tuple[int, str]:
        if not isinstance(self.token, str):
            return 1, "token must be a string"
        if len(self.token) < 8:
            return 1, "Token must be a string of at least 8 chars in length"

        return 0, ""

    def _validate_port(self) -> Tuple[int, str]:
        try:
            if int(str(self.port)) <= 1024:
                raise ValueError
        except ValueError:
            return 1, "Port must be a integer (>1024)"

        return 0, ""
cfg.crt, + "key": cfg.key, + "token": cfg.token + } + return config, deps + + def purge(self, service_name: str) -> None: + logger.info("Purging cephadm-exporter settings from mon K/V store") + self.mgr._clear_exporter_config_settings() diff --git a/src/pybind/mgr/cephadm/services/ingress.py b/src/pybind/mgr/cephadm/services/ingress.py new file mode 100644 index 000000000..99fde1c43 --- /dev/null +++ b/src/pybind/mgr/cephadm/services/ingress.py @@ -0,0 +1,296 @@ +import ipaddress +import logging +import random +import string +from typing import List, Dict, Any, Tuple, cast, Optional + +from ceph.deployment.service_spec import IngressSpec +from mgr_util import build_url +from cephadm.utils import resolve_ip +from orchestrator import OrchestratorError +from cephadm.services.cephadmservice import CephadmDaemonDeploySpec, CephService + +logger = logging.getLogger(__name__) + + +class IngressService(CephService): + TYPE = 'ingress' + + def primary_daemon_type(self) -> str: + return 'haproxy' + + def per_host_daemon_type(self) -> Optional[str]: + return 'keepalived' + + def prepare_create( + self, + daemon_spec: CephadmDaemonDeploySpec, + ) -> CephadmDaemonDeploySpec: + if daemon_spec.daemon_type == 'haproxy': + return self.haproxy_prepare_create(daemon_spec) + if daemon_spec.daemon_type == 'keepalived': + return self.keepalived_prepare_create(daemon_spec) + assert False, "unexpected daemon type" + + def generate_config( + self, + daemon_spec: CephadmDaemonDeploySpec + ) -> Tuple[Dict[str, Any], List[str]]: + if daemon_spec.daemon_type == 'haproxy': + return self.haproxy_generate_config(daemon_spec) + else: + return self.keepalived_generate_config(daemon_spec) + assert False, "unexpected daemon type" + + def haproxy_prepare_create( + self, + daemon_spec: CephadmDaemonDeploySpec, + ) -> CephadmDaemonDeploySpec: + assert daemon_spec.daemon_type == 'haproxy' + + daemon_id = daemon_spec.daemon_id + host = daemon_spec.host + spec = cast(IngressSpec, 
    def haproxy_generate_config(
            self,
            daemon_spec: CephadmDaemonDeploySpec,
    ) -> Tuple[Dict[str, Any], List[str]]:
        """Render haproxy.cfg (and optional haproxy.pem) for one haproxy daemon.

        Returns (config-files dict, sorted list of backend daemon names the
        config depends on).
        """
        spec = cast(IngressSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
        assert spec.backend_service
        if spec.backend_service not in self.mgr.spec_store:
            raise RuntimeError(
                f'{spec.service_name()} backend service {spec.backend_service} does not exist')
        backend_spec = self.mgr.spec_store[spec.backend_service].spec
        daemons = self.mgr.cache.get_daemons_by_service(spec.backend_service)
        deps = [d.name() for d in daemons]

        # generate password?
        # Monitor password resolution: a spec-provided password always wins;
        # otherwise a random one is generated once and persisted in the
        # config-key store. A stored password is dropped when the spec starts
        # supplying its own.
        pw_key = f'{spec.service_name()}/monitor_password'
        password = self.mgr.get_store(pw_key)
        if password is None:
            if not spec.monitor_password:
                password = ''.join(random.choice(string.ascii_lowercase) for _ in range(20))
                self.mgr.set_store(pw_key, password)
        else:
            if spec.monitor_password:
                self.mgr.set_store(pw_key, None)
        if spec.monitor_password:
            password = spec.monitor_password

        if backend_spec.service_type == 'nfs':
            # NFS backends are addressed by rank so server slots stay stable
            # across redeploys; tcp mode since this is not HTTP traffic.
            mode = 'tcp'
            by_rank = {d.rank: d for d in daemons if d.rank is not None}
            servers = []

            # try to establish how many ranks we *should* have
            num_ranks = backend_spec.placement.count
            if not num_ranks:
                num_ranks = 1 + max(by_rank.keys())

            for rank in range(num_ranks):
                if rank in by_rank:
                    d = by_rank[rank]
                    assert d.ports
                    servers.append({
                        'name': f"{spec.backend_service}.{rank}",
                        'ip': d.ip or resolve_ip(self.mgr.inventory.get_addr(str(d.hostname))),
                        'port': d.ports[0],
                    })
                else:
                    # offline/missing server; leave rank in place
                    servers.append({
                        'name': f"{spec.backend_service}.{rank}",
                        'ip': '0.0.0.0',
                        'port': 0,
                    })
        else:
            # non-NFS backends (e.g. rgw) are proxied in http mode; daemons
            # without a known port are skipped.
            mode = 'http'
            servers = [
                {
                    'name': d.name(),
                    'ip': d.ip or resolve_ip(self.mgr.inventory.get_addr(str(d.hostname))),
                    'port': d.ports[0],
                } for d in daemons if d.ports
            ]

        haproxy_conf = self.mgr.template.render(
            'services/ingress/haproxy.cfg.j2',
            {
                'spec': spec,
                'mode': mode,
                'servers': servers,
                'user': spec.monitor_user or 'admin',
                'password': password,
                # bind to all addresses when multiple VIPs are configured,
                # else prefer the single virtual IP, then the daemon IP
                'ip': "*" if spec.virtual_ips_list else str(spec.virtual_ip).split('/')[0] or daemon_spec.ip or '*',
                'frontend_port': daemon_spec.ports[0] if daemon_spec.ports else spec.frontend_port,
                'monitor_port': daemon_spec.ports[1] if daemon_spec.ports else spec.monitor_port,
            }
        )
        config_files = {
            'files': {
                "haproxy.cfg": haproxy_conf,
            }
        }
        if spec.ssl_cert:
            ssl_cert = spec.ssl_cert
            if isinstance(ssl_cert, list):
                ssl_cert = '\n'.join(ssl_cert)
            config_files['files']['haproxy.pem'] = ssl_cert

        # deps are sorted so an unchanged backend set compares equal
        return config_files, sorted(deps)
+ pw_key = f'{spec.service_name()}/keepalived_password' + password = self.mgr.get_store(pw_key) + if password is None: + if not spec.keepalived_password: + password = ''.join(random.choice(string.ascii_lowercase) for _ in range(20)) + self.mgr.set_store(pw_key, password) + else: + if spec.keepalived_password: + self.mgr.set_store(pw_key, None) + if spec.keepalived_password: + password = spec.keepalived_password + + daemons = self.mgr.cache.get_daemons_by_service(spec.service_name()) + + if not daemons: + raise OrchestratorError( + f'Failed to generate keepalived.conf: No daemons deployed for {spec.service_name()}') + + deps = sorted([d.name() for d in daemons if d.daemon_type == 'haproxy']) + + host = daemon_spec.host + hosts = sorted(list(set([host] + [str(d.hostname) for d in daemons]))) + + # interface + bare_ips = [] + if spec.virtual_ip: + bare_ips.append(str(spec.virtual_ip).split('/')[0]) + elif spec.virtual_ips_list: + bare_ips = [str(vip).split('/')[0] for vip in spec.virtual_ips_list] + interface = None + for bare_ip in bare_ips: + for subnet, ifaces in self.mgr.cache.networks.get(host, {}).items(): + if ifaces and ipaddress.ip_address(bare_ip) in ipaddress.ip_network(subnet): + interface = list(ifaces.keys())[0] + logger.info( + f'{bare_ip} is in {subnet} on {host} interface {interface}' + ) + break + else: # nobreak + continue + break + # try to find interface by matching spec.virtual_interface_networks + if not interface and spec.virtual_interface_networks: + for subnet, ifaces in self.mgr.cache.networks.get(host, {}).items(): + if subnet in spec.virtual_interface_networks: + interface = list(ifaces.keys())[0] + logger.info( + f'{spec.virtual_ip} will be configured on {host} interface ' + f'{interface} (which has guiding subnet {subnet})' + ) + break + if not interface: + raise OrchestratorError( + f"Unable to identify interface for {spec.virtual_ip} on {host}" + ) + + # script to monitor health + script = '/usr/bin/false' + for d in daemons: + if 
d.hostname == host: + if d.daemon_type == 'haproxy': + assert d.ports + port = d.ports[1] # monitoring port + script = f'/usr/bin/curl {build_url(scheme="http", host=d.ip or "localhost", port=port)}/health' + assert script + + states = [] + priorities = [] + virtual_ips = [] + + # Set state and priority. Have one master for each VIP. Or at least the first one as master if only one VIP. + if spec.virtual_ip: + virtual_ips.append(spec.virtual_ip) + if hosts[0] == host: + states.append('MASTER') + priorities.append(100) + else: + states.append('BACKUP') + priorities.append(90) + + elif spec.virtual_ips_list: + virtual_ips = spec.virtual_ips_list + if len(virtual_ips) > len(hosts): + raise OrchestratorError( + "Number of virtual IPs for ingress is greater than number of available hosts" + ) + for x in range(len(virtual_ips)): + if hosts[x] == host: + states.append('MASTER') + priorities.append(100) + else: + states.append('BACKUP') + priorities.append(90) + + # remove host, daemon is being deployed on from hosts list for + # other_ips in conf file and converter to ips + if host in hosts: + hosts.remove(host) + other_ips = [resolve_ip(self.mgr.inventory.get_addr(h)) for h in hosts] + + keepalived_conf = self.mgr.template.render( + 'services/ingress/keepalived.conf.j2', + { + 'spec': spec, + 'script': script, + 'password': password, + 'interface': interface, + 'virtual_ips': virtual_ips, + 'states': states, + 'priorities': priorities, + 'other_ips': other_ips, + 'host_ip': resolve_ip(self.mgr.inventory.get_addr(host)), + } + ) + + config_file = { + 'files': { + "keepalived.conf": keepalived_conf, + } + } + + return config_file, deps diff --git a/src/pybind/mgr/cephadm/services/iscsi.py b/src/pybind/mgr/cephadm/services/iscsi.py new file mode 100644 index 000000000..c42eff683 --- /dev/null +++ b/src/pybind/mgr/cephadm/services/iscsi.py @@ -0,0 +1,210 @@ +import errno +import json +import logging +import subprocess +from typing import List, cast, Optional +from ipaddress 
import ip_address, IPv6Address + +from mgr_module import HandleCommandResult +from ceph.deployment.service_spec import IscsiServiceSpec + +from orchestrator import DaemonDescription, DaemonDescriptionStatus +from .cephadmservice import CephadmDaemonDeploySpec, CephService +from .. import utils + +logger = logging.getLogger(__name__) + + +class IscsiService(CephService): + TYPE = 'iscsi' + + def config(self, spec: IscsiServiceSpec) -> None: # type: ignore + assert self.TYPE == spec.service_type + assert spec.pool + self.mgr._check_pool_exists(spec.pool, spec.service_name()) + + def get_trusted_ips(self, spec: IscsiServiceSpec) -> str: + # add active mgr ip address to trusted list so dashboard can access + trusted_ip_list = spec.trusted_ip_list if spec.trusted_ip_list else '' + if trusted_ip_list: + trusted_ip_list += ',' + trusted_ip_list += self.mgr.get_mgr_ip() + return trusted_ip_list + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + + spec = cast(IscsiServiceSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + igw_id = daemon_spec.daemon_id + + keyring = self.get_keyring_with_caps(self.get_auth_entity(igw_id), + ['mon', 'profile rbd, ' + 'allow command "osd blocklist", ' + 'allow command "config-key get" with "key" prefix "iscsi/"', + 'mgr', 'allow command "service status"', + 'osd', 'allow rwx']) + + if spec.ssl_cert: + if isinstance(spec.ssl_cert, list): + cert_data = '\n'.join(spec.ssl_cert) + else: + cert_data = spec.ssl_cert + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config-key set', + 'key': f'iscsi/{utils.name_to_config_section("iscsi")}.{igw_id}/iscsi-gateway.crt', + 'val': cert_data, + }) + + if spec.ssl_key: + if isinstance(spec.ssl_key, list): + key_data = '\n'.join(spec.ssl_key) + else: + key_data = spec.ssl_key + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config-key set', + 'key': 
f'iscsi/{utils.name_to_config_section("iscsi")}.{igw_id}/iscsi-gateway.key', + 'val': key_data, + }) + + trusted_ip_list = self.get_trusted_ips(spec) + + context = { + 'client_name': '{}.{}'.format(utils.name_to_config_section('iscsi'), igw_id), + 'trusted_ip_list': trusted_ip_list, + 'spec': spec + } + igw_conf = self.mgr.template.render('services/iscsi/iscsi-gateway.cfg.j2', context) + + daemon_spec.keyring = keyring + daemon_spec.extra_files = {'iscsi-gateway.cfg': igw_conf} + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + daemon_spec.deps = [trusted_ip_list] + return daemon_spec + + def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: + def get_set_cmd_dicts(out: str) -> List[dict]: + gateways = json.loads(out)['gateways'] + cmd_dicts = [] + # TODO: fail, if we don't have a spec + spec = cast(IscsiServiceSpec, + self.mgr.spec_store.all_specs.get(daemon_descrs[0].service_name(), None)) + if spec.api_secure and spec.ssl_cert and spec.ssl_key: + cmd_dicts.append({ + 'prefix': 'dashboard set-iscsi-api-ssl-verification', + 'value': "false" + }) + else: + cmd_dicts.append({ + 'prefix': 'dashboard set-iscsi-api-ssl-verification', + 'value': "true" + }) + for dd in daemon_descrs: + assert dd.hostname is not None + # todo: this can fail: + spec = cast(IscsiServiceSpec, + self.mgr.spec_store.all_specs.get(dd.service_name(), None)) + if not spec: + logger.warning('No ServiceSpec found for %s', dd) + continue + ip = utils.resolve_ip(self.mgr.inventory.get_addr(dd.hostname)) + # IPv6 URL encoding requires square brackets enclosing the ip + if type(ip_address(ip)) is IPv6Address: + ip = f'[{ip}]' + protocol = "http" + if spec.api_secure and spec.ssl_cert and spec.ssl_key: + protocol = "https" + service_url = '{}://{}:{}@{}:{}'.format( + protocol, spec.api_user or 'admin', spec.api_password or 'admin', ip, spec.api_port or '5000') + gw = gateways.get(dd.hostname) + if not gw or gw['service_url'] != service_url: + 
safe_service_url = '{}://{}:{}@{}:{}'.format( + protocol, '<api-user>', '<api-password>', ip, spec.api_port or '5000') + logger.info('Adding iSCSI gateway %s to Dashboard', safe_service_url) + cmd_dicts.append({ + 'prefix': 'dashboard iscsi-gateway-add', + 'inbuf': service_url, + 'name': dd.hostname + }) + return cmd_dicts + + self._check_and_set_dashboard( + service_name='iSCSI', + get_cmd='dashboard iscsi-gateway-list', + get_set_cmd_dicts=get_set_cmd_dicts + ) + + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: + # if only 1 iscsi, alert user (this is not passable with --force) + warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Iscsi', 1, True) + if warn: + return HandleCommandResult(-errno.EBUSY, '', warn_message) + + # if reached here, there is > 1 nfs daemon. make sure none are down + warn_message = ( + 'ALERT: 1 iscsi daemon is already down. Please bring it back up before stopping this one') + iscsi_daemons = self.mgr.cache.get_daemons_by_type(self.TYPE) + for i in iscsi_daemons: + if i.status != DaemonDescriptionStatus.running: + return HandleCommandResult(-errno.EBUSY, '', warn_message) + + names = [f'{self.TYPE}.{d_id}' for d_id in daemon_ids] + warn_message = f'It is presumed safe to stop {names}' + return HandleCommandResult(0, warn_message, '') + + def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None: + """ + Called after the daemon is removed. 
+ """ + logger.debug(f'Post remove daemon {self.TYPE}.{daemon.daemon_id}') + + # remove config for dashboard iscsi gateways + ret, out, err = self.mgr.mon_command({ + 'prefix': 'dashboard iscsi-gateway-rm', + 'name': daemon.hostname, + }) + if not ret: + logger.info(f'{daemon.hostname} removed from iscsi gateways dashboard config') + + # needed to know if we have ssl stuff for iscsi in ceph config + iscsi_config_dict = {} + ret, iscsi_config, err = self.mgr.mon_command({ + 'prefix': 'config-key dump', + 'key': 'iscsi', + }) + if iscsi_config: + iscsi_config_dict = json.loads(iscsi_config) + + # remove iscsi cert and key from ceph config + for iscsi_key, value in iscsi_config_dict.items(): + if f'iscsi/client.{daemon.name()}/' in iscsi_key: + ret, out, err = self.mgr.mon_command({ + 'prefix': 'config-key rm', + 'key': iscsi_key, + }) + logger.info(f'{iscsi_key} removed from ceph config') + + def purge(self, service_name: str) -> None: + """Removes configuration + """ + spec = cast(IscsiServiceSpec, self.mgr.spec_store[service_name].spec) + try: + # remove service configuration from the pool + try: + subprocess.run(['rados', + '-k', str(self.mgr.get_ceph_option('keyring')), + '-n', f'mgr.{self.mgr.get_mgr_id()}', + '-p', cast(str, spec.pool), + 'rm', + 'gateway.conf'], + timeout=5) + logger.info(f'<gateway.conf> removed from {spec.pool}') + except subprocess.CalledProcessError as ex: + logger.error(f'Error executing <<{ex.cmd}>>: {ex.output}') + except subprocess.TimeoutExpired: + logger.error(f'timeout (5s) trying to remove <gateway.conf> from {spec.pool}') + + except Exception: + logger.exception(f'failed to purge {service_name}') diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py new file mode 100644 index 000000000..8de7195a3 --- /dev/null +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -0,0 +1,502 @@ +import errno +import ipaddress +import logging +import os +import socket +from typing import List, 
Any, Tuple, Dict, Optional, cast +from urllib.parse import urlparse + +from mgr_module import HandleCommandResult + +from orchestrator import DaemonDescription +from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \ + SNMPGatewaySpec, PrometheusSpec +from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec +from cephadm.services.ingress import IngressSpec +from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url + +logger = logging.getLogger(__name__) + + +class GrafanaService(CephadmService): + TYPE = 'grafana' + DEFAULT_SERVICE_PORT = 3000 + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + assert self.TYPE == daemon_spec.daemon_type + deps = [] # type: List[str] + + prom_services = [] # type: List[str] + for dd in self.mgr.cache.get_daemons_by_service('prometheus'): + assert dd.hostname is not None + addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + port = dd.ports[0] if dd.ports else 9095 + prom_services.append(build_url(scheme='http', host=addr, port=port)) + + deps.append(dd.name()) + grafana_data_sources = self.mgr.template.render( + 'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services}) + + cert_path = f'{daemon_spec.host}/grafana_crt' + key_path = f'{daemon_spec.host}/grafana_key' + cert = self.mgr.get_store(cert_path) + pkey = self.mgr.get_store(key_path) + if cert and pkey: + try: + verify_tls(cert, pkey) + except ServerConfigException as e: + logger.warning('Provided grafana TLS certificates invalid: %s', str(e)) + cert, pkey = None, None + if not (cert and pkey): + cert, pkey = create_self_signed_cert('Ceph', daemon_spec.host) + 
self.mgr.set_store(cert_path, cert) + self.mgr.set_store(key_path, pkey) + if 'dashboard' in self.mgr.get('mgr_map')['modules']: + self.mgr.check_mon_command({ + 'prefix': 'dashboard set-grafana-api-ssl-verify', + 'value': 'false', + }) + + spec: GrafanaSpec = cast( + GrafanaSpec, self.mgr.spec_store.active_specs[daemon_spec.service_name]) + grafana_ini = self.mgr.template.render( + 'services/grafana/grafana.ini.j2', { + 'initial_admin_password': spec.initial_admin_password, + 'http_port': daemon_spec.ports[0] if daemon_spec.ports else self.DEFAULT_SERVICE_PORT, + 'http_addr': daemon_spec.ip if daemon_spec.ip else '' + }) + + if 'dashboard' in self.mgr.get('mgr_map')['modules'] and spec.initial_admin_password: + self.mgr.check_mon_command( + {'prefix': 'dashboard set-grafana-api-password'}, inbuf=spec.initial_admin_password) + + config_file = { + 'files': { + "grafana.ini": grafana_ini, + 'provisioning/datasources/ceph-dashboard.yml': grafana_data_sources, + 'certs/cert_file': '# generated by cephadm\n%s' % cert, + 'certs/cert_key': '# generated by cephadm\n%s' % pkey, + } + } + return config_file, sorted(deps) + + def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: + # Use the least-created one as the active daemon + if daemon_descrs: + return daemon_descrs[-1] + # if empty list provided, return empty Daemon Desc + return DaemonDescription() + + def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: + # TODO: signed cert + dd = self.get_active_daemon(daemon_descrs) + assert dd.hostname is not None + addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT + service_url = build_url(scheme='https', host=addr, port=port) + self._set_service_url_on_dashboard( + 'Grafana', + 'dashboard get-grafana-api-url', + 'dashboard set-grafana-api-url', + service_url + ) + + def pre_remove(self, daemon: DaemonDescription) -> None: + """ + Called before 
grafana daemon is removed. + """ + if daemon.hostname is not None: + # delete cert/key entires for this grafana daemon + cert_path = f'{daemon.hostname}/grafana_crt' + key_path = f'{daemon.hostname}/grafana_key' + self.mgr.set_store(cert_path, None) + self.mgr.set_store(key_path, None) + + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: + warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Grafana', 1) + if warn and not force: + return HandleCommandResult(-errno.EBUSY, '', warn_message) + return HandleCommandResult(0, warn_message, '') + + +class AlertmanagerService(CephadmService): + TYPE = 'alertmanager' + DEFAULT_SERVICE_PORT = 9093 + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + assert self.TYPE == daemon_spec.daemon_type + deps: List[str] = [] + default_webhook_urls: List[str] = [] + + spec = cast(AlertManagerSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + try: + secure = spec.secure + except AttributeError: + secure = False + user_data = spec.user_data + if 'default_webhook_urls' in user_data and isinstance( + user_data['default_webhook_urls'], list): + default_webhook_urls.extend(user_data['default_webhook_urls']) + + # dashboard(s) + dashboard_urls: List[str] = [] + snmp_gateway_urls: List[str] = [] + mgr_map = self.mgr.get('mgr_map') + port = None + proto = None # http: or https: + url = mgr_map.get('services', {}).get('dashboard', None) + if url: + p_result = urlparse(url.rstrip('/')) + hostname = socket.getfqdn(p_result.hostname) + + try: + ip = ipaddress.ip_address(hostname) + except ValueError: + pass + else: + if ip.version == 6: + hostname 
= f'[{hostname}]' + + dashboard_urls.append( + f'{p_result.scheme}://{hostname}:{p_result.port}{p_result.path}') + proto = p_result.scheme + port = p_result.port + # scan all mgrs to generate deps and to get standbys too. + # assume that they are all on the same port as the active mgr. + for dd in self.mgr.cache.get_daemons_by_service('mgr'): + # we consider mgr a dep even if the dashboard is disabled + # in order to be consistent with _calc_daemon_deps(). + deps.append(dd.name()) + if not port: + continue + if dd.daemon_id == self.mgr.get_mgr_id(): + continue + assert dd.hostname is not None + addr = self._inventory_get_fqdn(dd.hostname) + dashboard_urls.append(build_url(scheme=proto, host=addr, port=port).rstrip('/')) + + for dd in self.mgr.cache.get_daemons_by_service('snmp-gateway'): + assert dd.hostname is not None + assert dd.ports + addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + deps.append(dd.name()) + + snmp_gateway_urls.append(build_url(scheme='http', host=addr, + port=dd.ports[0], path='/alerts')) + + context = { + 'dashboard_urls': dashboard_urls, + 'default_webhook_urls': default_webhook_urls, + 'snmp_gateway_urls': snmp_gateway_urls, + 'secure': secure, + } + yml = self.mgr.template.render('services/alertmanager/alertmanager.yml.j2', context) + + peers = [] + port = 9094 + for dd in self.mgr.cache.get_daemons_by_service('alertmanager'): + assert dd.hostname is not None + deps.append(dd.name()) + addr = self._inventory_get_fqdn(dd.hostname) + peers.append(build_url(host=addr, port=port).lstrip('/')) + + return { + "files": { + "alertmanager.yml": yml + }, + "peers": peers + }, sorted(deps) + + def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: + # TODO: if there are multiple daemons, who is the active one? 
+ if daemon_descrs: + return daemon_descrs[0] + # if empty list provided, return empty Daemon Desc + return DaemonDescription() + + def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: + dd = self.get_active_daemon(daemon_descrs) + assert dd.hostname is not None + addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT + service_url = build_url(scheme='http', host=addr, port=port) + self._set_service_url_on_dashboard( + 'AlertManager', + 'dashboard get-alertmanager-api-host', + 'dashboard set-alertmanager-api-host', + service_url + ) + + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: + warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Alertmanager', 1) + if warn and not force: + return HandleCommandResult(-errno.EBUSY, '', warn_message) + return HandleCommandResult(0, warn_message, '') + + +class PrometheusService(CephadmService): + TYPE = 'prometheus' + DEFAULT_SERVICE_PORT = 9095 + DEFAULT_MGR_PROMETHEUS_PORT = 9283 + + def config(self, spec: ServiceSpec) -> None: + # make sure module is enabled + mgr_map = self.mgr.get('mgr_map') + if 'prometheus' not in mgr_map.get('services', {}): + self.mgr.check_mon_command({ + 'prefix': 'mgr module enable', + 'module': 'prometheus' + }) + # we shouldn't get here (mon will tell the mgr to respawn), but no + # harm done if we do. 
+ + def prepare_create( + self, + daemon_spec: CephadmDaemonDeploySpec, + ) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + + def generate_config( + self, + daemon_spec: CephadmDaemonDeploySpec, + ) -> Tuple[Dict[str, Any], List[str]]: + assert self.TYPE == daemon_spec.daemon_type + deps = [] # type: List[str] + + prom_spec = cast(PrometheusSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + + try: + retention_time = prom_spec.retention_time if prom_spec.retention_time else '15d' + except AttributeError: + retention_time = '15d' + + # scrape mgrs + mgr_scrape_list = [] + mgr_map = self.mgr.get('mgr_map') + port = cast(int, self.mgr.get_module_option_ex( + 'prometheus', 'server_port', self.DEFAULT_MGR_PROMETHEUS_PORT)) + deps.append(str(port)) + t = mgr_map.get('services', {}).get('prometheus', None) + if t: + p_result = urlparse(t) + # urlparse .hostname removes '[]' from the hostname in case + # of ipv6 addresses so if this is the case then we just + # append the brackets when building the final scrape endpoint + if '[' in p_result.netloc and ']' in p_result.netloc: + mgr_scrape_list.append(f"[{p_result.hostname}]:{port}") + else: + mgr_scrape_list.append(f"{p_result.hostname}:{port}") + # scan all mgrs to generate deps and to get standbys too. + # assume that they are all on the same port as the active mgr. + for dd in self.mgr.cache.get_daemons_by_service('mgr'): + # we consider the mgr a dep even if the prometheus module is + # disabled in order to be consistent with _calc_daemon_deps(). 
+ deps.append(dd.name()) + if not port: + continue + if dd.daemon_id == self.mgr.get_mgr_id(): + continue + assert dd.hostname is not None + addr = self._inventory_get_fqdn(dd.hostname) + mgr_scrape_list.append(build_url(host=addr, port=port).lstrip('/')) + + # scrape node exporters + nodes = [] + for dd in self.mgr.cache.get_daemons_by_service('node-exporter'): + assert dd.hostname is not None + deps.append(dd.name()) + addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + port = dd.ports[0] if dd.ports else 9100 + nodes.append({ + 'hostname': dd.hostname, + 'url': build_url(host=addr, port=port).lstrip('/') + }) + + # scrape alert managers + alertmgr_targets = [] + for dd in self.mgr.cache.get_daemons_by_service('alertmanager'): + assert dd.hostname is not None + deps.append(dd.name()) + addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + port = dd.ports[0] if dd.ports else 9093 + alertmgr_targets.append("'{}'".format(build_url(host=addr, port=port).lstrip('/'))) + + # scrape haproxies + haproxy_targets = [] + for dd in self.mgr.cache.get_daemons_by_type('ingress'): + if dd.service_name() in self.mgr.spec_store: + spec = cast(IngressSpec, self.mgr.spec_store[dd.service_name()].spec) + assert dd.hostname is not None + deps.append(dd.name()) + if dd.daemon_type == 'haproxy': + addr = self._inventory_get_fqdn(dd.hostname) + haproxy_targets.append({ + "url": f"'{build_url(host=addr, port=spec.monitor_port).lstrip('/')}'", + "service": dd.service_name(), + }) + + # generate the prometheus configuration + context = { + 'alertmgr_targets': alertmgr_targets, + 'mgr_scrape_list': mgr_scrape_list, + 'haproxy_targets': haproxy_targets, + 'nodes': nodes, + } + r: Dict[str, Any] = { + 'files': { + 'prometheus.yml': + self.mgr.template.render( + 'services/prometheus/prometheus.yml.j2', context) + }, + 'retention_time': retention_time + } + + # include alerts, if present in the container + if os.path.exists(self.mgr.prometheus_alerts_path): + with 
open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f: + alerts = f.read() + r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts + + # Include custom alerts if present in key value store. This enables the + # users to add custom alerts. Write the file in any case, so that if the + # content of the key value store changed, that file is overwritten + # (emptied in case they value has been removed from the key value + # store). This prevents the necessity to adapt `cephadm` binary to + # remove the file. + # + # Don't use the template engine for it as + # + # 1. the alerts are always static and + # 2. they are a template themselves for the Go template engine, which + # use curly braces and escaping that is cumbersome and unnecessary + # for the user. + # + r['files']['/etc/prometheus/alerting/custom_alerts.yml'] = \ + self.mgr.get_store('services/prometheus/alerting/custom_alerts.yml', '') + + return r, sorted(deps) + + def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: + # TODO: if there are multiple daemons, who is the active one? 
+ if daemon_descrs: + return daemon_descrs[0] + # if empty list provided, return empty Daemon Desc + return DaemonDescription() + + def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: + dd = self.get_active_daemon(daemon_descrs) + assert dd.hostname is not None + addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT + service_url = build_url(scheme='http', host=addr, port=port) + self._set_service_url_on_dashboard( + 'Prometheus', + 'dashboard get-prometheus-api-host', + 'dashboard set-prometheus-api-host', + service_url + ) + + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: + warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Prometheus', 1) + if warn and not force: + return HandleCommandResult(-errno.EBUSY, '', warn_message) + return HandleCommandResult(0, warn_message, '') + + +class NodeExporterService(CephadmService): + TYPE = 'node-exporter' + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + assert self.TYPE == daemon_spec.daemon_type + return {}, [] + + def ok_to_stop(self, + daemon_ids: List[str], + force: bool = False, + known: Optional[List[str]] = None) -> HandleCommandResult: + # since node exporter runs on each host and cannot compromise data, no extra checks required + names = [f'{self.TYPE}.{d_id}' for d_id in daemon_ids] + out = f'It is presumed safe to stop {names}' + return HandleCommandResult(0, out, '') + + +class SNMPGatewayService(CephadmService): + TYPE = 'snmp-gateway' + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> 
CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + + def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: + assert self.TYPE == daemon_spec.daemon_type + deps: List[str] = [] + + spec = cast(SNMPGatewaySpec, self.mgr.spec_store[daemon_spec.service_name].spec) + config = { + "destination": spec.snmp_destination, + "snmp_version": spec.snmp_version, + } + if spec.snmp_version == 'V2c': + community = spec.credentials.get('snmp_community', None) + assert community is not None + + config.update({ + "snmp_community": community + }) + else: + # SNMP v3 settings can be either authNoPriv or authPriv + auth_protocol = 'SHA' if not spec.auth_protocol else spec.auth_protocol + + auth_username = spec.credentials.get('snmp_v3_auth_username', None) + auth_password = spec.credentials.get('snmp_v3_auth_password', None) + assert auth_username is not None + assert auth_password is not None + assert spec.engine_id is not None + + config.update({ + "snmp_v3_auth_protocol": auth_protocol, + "snmp_v3_auth_username": auth_username, + "snmp_v3_auth_password": auth_password, + "snmp_v3_engine_id": spec.engine_id, + }) + # authPriv adds encryption + if spec.privacy_protocol: + priv_password = spec.credentials.get('snmp_v3_priv_password', None) + assert priv_password is not None + + config.update({ + "snmp_v3_priv_protocol": spec.privacy_protocol, + "snmp_v3_priv_password": priv_password, + }) + + logger.debug( + f"Generated configuration for '{self.TYPE}' service. 
import errno
import logging
import os
import subprocess
import tempfile
from typing import Dict, Tuple, Any, List, cast, Optional

from mgr_module import HandleCommandResult
from mgr_module import NFS_POOL_NAME as POOL_NAME

from ceph.deployment.service_spec import ServiceSpec, NFSServiceSpec

from orchestrator import DaemonDescription

from cephadm.services.cephadmservice import AuthEntity, CephadmDaemonDeploySpec, CephService

logger = logging.getLogger(__name__)


class NFSService(CephService):
    """Cephadm service handler for NFS (ganesha) daemons.

    Handles keyring creation, ganesha config generation, grace-table
    maintenance and cleanup for rank-assigned nfs daemons.
    """

    TYPE = 'nfs'

    def ranked(self) -> bool:
        # NFS daemons get stable ranks so each one has a durable nodeid
        # in the ganesha grace database.
        return True

    def fence(self, daemon_id: str) -> None:
        """Revoke a stale daemon's cephx key so it can no longer touch RADOS."""
        logger.info(f'Fencing old nfs.{daemon_id}')
        ret, out, err = self.mgr.mon_command({
            'prefix': 'auth rm',
            'entity': f'client.nfs.{daemon_id}',
        })

        # TODO: block/fence this entity (in case it is still running somewhere)

    def fence_old_ranks(self,
                        spec: ServiceSpec,
                        rank_map: Dict[int, Dict[int, Optional[str]]],
                        num_ranks: int) -> None:
        """Fence daemons for ranks/generations that are no longer wanted.

        Ranks >= num_ranks are retired entirely (and removed from the
        ganesha grace table); for surviving ranks, only the newest
        generation is kept.
        """
        for rank, m in list(rank_map.items()):
            if rank >= num_ranks:
                # this rank no longer exists: fence every generation of it
                for daemon_id in m.values():
                    if daemon_id is not None:
                        self.fence(daemon_id)
                del rank_map[rank]
                nodeid = f'{spec.service_name()}.{rank}'
                self.mgr.log.info(f'Removing {nodeid} from the ganesha grace table')
                self.run_grace_tool(cast(NFSServiceSpec, spec), 'remove', nodeid)
                self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map)
            else:
                # rank survives: fence all but the newest generation
                max_gen = max(m.keys())
                for gen, daemon_id in list(m.items()):
                    if gen < max_gen:
                        if daemon_id is not None:
                            self.fence(daemon_id)
                        del rank_map[rank][gen]
                        self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map)

    def config(self, spec: NFSServiceSpec) -> None:  # type: ignore
        """Make sure the shared ganesha pool exists before deploying."""
        from nfs.cluster import create_ganesha_pool

        assert self.TYPE == spec.service_type
        create_ganesha_pool(self.mgr)

    def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
        """Populate the daemon spec with its generated config and deps."""
        assert self.TYPE == daemon_spec.daemon_type
        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
        return daemon_spec

    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
        """Build the cephadm config-json (keyrings + ganesha.conf) for one daemon."""
        assert self.TYPE == daemon_spec.daemon_type

        daemon_type = daemon_spec.daemon_type
        daemon_id = daemon_spec.daemon_id
        host = daemon_spec.host
        spec = cast(NFSServiceSpec, self.mgr.spec_store[daemon_spec.service_name].spec)

        deps: List[str] = []

        nodeid = f'{daemon_spec.service_name}.{daemon_spec.rank}'

        # create the RADOS recovery pool keyring
        rados_user = f'{daemon_type}.{daemon_id}'
        rados_keyring = self.create_keyring(daemon_spec)

        # ensure rank is known to ganesha
        self.mgr.log.info(f'Ensuring {nodeid} is in the ganesha grace table')
        self.run_grace_tool(spec, 'add', nodeid)

        # create the rados config object
        self.create_rados_config_obj(spec)

        # create the RGW keyring
        rgw_user = f'{rados_user}-rgw'
        rgw_keyring = self.create_rgw_keyring(daemon_spec)

        def get_ganesha_conf() -> str:
            # render the ganesha.conf from the jinja2 template
            context = {
                "user": rados_user,
                "nodeid": nodeid,
                "pool": POOL_NAME,
                "namespace": spec.service_id,
                "rgw_user": rgw_user,
                "url": f'rados://{POOL_NAME}/{spec.service_id}/{spec.rados_config_name()}',
                # fall back to default NFS port if not present in daemon_spec
                "port": daemon_spec.ports[0] if daemon_spec.ports else 2049,
                "bind_addr": daemon_spec.ip if daemon_spec.ip else '',
            }
            return self.mgr.template.render('services/nfs/ganesha.conf.j2', context)

        def get_cephadm_config() -> Dict[str, Any]:
            # assemble the config-json consumed by the cephadm binary
            config: Dict[str, Any] = {}
            config['pool'] = POOL_NAME
            config['namespace'] = spec.service_id
            config['userid'] = rados_user
            config['extra_args'] = ['-N', 'NIV_EVENT']
            config['files'] = {
                'ganesha.conf': get_ganesha_conf(),
            }
            config.update(
                self.get_config_and_keyring(
                    daemon_type, daemon_id,
                    keyring=rados_keyring,
                    host=host
                )
            )
            config['rgw'] = {
                'cluster': 'ceph',
                'user': rgw_user,
                'keyring': rgw_keyring,
            }
            logger.debug('Generated cephadm config-json: %s' % config)
            return config

        return get_cephadm_config(), deps

    def create_rados_config_obj(self,
                                spec: NFSServiceSpec,
                                clobber: bool = False) -> None:
        """Create (or, with clobber, overwrite) the shared rados config object."""
        objname = spec.rados_config_name()
        cmd = [
            'rados',
            '-n', f"mgr.{self.mgr.get_mgr_id()}",
            '-k', str(self.mgr.get_ceph_option('keyring')),
            '-p', POOL_NAME,
            '--namespace', cast(str, spec.service_id),
        ]
        result = subprocess.run(
            cmd + ['get', objname, '-'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            timeout=10)
        if not result.returncode and not clobber:
            logger.info('Rados config object exists: %s' % objname)
        else:
            logger.info('Creating rados config object: %s' % objname)
            result = subprocess.run(
                cmd + ['put', objname, '-'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                timeout=10)
            if result.returncode:
                self.mgr.log.warning(
                    f'Unable to create rados config object {objname}: {result.stderr.decode("utf-8")}'
                )
                raise RuntimeError(result.stderr.decode("utf-8"))

    def create_keyring(self, daemon_spec: CephadmDaemonDeploySpec) -> str:
        """Create the daemon's cephx key with access to its recovery-pool namespace."""
        daemon_id = daemon_spec.daemon_id
        spec = cast(NFSServiceSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
        entity: AuthEntity = self.get_auth_entity(daemon_id)

        osd_caps = 'allow rw pool=%s namespace=%s' % (POOL_NAME, spec.service_id)

        logger.info('Creating key for %s' % entity)
        keyring = self.get_keyring_with_caps(entity,
                                             ['mon', 'allow r',
                                              'osd', osd_caps])

        return keyring

    def create_rgw_keyring(self, daemon_spec: CephadmDaemonDeploySpec) -> str:
        """Create the companion RGW key used by ganesha's RGW FSAL."""
        daemon_id = daemon_spec.daemon_id
        entity: AuthEntity = self.get_auth_entity(f'{daemon_id}-rgw')

        logger.info('Creating key for %s' % entity)
        keyring = self.get_keyring_with_caps(entity,
                                             ['mon', 'allow r',
                                              'osd', 'allow rwx tag rgw *=*'])

        return keyring

    def run_grace_tool(self,
                       spec: NFSServiceSpec,
                       action: str,
                       nodeid: str) -> None:
        """Run ganesha-rados-grace (add/remove) for the given nodeid."""
        # write a temp keyring and referencing config file. this is a kludge
        # because the ganesha-grace-tool can only authenticate as a client (and
        # not a mgr). Also, it doesn't allow you to pass a keyring location via
        # the command line, nor does it parse the CEPH_ARGS env var.
        tmp_id = f'mgr.nfs.grace.{spec.service_name()}'
        entity = AuthEntity(f'client.{tmp_id}')
        keyring = self.get_keyring_with_caps(
            entity,
            ['mon', 'allow r', 'osd', f'allow rwx pool {POOL_NAME}']
        )
        tmp_keyring = tempfile.NamedTemporaryFile(mode='w', prefix='mgr-grace-keyring')
        os.fchmod(tmp_keyring.fileno(), 0o600)
        tmp_keyring.write(keyring)
        tmp_keyring.flush()
        tmp_conf = tempfile.NamedTemporaryFile(mode='w', prefix='mgr-grace-conf')
        tmp_conf.write(self.mgr.get_minimal_ceph_conf())
        tmp_conf.write(f'\tkeyring = {tmp_keyring.name}\n')
        tmp_conf.flush()
        try:
            cmd: List[str] = [
                'ganesha-rados-grace',
                '--cephconf', tmp_conf.name,
                '--userid', tmp_id,
                '--pool', POOL_NAME,
                '--ns', cast(str, spec.service_id),
                action, nodeid,
            ]
            self.mgr.log.debug(cmd)
            result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                    timeout=10)
            if result.returncode:
                self.mgr.log.warning(
                    f'ganesha-rados-grace tool failed: {result.stderr.decode("utf-8")}'
                )
                raise RuntimeError(f'grace tool failed: {result.stderr.decode("utf-8")}')

        finally:
            # always drop the throw-away client key again
            self.mgr.check_mon_command({
                'prefix': 'auth rm',
                'entity': entity,
            })

    def remove_rgw_keyring(self, daemon: DaemonDescription) -> None:
        """Delete the per-daemon RGW key created by create_rgw_keyring()."""
        assert daemon.daemon_id is not None
        daemon_id: str = daemon.daemon_id
        entity: AuthEntity = self.get_auth_entity(f'{daemon_id}-rgw')

        logger.info(f'Removing key for {entity}')
        self.mgr.check_mon_command({
            'prefix': 'auth rm',
            'entity': entity,
        })

    def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None:
        """On daemon removal, also clean up the companion RGW key."""
        super().post_remove(daemon, is_failed_deploy=is_failed_deploy)
        self.remove_rgw_keyring(daemon)

    def ok_to_stop(self,
                   daemon_ids: List[str],
                   force: bool = False,
                   known: Optional[List[str]] = None) -> HandleCommandResult:
        """Refuse to stop the last nfs daemon; warn (or allow with force) otherwise."""
        # if only 1 nfs, alert user (this is not passable with --force)
        warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'NFS', 1, True)
        if warn:
            return HandleCommandResult(-errno.EBUSY, '', warn_message)

        # if reached here, there is > 1 nfs daemon.
        if force:
            return HandleCommandResult(0, warn_message, '')

        # if reached here, > 1 nfs daemon and no force flag.
        # Provide warning
        warn_message = "WARNING: Removing NFS daemons can cause clients to lose connectivity. "
        return HandleCommandResult(-errno.EBUSY, '', warn_message)

    def purge(self, service_name: str) -> None:
        """Best-effort removal of the service's grace object from RADOS."""
        if service_name not in self.mgr.spec_store:
            return
        spec = cast(NFSServiceSpec, self.mgr.spec_store[service_name].spec)

        logger.info(f'Removing grace file for {service_name}')
        cmd = [
            'rados',
            '-n', f"mgr.{self.mgr.get_mgr_id()}",
            '-k', str(self.mgr.get_ceph_option('keyring')),
            '-p', POOL_NAME,
            '--namespace', cast(str, spec.service_id),
            'rm', 'grace',
        ]
        subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=10
        )
import json
import logging
from threading import Lock
from typing import List, Dict, Any, Set, Tuple, cast, Optional, TYPE_CHECKING

from ceph.deployment import translate
from ceph.deployment.drive_group import DriveGroupSpec
from ceph.deployment.drive_selection import DriveSelection
from ceph.deployment.inventory import Device
from ceph.utils import datetime_to_str, str_to_datetime

from datetime import datetime
import orchestrator
from cephadm.serve import CephadmServe
from cephadm.utils import forall_hosts
from ceph.utils import datetime_now
from orchestrator import OrchestratorError, DaemonDescription
from mgr_module import MonCommandFailed

from cephadm.services.cephadmservice import CephadmDaemonDeploySpec, CephService

if TYPE_CHECKING:
    from cephadm.module import CephadmOrchestrator

logger = logging.getLogger(__name__)


class OSDService(CephService):
    """Cephadm service handler for OSDs.

    Translates DriveGroupSpecs into ceph-volume invocations, deploys
    daemon instances for the resulting OSDs, and produces OSDSpec previews.
    """

    TYPE = 'osd'

    def create_from_spec(self, drive_group: DriveGroupSpec) -> str:
        """Apply a DriveGroupSpec across all matching hosts; return a summary string."""
        logger.debug(f"Processing DriveGroup {drive_group}")
        osd_id_claims = OsdIdClaims(self.mgr)
        if osd_id_claims.get():
            logger.info(
                f"Found osd claims for drivegroup {drive_group.service_id} -> {osd_id_claims.get()}")

        @forall_hosts
        def create_from_spec_one(host: str, drive_selection: DriveSelection) -> Optional[str]:
            # skip this host if there has been no change in inventory
            if not self.mgr.cache.osdspec_needs_apply(host, drive_group):
                self.mgr.log.debug("skipping apply of %s on %s (no change)" % (
                    host, drive_group))
                return None
            # skip this host if we cannot schedule here
            if self.mgr.inventory.has_label(host, '_no_schedule'):
                return None

            osd_id_claims_for_host = osd_id_claims.filtered_by_host(host)

            cmds: List[str] = self.driveselection_to_ceph_volume(drive_selection,
                                                                 osd_id_claims_for_host)
            if not cmds:
                logger.debug("No data_devices, skipping DriveGroup: {}".format(
                    drive_group.service_id))
                return None

            logger.debug('Applying service osd.%s on host %s...' % (
                drive_group.service_id, host
            ))
            start_ts = datetime_now()
            env_vars: List[str] = [f"CEPH_VOLUME_OSDSPEC_AFFINITY={drive_group.service_id}"]
            ret_msg = self.create_single_host(
                drive_group, host, cmds,
                replace_osd_ids=osd_id_claims_for_host, env_vars=env_vars
            )
            self.mgr.cache.update_osdspec_last_applied(
                host, drive_group.service_name(), start_ts
            )
            self.mgr.cache.save_host(host)
            return ret_msg

        ret = create_from_spec_one(self.prepare_drivegroup(drive_group))
        return ", ".join(filter(None, ret))

    def create_single_host(self,
                           drive_group: DriveGroupSpec,
                           host: str, cmds: List[str], replace_osd_ids: List[str],
                           env_vars: Optional[List[str]] = None) -> str:
        """Run the ceph-volume commands on one host, then deploy daemon instances."""
        for cmd in cmds:
            out, err, code = self._run_ceph_volume_command(host, cmd, env_vars=env_vars)
            if code == 1 and ', it is already prepared' in '\n'.join(err):
                # HACK: when we create against an existing LV, ceph-volume
                # returns an error and the above message. To make this
                # command idempotent, tolerate this "error" and continue.
                logger.debug('the device was already prepared; continuing')
                code = 0
            if code:
                raise RuntimeError(
                    'cephadm exited with an error code: %d, stderr:%s' % (
                        code, '\n'.join(err)))
        return self.deploy_osd_daemons_for_existing_osds(host, drive_group.service_name(),
                                                         replace_osd_ids)

    def deploy_osd_daemons_for_existing_osds(self, host: str, service_name: str,
                                             replace_osd_ids: Optional[List[str]] = None) -> str:
        """Create cephadm daemon instances for OSDs found on a host (lvm and raw)."""
        if replace_osd_ids is None:
            replace_osd_ids = OsdIdClaims(self.mgr).filtered_by_host(host)
        assert replace_osd_ids is not None

        # check result: lvm
        osds_elems: dict = CephadmServe(self.mgr)._run_cephadm_json(
            host, 'osd', 'ceph-volume',
            [
                '--',
                'lvm', 'list',
                '--format', 'json',
            ])
        before_osd_uuid_map = self.mgr.get_osd_uuid_map(only_up=True)
        fsid = self.mgr._cluster_fsid
        osd_uuid_map = self.mgr.get_osd_uuid_map()
        created = []
        for osd_id, osds in osds_elems.items():
            for osd in osds:
                if osd['type'] == 'db':
                    continue
                if osd['tags']['ceph.cluster_fsid'] != fsid:
                    logger.debug('mismatched fsid, skipping %s' % osd)
                    continue
                if osd_id in before_osd_uuid_map and osd_id not in replace_osd_ids:
                    # if it exists but is part of the replacement operation, don't skip
                    continue
                if self.mgr.cache.has_daemon(f'osd.{osd_id}', host):
                    # cephadm daemon instance already exists
                    logger.debug(f'osd id {osd_id} daemon already exists')
                    continue
                if osd_id not in osd_uuid_map:
                    logger.debug('osd id {} does not exist in cluster'.format(osd_id))
                    continue
                if osd_uuid_map.get(osd_id) != osd['tags']['ceph.osd_fsid']:
                    logger.debug('mismatched osd uuid (cluster has %s, osd '
                                 'has %s)' % (
                                     osd_uuid_map.get(osd_id),
                                     osd['tags']['ceph.osd_fsid']))
                    continue

                created.append(osd_id)
                daemon_spec: CephadmDaemonDeploySpec = CephadmDaemonDeploySpec(
                    service_name=service_name,
                    daemon_id=str(osd_id),
                    host=host,
                    daemon_type='osd',
                )
                daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
                CephadmServe(self.mgr)._create_daemon(
                    daemon_spec,
                    osd_uuid_map=osd_uuid_map)

        # check result: raw
        raw_elems: dict = CephadmServe(self.mgr)._run_cephadm_json(
            host, 'osd', 'ceph-volume',
            [
                '--',
                'raw', 'list',
                '--format', 'json',
            ])
        for osd_uuid, osd in raw_elems.items():
            if osd.get('ceph_fsid') != fsid:
                continue
            osd_id = str(osd.get('osd_id', '-1'))
            if osd_id in before_osd_uuid_map and osd_id not in replace_osd_ids:
                # if it exists but is part of the replacement operation, don't skip
                continue
            if self.mgr.cache.has_daemon(f'osd.{osd_id}', host):
                # cephadm daemon instance already exists
                logger.debug(f'osd id {osd_id} daemon already exists')
                continue
            if osd_id not in osd_uuid_map:
                logger.debug('osd id {} does not exist in cluster'.format(osd_id))
                continue
            if osd_uuid_map.get(osd_id) != osd_uuid:
                logger.debug('mismatched osd uuid (cluster has %s, osd '
                             'has %s)' % (osd_uuid_map.get(osd_id), osd_uuid))
                continue
            if osd_id in created:
                # already handled in the lvm pass above
                continue

            created.append(osd_id)
            daemon_spec = CephadmDaemonDeploySpec(
                service_name=service_name,
                daemon_id=osd_id,
                host=host,
                daemon_type='osd',
            )
            daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
            CephadmServe(self.mgr)._create_daemon(
                daemon_spec,
                osd_uuid_map=osd_uuid_map)

        if created:
            self.mgr.cache.invalidate_host_devices(host)
            self.mgr.cache.invalidate_autotune(host)
            return "Created osd(s) %s on host '%s'" % (','.join(created), host)
        else:
            return "Created no osd(s) on host %s; already created?" % host

    def prepare_drivegroup(self, drive_group: DriveGroupSpec) -> List[Tuple[str, DriveSelection]]:
        """Resolve a DriveGroupSpec into per-host DriveSelections."""
        # 1) use fn_filter to determine matching_hosts
        matching_hosts = drive_group.placement.filter_matching_hostspecs(
            self.mgr._schedulable_hosts())
        # 2) Map the inventory to the InventoryHost object
        host_ds_map = []

        # set osd_id_claims

        def _find_inv_for_host(hostname: str, inventory_dict: dict) -> List[Device]:
            # This is stupid and needs to be loaded with the host
            for _host, _inventory in inventory_dict.items():
                if _host == hostname:
                    return _inventory
            raise OrchestratorError("No inventory found for host: {}".format(hostname))

        # 3) iterate over matching_host and call DriveSelection
        logger.debug(f"Checking matching hosts -> {matching_hosts}")
        for host in matching_hosts:
            inventory_for_host = _find_inv_for_host(host, self.mgr.cache.devices)
            logger.debug(f"Found inventory for host {inventory_for_host}")

            # List of Daemons on that host
            dd_for_spec = self.mgr.cache.get_daemons_by_service(drive_group.service_name())
            dd_for_spec_and_host = [dd for dd in dd_for_spec if dd.hostname == host]

            drive_selection = DriveSelection(drive_group, inventory_for_host,
                                             existing_daemons=len(dd_for_spec_and_host))
            logger.debug(f"Found drive selection {drive_selection}")
            if drive_group.method and drive_group.method == 'raw':
                # ceph-volume can currently only handle a 1:1 mapping
                # of data/db/wal devices for raw mode osds. If db/wal devices
                # are defined and the number does not match the number of data
                # devices, we need to bail out
                if drive_selection.data_devices() and drive_selection.db_devices():
                    if len(drive_selection.data_devices()) != len(drive_selection.db_devices()):
                        raise OrchestratorError('Raw mode only supports a 1:1 ratio of data to db devices. Found '
                                                f'{len(drive_selection.data_devices())} potential data device(s) and '
                                                f'{len(drive_selection.db_devices())} potential db device(s) on host {host}')
                if drive_selection.data_devices() and drive_selection.wal_devices():
                    if len(drive_selection.data_devices()) != len(drive_selection.wal_devices()):
                        raise OrchestratorError('Raw mode only supports a 1:1 ratio of data to wal devices. Found '
                                                f'{len(drive_selection.data_devices())} potential data device(s) and '
                                                f'{len(drive_selection.wal_devices())} potential wal device(s) on host {host}')
            host_ds_map.append((host, drive_selection))
        return host_ds_map

    @staticmethod
    def driveselection_to_ceph_volume(drive_selection: DriveSelection,
                                      osd_id_claims: Optional[List[str]] = None,
                                      preview: bool = False) -> List[str]:
        """Translate a DriveSelection into the ceph-volume command lines to run."""
        logger.debug(f"Translating DriveGroup <{drive_selection.spec}> to ceph-volume command")
        cmds: List[str] = translate.to_ceph_volume(drive_selection,
                                                   osd_id_claims, preview=preview).run()
        logger.debug(f"Resulting ceph-volume cmds: {cmds}")
        return cmds

    def get_previews(self, host: str) -> List[Dict[str, Any]]:
        """Return OSDSpec previews for every spec that matches this host."""
        # Find OSDSpecs that match host.
        osdspecs = self.resolve_osdspecs_for_host(host)
        return self.generate_previews(osdspecs, host)

    def generate_previews(self, osdspecs: List[DriveGroupSpec], for_host: str) -> List[Dict[str, Any]]:
        """

        The return should look like this:

        [
          {'data': {<metadata>},
           'osdspec': <name of osdspec>,
           'host': <name of host>,
           'notes': <notes>
           },

           {'data': ...,
            'osdspec': ..,
            'host': ...,
            'notes': ...
           }
        ]

        Note: One host can have multiple previews based on its assigned OSDSpecs.
        """
        self.mgr.log.debug(f"Generating OSDSpec previews for {osdspecs}")
        ret_all: List[Dict[str, Any]] = []
        if not osdspecs:
            return ret_all
        for osdspec in osdspecs:

            # populate osd_id_claims
            osd_id_claims = OsdIdClaims(self.mgr)

            # prepare driveselection
            for host, ds in self.prepare_drivegroup(osdspec):
                if host != for_host:
                    continue

                # driveselection for host
                cmds: List[str] = self.driveselection_to_ceph_volume(ds,
                                                                     osd_id_claims.filtered_by_host(host),
                                                                     preview=True)
                if not cmds:
                    logger.debug("No data_devices, skipping DriveGroup: {}".format(
                        osdspec.service_name()))
                    continue

                # get preview data from ceph-volume
                for cmd in cmds:
                    out, err, code = self._run_ceph_volume_command(host, cmd)
                    if out:
                        try:
                            concat_out: Dict[str, Any] = json.loads(' '.join(out))
                        except ValueError:
                            logger.exception('Cannot decode JSON: \'%s\'' % ' '.join(out))
                            concat_out = {}
                        notes = []
                        if osdspec.data_devices is not None and osdspec.data_devices.limit and len(concat_out) < osdspec.data_devices.limit:
                            found = len(concat_out)
                            limit = osdspec.data_devices.limit
                            notes.append(
                                f'NOTE: Did not find enough disks matching filter on host {host} to reach data device limit (Found: {found} | Limit: {limit})')
                        ret_all.append({'data': concat_out,
                                        'osdspec': osdspec.service_id,
                                        'host': host,
                                        'notes': notes})
        return ret_all

    def resolve_hosts_for_osdspecs(self,
                                   specs: Optional[List[DriveGroupSpec]] = None
                                   ) -> List[str]:
        """Collect all hostnames matched by the given (or no) OSDSpecs."""
        osdspecs = []
        if specs:
            osdspecs = [cast(DriveGroupSpec, spec) for spec in specs]
        if not osdspecs:
            self.mgr.log.debug("No OSDSpecs found")
            return []
        return sum([spec.placement.filter_matching_hostspecs(self.mgr._schedulable_hosts()) for spec in osdspecs], [])

    def resolve_osdspecs_for_host(self, host: str,
                                  specs: Optional[List[DriveGroupSpec]] = None) -> List[DriveGroupSpec]:
        """Return the OSDSpecs whose placement matches the given host."""
        matching_specs = []
        self.mgr.log.debug(f"Finding OSDSpecs for host: <{host}>")
        if not specs:
            specs = [cast(DriveGroupSpec, spec) for (sn, spec) in self.mgr.spec_store.spec_preview.items()
                     if spec.service_type == 'osd']
        for spec in specs:
            if host in spec.placement.filter_matching_hostspecs(self.mgr._schedulable_hosts()):
                self.mgr.log.debug(f"Found OSDSpecs for host: <{host}> -> <{spec}>")
                matching_specs.append(spec)
        return matching_specs

    def _run_ceph_volume_command(self, host: str,
                                 cmd: str, env_vars: Optional[List[str]] = None
                                 ) -> Tuple[List[str], List[str], int]:
        """Run one ceph-volume command on a host, feeding it a bootstrap config/keyring."""
        self.mgr.inventory.assert_host(host)

        # get bootstrap key
        ret, keyring, err = self.mgr.check_mon_command({
            'prefix': 'auth get',
            'entity': 'client.bootstrap-osd',
        })

        j = json.dumps({
            'config': self.mgr.get_minimal_ceph_conf(),
            'keyring': keyring,
        })

        split_cmd = cmd.split(' ')
        _cmd = ['--config-json', '-', '--']
        _cmd.extend(split_cmd)
        out, err, code = CephadmServe(self.mgr)._run_cephadm(
            host, 'osd', 'ceph-volume',
            _cmd,
            env_vars=env_vars,
            stdin=j,
            error_ok=True)
        return out, err, code

    def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None:
        # Do not remove the osd.N keyring, if we failed to deploy the OSD, because
        # we cannot recover from it. The OSD keys are created by ceph-volume and not by
        # us.
        if not is_failed_deploy:
            super().post_remove(daemon, is_failed_deploy=is_failed_deploy)


class OsdIdClaims(object):
    """
    Retrieve and provide osd ids that can be reused in the cluster
    """

    def __init__(self, mgr: "CephadmOrchestrator") -> None:
        self.mgr: "CephadmOrchestrator" = mgr
        # host name -> destroyed osd ids available for reuse on that host
        self.osd_host_map: Dict[str, List[str]] = dict()
        self.refresh()

    def refresh(self) -> None:
        """Re-read the list of destroyed OSDs from the osd tree."""
        try:
            ret, out, err = self.mgr.check_mon_command({
                'prefix': 'osd tree',
                'states': ['destroyed'],
                'format': 'json'
            })
        except MonCommandFailed as e:
            logger.exception('osd tree failed')
            raise OrchestratorError(str(e))
        try:
            tree = json.loads(out)
        except ValueError:
            logger.exception(f'Cannot decode JSON: \'{out}\'')
            return

        nodes = tree.get('nodes', {})
        for node in nodes:
            if node.get('type') == 'host':
                self.osd_host_map.update(
                    {node.get('name'): [str(_id) for _id in node.get('children', list())]}
                )
        if self.osd_host_map:
            self.mgr.log.info(f"Found osd claims -> {self.osd_host_map}")

    def get(self) -> Dict[str, List[str]]:
        """Return the full host -> reusable-osd-ids mapping."""
        return self.osd_host_map

    def filtered_by_host(self, host: str) -> List[str]:
        """
        Return the list of osd ids that can be reused in a host

        OSD id claims in CRUSH map are linked to the bare name of
        the hostname. In case of FQDN hostnames the host is searched by the
        bare name
        """
        return self.osd_host_map.get(host.split(".")[0], [])
class RemoveUtil(object):
    """Helper wrapping the mon commands needed to drain, destroy and purge OSDs."""

    def __init__(self, mgr: "CephadmOrchestrator") -> None:
        self.mgr: "CephadmOrchestrator" = mgr

    def get_osds_in_cluster(self) -> List[str]:
        """Return the ids of all OSDs currently present in the osdmap."""
        osd_map = self.mgr.get_osdmap()
        return [str(x.get('osd')) for x in osd_map.dump().get('osds', [])]

    def osd_df(self) -> dict:
        """Return the parsed output of 'osd df', or {} on decode failure."""
        base_cmd = 'osd df'
        ret, out, err = self.mgr.mon_command({
            'prefix': base_cmd,
            'format': 'json'
        })
        try:
            return json.loads(out)
        except ValueError:
            logger.exception(f'Cannot decode JSON: \'{out}\'')
            return {}

    def get_pg_count(self, osd_id: int, osd_df: Optional[dict] = None) -> int:
        """Return the PG count of an OSD, or -1 if it cannot be determined."""
        if not osd_df:
            osd_df = self.osd_df()
        osd_nodes = osd_df.get('nodes', [])
        for osd_node in osd_nodes:
            if osd_node.get('id') == int(osd_id):
                return osd_node.get('pgs', -1)
        return -1

    def find_osd_stop_threshold(self, osds: List["OSD"]) -> Optional[List["OSD"]]:
        """
        Cut osd_id list in half until it's ok-to-stop

        :param osds: list of osd_ids
        :return: list of ods_ids that can be stopped at once
        """
        if not osds:
            return []
        while not self.ok_to_stop(osds):
            if len(osds) <= 1:
                # can't even stop one OSD, aborting
                self.mgr.log.debug(
                    "Can't even stop one OSD. Cluster is probably busy. Retrying later..")
                return []

            # This potentially prolongs the global wait time.
            self.mgr.event.wait(1)
            # splitting osd_ids in half until ok_to_stop yields success
            # maybe popping ids off one by one is better here..depends on the cluster size I guess..
            # There's a lot of room for micro adjustments here
            osds = osds[len(osds) // 2:]
        return osds

        # todo start draining
        #  return all([osd.start_draining() for osd in osds])

    def ok_to_stop(self, osds: List["OSD"]) -> bool:
        """Ask the mons whether the given OSDs can be stopped together."""
        cmd_args = {
            'prefix': "osd ok-to-stop",
            'ids': [str(osd.osd_id) for osd in osds]
        }
        return self._run_mon_cmd(cmd_args, error_ok=True)

    def set_osd_flag(self, osds: List["OSD"], flag: str) -> bool:
        """Apply an osd flag (e.g. 'out', 'in', 'down') to the given OSDs."""
        base_cmd = f"osd {flag}"
        self.mgr.log.debug(f"running cmd: {base_cmd} on ids {osds}")
        ret, out, err = self.mgr.mon_command({
            'prefix': base_cmd,
            'ids': [str(osd.osd_id) for osd in osds]
        })
        if ret != 0:
            self.mgr.log.error(f"Could not set {flag} flag for {osds}. <{err}>")
            return False
        self.mgr.log.info(f"{','.join([str(o) for o in osds])} now {flag}")
        return True

    def get_weight(self, osd: "OSD") -> Optional[float]:
        """Look up an OSD's crush weight, or None if it cannot be read."""
        ret, out, err = self.mgr.mon_command({
            'prefix': 'osd crush tree',
            'format': 'json',
        })
        if ret != 0:
            self.mgr.log.error(f"Could not dump crush weights. <{err}>")
            return None
        j = json.loads(out)
        for n in j.get("nodes", []):
            if n.get("name") == f"osd.{osd.osd_id}":
                self.mgr.log.info(f"{osd} crush weight is {n.get('crush_weight')}")
                return n.get("crush_weight")
        return None

    def reweight_osd(self, osd: "OSD", weight: float) -> bool:
        """Set an OSD's crush weight; returns True on success."""
        self.mgr.log.debug(f"running cmd: osd crush reweight on {osd}")
        ret, out, err = self.mgr.mon_command({
            'prefix': "osd crush reweight",
            'name': f"osd.{osd.osd_id}",
            'weight': weight,
        })
        if ret != 0:
            self.mgr.log.error(f"Could not reweight {osd} to {weight}. <{err}>")
            return False
        self.mgr.log.info(f"{osd} weight is now {weight}")
        return True

    def zap_osd(self, osd: "OSD") -> str:
        "Zaps all devices that are associated with an OSD"
        if osd.hostname is not None:
            out, err, code = CephadmServe(self.mgr)._run_cephadm(
                osd.hostname, 'osd', 'ceph-volume',
                ['--', 'lvm', 'zap', '--destroy', '--osd-id', str(osd.osd_id)],
                error_ok=True)
            self.mgr.cache.invalidate_host_devices(osd.hostname)
            if code:
                raise OrchestratorError('Zap failed: %s' % '\n'.join(out + err))
            return '\n'.join(out + err)
        raise OrchestratorError(f"Failed to zap OSD {osd.osd_id} because host was unknown")

    def safe_to_destroy(self, osd_ids: List[int]) -> bool:
        """ Queries the safe-to-destroy flag for OSDs """
        cmd_args = {'prefix': 'osd safe-to-destroy',
                    'ids': [str(x) for x in osd_ids]}
        return self._run_mon_cmd(cmd_args, error_ok=True)

    def destroy_osd(self, osd_id: int) -> bool:
        """ Destroys an OSD (forcefully) """
        cmd_args = {'prefix': 'osd destroy-actual',
                    'id': int(osd_id),
                    'yes_i_really_mean_it': True}
        return self._run_mon_cmd(cmd_args)

    def purge_osd(self, osd_id: int) -> bool:
        """ Purges an OSD from the cluster (forcefully) """
        cmd_args = {
            'prefix': 'osd purge-actual',
            'id': int(osd_id),
            'yes_i_really_mean_it': True
        }
        return self._run_mon_cmd(cmd_args)

    def _run_mon_cmd(self, cmd_args: dict, error_ok: bool = False) -> bool:
        """
        Generic command to run mon_command and evaluate/log the results
        """
        ret, out, err = self.mgr.mon_command(cmd_args)
        if ret != 0:
            self.mgr.log.debug(f"ran {cmd_args} with mon_command")
            if not error_ok:
                self.mgr.log.error(f"cmd: {cmd_args.get('prefix')} failed with: {err}. (errno:{ret})")
            return False
        self.mgr.log.debug(f"cmd: {cmd_args.get('prefix')} returns: {out}")
        return True
class NotFoundError(Exception):
    """Raised when a requested OSD cannot be found."""
    pass


class OSD:
    """State machine tracking one OSD's removal/replacement lifecycle.

    Tracks timestamps for the drain process, whether the operation was
    started/stopped, and delegates all cluster interaction to RemoveUtil.
    """

    def __init__(self,
                 osd_id: int,
                 remove_util: "RemoveUtil",
                 drain_started_at: Optional[datetime] = None,
                 process_started_at: Optional[datetime] = None,
                 drain_stopped_at: Optional[datetime] = None,
                 drain_done_at: Optional[datetime] = None,
                 draining: bool = False,
                 started: bool = False,
                 stopped: bool = False,
                 replace: bool = False,
                 force: bool = False,
                 hostname: Optional[str] = None,
                 zap: bool = False):
        # the ID of the OSD
        self.osd_id = osd_id

        # when did process (not the actual draining) start
        self.process_started_at = process_started_at

        # when did the drain start
        self.drain_started_at = drain_started_at

        # when did the drain stop
        self.drain_stopped_at = drain_stopped_at

        # when did the drain finish
        self.drain_done_at = drain_done_at

        # did the draining start
        self.draining = draining

        # was the operation started
        self.started = started

        # was the operation stopped
        self.stopped = stopped

        # If this is a replace or remove operation
        self.replace = replace
        # If we wait for the osd to be drained
        self.force = force
        # The name of the node
        self.hostname = hostname

        # mgr obj to make mgr/mon calls
        self.rm_util: "RemoveUtil" = remove_util

        # crush weight before draining, so it can be restored on stop
        self.original_weight: Optional[float] = None

        # Whether devices associated with the OSD should be zapped (DATA ERASED)
        self.zap = zap

    def start(self) -> None:
        """Mark the removal operation as started (idempotent)."""
        if self.started:
            logger.debug(f"Already started draining {self}")
            return None
        self.started = True
        self.stopped = False

    def start_draining(self) -> bool:
        """Begin draining: mark 'out' (replace) or reweight to 0 (remove)."""
        if self.stopped:
            logger.debug(f"Won't start draining {self}. OSD draining is stopped.")
            return False
        if self.replace:
            self.rm_util.set_osd_flag([self], 'out')
        else:
            self.original_weight = self.rm_util.get_weight(self)
            self.rm_util.reweight_osd(self, 0.0)
        # use tz-aware timestamps (datetime_now) for consistency with
        # datetime_to_str/str_to_datetime round-trips; utcnow() is naive
        self.drain_started_at = datetime_now()
        self.draining = True
        logger.debug(f"Started draining {self}.")
        return True

    def stop_draining(self) -> bool:
        """Abort draining and restore the OSD's previous state."""
        if self.replace:
            self.rm_util.set_osd_flag([self], 'in')
        else:
            # 'is not None' so a legitimate saved weight of 0.0 is restored too
            if self.original_weight is not None:
                self.rm_util.reweight_osd(self, self.original_weight)
        self.drain_stopped_at = datetime_now()
        self.draining = False
        logger.debug(f"Stopped draining {self}.")
        return True

    def stop(self) -> None:
        """Mark the removal operation as stopped and abort draining (idempotent)."""
        if self.stopped:
            logger.debug(f"Already stopped draining {self}")
            return None
        self.started = False
        self.stopped = True
        self.stop_draining()

    @property
    def is_draining(self) -> bool:
        """
        Consider an OSD draining when it is
        actively draining but not yet empty
        """
        return self.draining and not self.is_empty

    @property
    def is_ok_to_stop(self) -> bool:
        return self.rm_util.ok_to_stop([self])

    @property
    def is_empty(self) -> bool:
        """True once the OSD carries no PGs; records the drain-done timestamp."""
        if self.get_pg_count() == 0:
            if not self.drain_done_at:
                self.drain_done_at = datetime_now()
                self.draining = False
            return True
        return False

    def safe_to_destroy(self) -> bool:
        return self.rm_util.safe_to_destroy([self.osd_id])

    def down(self) -> bool:
        return self.rm_util.set_osd_flag([self], 'down')

    def destroy(self) -> bool:
        return self.rm_util.destroy_osd(self.osd_id)

    def do_zap(self) -> str:
        return self.rm_util.zap_osd(self)

    def purge(self) -> bool:
        return self.rm_util.purge_osd(self.osd_id)

    def get_pg_count(self) -> int:
        return self.rm_util.get_pg_count(self.osd_id)

    @property
    def exists(self) -> bool:
        return str(self.osd_id) in self.rm_util.get_osds_in_cluster()

    def drain_status_human(self) -> str:
        """Human-readable drain status for CLI output."""
        default_status = 'not started'
        status = 'started' if self.started and not self.draining else default_status
        status = 'draining' if self.draining else status
        status = 'done, waiting for purge' if self.drain_done_at and not self.draining else status
        return status

    def pg_count_str(self) -> str:
        """PG count as a string, 'n/a' when it could not be determined."""
        return 'n/a' if self.get_pg_count() < 0 else str(self.get_pg_count())

    def to_json(self) -> dict:
        """Serialize to a JSON-compatible dict (datetimes as strings)."""
        out: Dict[str, Any] = dict()
        out['osd_id'] = self.osd_id
        out['started'] = self.started
        out['draining'] = self.draining
        out['stopped'] = self.stopped
        out['replace'] = self.replace
        out['force'] = self.force
        out['zap'] = self.zap
        out['hostname'] = self.hostname  # type: ignore

        for k in ['drain_started_at', 'drain_stopped_at', 'drain_done_at', 'process_started_at']:
            if getattr(self, k):
                out[k] = datetime_to_str(getattr(self, k))
            else:
                out[k] = getattr(self, k)
        return out

    @classmethod
    def from_json(cls, inp: Optional[Dict[str, Any]], rm_util: "RemoveUtil") -> Optional["OSD"]:
        """Deserialize from to_json() output; returns None for empty input.

        Accepts the legacy 'nodename' key as an alias for 'hostname'.
        """
        if not inp:
            return None
        for date_field in ['drain_started_at', 'drain_stopped_at', 'drain_done_at', 'process_started_at']:
            if inp.get(date_field):
                inp.update({date_field: str_to_datetime(inp.get(date_field, ''))})
        inp.update({'remove_util': rm_util})
        if 'nodename' in inp:
            hostname = inp.pop('nodename')
            inp['hostname'] = hostname
        return cls(**inp)

    def __hash__(self) -> int:
        # identity is the osd id, matching __eq__
        return hash(self.osd_id)

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, OSD):
            return NotImplemented
        return self.osd_id == other.osd_id

    def __repr__(self) -> str:
        return f"osd.{self.osd_id}{' (draining)' if self.draining else ''}"
+ self.lock = Lock() + + def process_removal_queue(self) -> None: + """ + Performs actions in the _serve() loop to remove an OSD + when criteria is met. + + we can't hold self.lock, as we're calling _remove_daemon in the loop + """ + + # make sure that we don't run on OSDs that are not in the cluster anymore. + self.cleanup() + + # find osds that are ok-to-stop and not yet draining + ready_to_drain_osds = self._ready_to_drain_osds() + if ready_to_drain_osds: + # start draining those + _ = [osd.start_draining() for osd in ready_to_drain_osds] + + all_osds = self.all_osds() + + logger.debug( + f"{self.queue_size()} OSDs are scheduled " + f"for removal: {all_osds}") + + # Check all osds for their state and take action (remove, purge etc) + new_queue: Set[OSD] = set() + for osd in all_osds: # type: OSD + if not osd.force: + # skip criteria + if not osd.is_empty: + logger.debug(f"{osd} is not empty yet. Waiting a bit more") + new_queue.add(osd) + continue + + if not osd.safe_to_destroy(): + logger.debug( + f"{osd} is not safe-to-destroy yet. Waiting a bit more") + new_queue.add(osd) + continue + + # abort criteria + if not osd.down(): + # also remove it from the remove_osd list and set a health_check warning? 
+ raise orchestrator.OrchestratorError( + f"Could not mark {osd} down") + + # stop and remove daemon + assert osd.hostname is not None + + if self.mgr.cache.has_daemon(f'osd.{osd.osd_id}'): + CephadmServe(self.mgr)._remove_daemon(f'osd.{osd.osd_id}', osd.hostname) + logger.info(f"Successfully removed {osd} on {osd.hostname}") + else: + logger.info(f"Daemon {osd} on {osd.hostname} was already removed") + + if osd.replace: + # mark destroyed in osdmap + if not osd.destroy(): + raise orchestrator.OrchestratorError( + f"Could not destroy {osd}") + logger.info( + f"Successfully destroyed old {osd} on {osd.hostname}; ready for replacement") + else: + # purge from osdmap + if not osd.purge(): + raise orchestrator.OrchestratorError(f"Could not purge {osd}") + logger.info(f"Successfully purged {osd} on {osd.hostname}") + + if osd.zap: + # throws an exception if the zap fails + logger.info(f"Zapping devices for {osd} on {osd.hostname}") + osd.do_zap() + logger.info(f"Successfully zapped devices for {osd} on {osd.hostname}") + + logger.debug(f"Removing {osd} from the queue.") + + # self could change while this is processing (osds get added from the CLI) + # The new set is: 'an intersection of all osds that are still not empty/removed (new_queue) and + # osds that were added while this method was executed' + with self.lock: + self.osds.intersection_update(new_queue) + self._save_to_store() + + def cleanup(self) -> None: + # OSDs can always be cleaned up manually. This ensures that we run on existing OSDs + with self.lock: + for osd in self._not_in_cluster(): + self.osds.remove(osd) + + def _ready_to_drain_osds(self) -> List["OSD"]: + """ + Returns OSDs that are ok to stop and not yet draining. Only returns as many OSDs as can + be accomodated by the 'max_osd_draining_count' config value, considering the number of OSDs + that are already draining. 
+ """ + draining_limit = max(1, self.mgr.max_osd_draining_count) + num_already_draining = len(self.draining_osds()) + num_to_start_draining = max(0, draining_limit - num_already_draining) + stoppable_osds = self.rm_util.find_osd_stop_threshold(self.idling_osds()) + return [] if stoppable_osds is None else stoppable_osds[:num_to_start_draining] + + def _save_to_store(self) -> None: + osd_queue = [osd.to_json() for osd in self.osds] + logger.debug(f"Saving {osd_queue} to store") + self.mgr.set_store('osd_remove_queue', json.dumps(osd_queue)) + + def load_from_store(self) -> None: + with self.lock: + for k, v in self.mgr.get_store_prefix('osd_remove_queue').items(): + for osd in json.loads(v): + logger.debug(f"Loading osd ->{osd} from store") + osd_obj = OSD.from_json(osd, rm_util=self.rm_util) + if osd_obj is not None: + self.osds.add(osd_obj) + + def as_osd_ids(self) -> List[int]: + with self.lock: + return [osd.osd_id for osd in self.osds] + + def queue_size(self) -> int: + with self.lock: + return len(self.osds) + + def draining_osds(self) -> List["OSD"]: + with self.lock: + return [osd for osd in self.osds if osd.is_draining] + + def idling_osds(self) -> List["OSD"]: + with self.lock: + return [osd for osd in self.osds if not osd.is_draining and not osd.is_empty] + + def empty_osds(self) -> List["OSD"]: + with self.lock: + return [osd for osd in self.osds if osd.is_empty] + + def all_osds(self) -> List["OSD"]: + with self.lock: + return [osd for osd in self.osds] + + def _not_in_cluster(self) -> List["OSD"]: + return [osd for osd in self.osds if not osd.exists] + + def enqueue(self, osd: "OSD") -> None: + if not osd.exists: + raise NotFoundError() + with self.lock: + self.osds.add(osd) + osd.start() + + def rm(self, osd: "OSD") -> None: + if not osd.exists: + raise NotFoundError() + osd.stop() + with self.lock: + try: + logger.debug(f'Removing {osd} from the queue.') + self.osds.remove(osd) + except KeyError: + logger.debug(f"Could not find {osd} in queue.") + 
raise KeyError + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, OSDRemovalQueue): + return False + with self.lock: + return self.osds == other.osds diff --git a/src/pybind/mgr/cephadm/template.py b/src/pybind/mgr/cephadm/template.py new file mode 100644 index 000000000..0d62e587c --- /dev/null +++ b/src/pybind/mgr/cephadm/template.py @@ -0,0 +1,109 @@ +import copy +from typing import Optional, TYPE_CHECKING + +from jinja2 import Environment, PackageLoader, select_autoescape, StrictUndefined +from jinja2 import exceptions as j2_exceptions + +if TYPE_CHECKING: + from cephadm.module import CephadmOrchestrator + + +class TemplateError(Exception): + pass + + +class UndefinedError(TemplateError): + pass + + +class TemplateNotFoundError(TemplateError): + pass + + +class TemplateEngine: + def render(self, name: str, context: Optional[dict] = None) -> str: + raise NotImplementedError() + + +class Jinja2Engine(TemplateEngine): + def __init__(self) -> None: + self.env = Environment( + loader=PackageLoader('cephadm', 'templates'), + autoescape=select_autoescape(['html', 'xml'], default_for_string=False), + trim_blocks=True, + lstrip_blocks=True, + undefined=StrictUndefined + ) + + def render(self, name: str, context: Optional[dict] = None) -> str: + try: + template = self.env.get_template(name) + if context is None: + return template.render() + return template.render(context) + except j2_exceptions.UndefinedError as e: + raise UndefinedError(e.message) + except j2_exceptions.TemplateNotFound as e: + raise TemplateNotFoundError(e.message) + + def render_plain(self, source: str, context: Optional[dict]) -> str: + try: + template = self.env.from_string(source) + if context is None: + return template.render() + return template.render(context) + except j2_exceptions.UndefinedError as e: + raise UndefinedError(e.message) + except j2_exceptions.TemplateNotFound as e: + raise TemplateNotFoundError(e.message) + + +class TemplateMgr: + def __init__(self, mgr: 
"CephadmOrchestrator"): + self.engine = Jinja2Engine() + self.base_context = { + 'cephadm_managed': 'This file is generated by cephadm.' + } + self.mgr = mgr + + def render(self, name: str, + context: Optional[dict] = None, + managed_context: bool = True, + host: Optional[str] = None) -> str: + """Render a string from a template with context. + + :param name: template name. e.g. services/nfs/ganesha.conf.j2 + :type name: str + :param context: a dictionary that contains values to be used in the template, defaults + to None + :type context: Optional[dict], optional + :param managed_context: to inject default context like managed header or not, defaults + to True + :type managed_context: bool, optional + :param host: The host name used to build the key to access + the module's persistent key-value store. + :type host: Optional[str], optional + :return: the templated string + :rtype: str + """ + ctx = {} + if managed_context: + ctx = copy.deepcopy(self.base_context) + if context is not None: + ctx = {**ctx, **context} + + # Check if the given name exists in the module's persistent + # key-value store, e.g. 
+ # - blink_device_light_cmd + # - <host>/blink_device_light_cmd + # - services/nfs/ganesha.conf + store_name = name.rstrip('.j2') + custom_template = self.mgr.get_store(store_name, None) + if host and custom_template is None: + store_name = '{}/{}'.format(host, store_name) + custom_template = self.mgr.get_store(store_name, None) + + if custom_template: + return self.engine.render_plain(custom_template, ctx) + else: + return self.engine.render(name, ctx) diff --git a/src/pybind/mgr/cephadm/templates/blink_device_light_cmd.j2 b/src/pybind/mgr/cephadm/templates/blink_device_light_cmd.j2 new file mode 100644 index 000000000..dab115833 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/blink_device_light_cmd.j2 @@ -0,0 +1 @@ +lsmcli local-disk-{{ ident_fault }}-led-{{'on' if on else 'off'}} --path '{{ path or dev }}' diff --git a/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 b/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 new file mode 100644 index 000000000..4e394106f --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 @@ -0,0 +1,47 @@ +# {{ cephadm_managed }} +# See https://prometheus.io/docs/alerting/configuration/ for documentation. 
+ +global: + resolve_timeout: 5m +{% if not secure %} + http_config: + tls_config: + insecure_skip_verify: true +{% endif %} + +route: + receiver: 'default' + routes: + - group_by: ['alertname'] + group_wait: 10s + group_interval: 10s + repeat_interval: 1h + receiver: 'ceph-dashboard' +{% if snmp_gateway_urls %} + continue: true + - receiver: 'snmp-gateway' + repeat_interval: 1h + group_interval: 10s + group_by: ['alertname'] + match_re: + oid: "(1.3.6.1.4.1.50495.).*" +{% endif %} + +receivers: +- name: 'default' + webhook_configs: +{% for url in default_webhook_urls %} + - url: '{{ url }}' +{% endfor %} +- name: 'ceph-dashboard' + webhook_configs: +{% for url in dashboard_urls %} + - url: '{{ url }}/api/prometheus_receiver' +{% endfor %} +{% if snmp_gateway_urls %} +- name: 'snmp-gateway' + webhook_configs: +{% for url in snmp_gateway_urls %} + - url: '{{ url }}' +{% endfor %} +{% endif %} diff --git a/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2 b/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2 new file mode 100644 index 000000000..170e6f246 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2 @@ -0,0 +1,18 @@ +# {{ cephadm_managed }} +deleteDatasources: +{% for host in hosts %} + - name: 'Dashboard{{ loop.index }}' + orgId: 1 +{% endfor %} + +datasources: +{% for host in hosts %} + - name: 'Dashboard{{ loop.index }}' + type: 'prometheus' + access: 'proxy' + orgId: 1 + url: '{{ host }}' + basicAuth: false + isDefault: {{ 'true' if loop.first else 'false' }} + editable: false +{% endfor %} diff --git a/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 b/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 new file mode 100644 index 000000000..cf23802d7 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 @@ -0,0 +1,24 @@ +# {{ cephadm_managed }} +[users] + default_theme = light +[auth.anonymous] + enabled = true + 
org_name = 'Main Org.' + org_role = 'Viewer' +[server] + domain = 'bootstrap.storage.lab' + protocol = https + cert_file = /etc/grafana/certs/cert_file + cert_key = /etc/grafana/certs/cert_key + http_port = {{ http_port }} + http_addr = {{ http_addr }} +[security] +{% if not initial_admin_password %} + disable_initial_admin_creation = true +{% else %} + admin_user = admin + admin_password = {{ initial_admin_password }} +{% endif %} + cookie_secure = true + cookie_samesite = none + allow_embedding = true diff --git a/src/pybind/mgr/cephadm/templates/services/ingress/haproxy.cfg.j2 b/src/pybind/mgr/cephadm/templates/services/ingress/haproxy.cfg.j2 new file mode 100644 index 000000000..cb84f1d07 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/ingress/haproxy.cfg.j2 @@ -0,0 +1,83 @@ +# {{ cephadm_managed }} +global + log 127.0.0.1 local2 + chroot /var/lib/haproxy + pidfile /var/lib/haproxy/haproxy.pid + maxconn 8000 + daemon + stats socket /var/lib/haproxy/stats +{% if spec.ssl_cert %} + {% if spec.ssl_dh_param %} + tune.ssl.default-dh-param {{ spec.ssl_dh_param }} + {% endif %} + {% if spec.ssl_ciphers %} + ssl-default-bind-ciphers {{ spec.ssl_ciphers | join(':') }} + {% endif %} + {% if spec.ssl_options %} + ssl-default-bind-options {{ spec.ssl_options | join(' ') }} + {% endif %} +{% endif %} + +defaults + mode {{ mode }} + log global +{% if mode == 'http' %} + option httplog + option dontlognull + option http-server-close + option forwardfor except 127.0.0.0/8 + option redispatch + retries 3 + timeout queue 20s + timeout connect 5s + timeout http-request 1s + timeout http-keep-alive 5s + timeout client 1s + timeout server 1s + timeout check 5s +{% endif %} +{% if mode == 'tcp' %} + timeout queue 1m + timeout connect 10s + timeout client 1m + timeout server 1m + timeout check 10s +{% endif %} + maxconn 8000 + +frontend stats + mode http + bind {{ ip }}:{{ monitor_port }} + bind localhost:{{ monitor_port }} + stats enable + stats uri /stats + stats 
refresh 10s + stats auth {{ user }}:{{ password }} + http-request use-service prometheus-exporter if { path /metrics } + monitor-uri /health + +frontend frontend +{% if spec.ssl_cert %} + bind {{ ip }}:{{ frontend_port }} ssl crt /var/lib/haproxy/haproxy.pem +{% else %} + bind {{ ip }}:{{ frontend_port }} +{% endif %} + default_backend backend + +backend backend +{% if mode == 'http' %} + option forwardfor + balance static-rr + option httpchk HEAD / HTTP/1.0 + {% for server in servers %} + server {{ server.name }} {{ server.ip }}:{{ server.port }} check weight 100 + {% endfor %} +{% endif %} +{% if mode == 'tcp' %} + mode tcp + balance source + hash-type consistent + {% for server in servers %} + server {{ server.name }} {{ server.ip }}:{{ server.port }} + {% endfor %} +{% endif %} diff --git a/src/pybind/mgr/cephadm/templates/services/ingress/keepalived.conf.j2 b/src/pybind/mgr/cephadm/templates/services/ingress/keepalived.conf.j2 new file mode 100644 index 000000000..f560c9756 --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/ingress/keepalived.conf.j2 @@ -0,0 +1,34 @@ +# {{ cephadm_managed }} +vrrp_script check_backend { + script "{{ script }}" + weight -20 + interval 2 + rise 2 + fall 2 +} + +{% for x in range(virtual_ips|length) %} +vrrp_instance VI_{{ x }} { + state {{ states[x] }} + priority {{ priorities[x] }} + interface {{ interface }} + virtual_router_id {{ 50 + x }} + advert_int 1 + authentication { + auth_type PASS + auth_pass {{ password }} + } + unicast_src_ip {{ host_ip }} + unicast_peer { + {% for ip in other_ips %} + {{ ip }} + {% endfor %} + } + virtual_ipaddress { + {{ virtual_ips[x] }} dev {{ interface }} + } + track_script { + check_backend + } +} +{% endfor %} diff --git a/src/pybind/mgr/cephadm/templates/services/iscsi/iscsi-gateway.cfg.j2 b/src/pybind/mgr/cephadm/templates/services/iscsi/iscsi-gateway.cfg.j2 new file mode 100644 index 000000000..c2582ace7 --- /dev/null +++ 
b/src/pybind/mgr/cephadm/templates/services/iscsi/iscsi-gateway.cfg.j2 @@ -0,0 +1,13 @@ +# {{ cephadm_managed }} +[config] +cluster_client_name = {{ client_name }} +pool = {{ spec.pool }} +trusted_ip_list = {{ trusted_ip_list|default("''", true) }} +minimum_gateways = 1 +api_port = {{ spec.api_port|default("''", true) }} +api_user = {{ spec.api_user|default("''", true) }} +api_password = {{ spec.api_password|default("''", true) }} +api_secure = {{ spec.api_secure|default('False', true) }} +log_to_stderr = True +log_to_stderr_prefix = debug +log_to_file = False diff --git a/src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2 b/src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2 new file mode 100644 index 000000000..9d6e15f1c --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2 @@ -0,0 +1,35 @@ +# {{ cephadm_managed }} +NFS_CORE_PARAM { + Enable_NLM = false; + Enable_RQUOTA = false; + Protocols = 4; + NFS_Port = {{ port }}; +{% if bind_addr %} + Bind_addr = {{ bind_addr }}; +{% endif %} +} + +NFSv4 { + Delegations = false; + RecoveryBackend = 'rados_cluster'; + Minor_Versions = 1, 2; +} + +RADOS_KV { + UserId = "{{ user }}"; + nodeid = "{{ nodeid }}"; + pool = "{{ pool }}"; + namespace = "{{ namespace }}"; +} + +RADOS_URLS { + UserId = "{{ user }}"; + watch_url = "{{ url }}"; +} + +RGW { + cluster = "ceph"; + name = "client.{{ rgw_user }}"; +} + +%url {{ url }} diff --git a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 new file mode 100644 index 000000000..bb0a8fcae --- /dev/null +++ b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 @@ -0,0 +1,41 @@ +# {{ cephadm_managed }} +global: + scrape_interval: 10s + evaluation_interval: 10s +rule_files: + - /etc/prometheus/alerting/* +{% if alertmgr_targets %} +alerting: + alertmanagers: + - scheme: http + static_configs: + - targets: [{{ 
alertmgr_targets|join(', ') }}] +{% endif %} +scrape_configs: + - job_name: 'ceph' + honor_labels: true + static_configs: + - targets: +{% for mgr in mgr_scrape_list %} + - '{{ mgr }}' +{% endfor %} + +{% if nodes %} + - job_name: 'node' + static_configs: +{% for node in nodes %} + - targets: ['{{ node.url }}'] + labels: + instance: '{{ node.hostname }}' +{% endfor %} +{% endif %} + +{% if haproxy_targets %} + - job_name: 'haproxy' + static_configs: +{% for haproxy in haproxy_targets %} + - targets: [{{ haproxy.url }}] + labels: + instance: '{{ haproxy.service }}' +{% endfor %} +{% endif %} diff --git a/src/pybind/mgr/cephadm/tests/__init__.py b/src/pybind/mgr/cephadm/tests/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/__init__.py diff --git a/src/pybind/mgr/cephadm/tests/conftest.py b/src/pybind/mgr/cephadm/tests/conftest.py new file mode 100644 index 000000000..e8add2c7b --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/conftest.py @@ -0,0 +1,27 @@ +import pytest + +from cephadm.services.osd import RemoveUtil, OSD +from tests import mock + +from .fixtures import with_cephadm_module + + +@pytest.fixture() +def cephadm_module(): + with with_cephadm_module({}) as m: + yield m + + +@pytest.fixture() +def rm_util(): + with with_cephadm_module({}) as m: + r = RemoveUtil.__new__(RemoveUtil) + r.__init__(m) + yield r + + +@pytest.fixture() +def osd_obj(): + with mock.patch("cephadm.services.osd.RemoveUtil"): + o = OSD(0, mock.MagicMock()) + yield o diff --git a/src/pybind/mgr/cephadm/tests/fixtures.py b/src/pybind/mgr/cephadm/tests/fixtures.py new file mode 100644 index 000000000..8c2e1cfbf --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/fixtures.py @@ -0,0 +1,151 @@ +import fnmatch +from contextlib import contextmanager + +from ceph.deployment.service_spec import PlacementSpec, ServiceSpec +from ceph.utils import datetime_to_str, datetime_now +from cephadm.serve import CephadmServe + +try: + from typing 
import Any, Iterator, List +except ImportError: + pass + +from cephadm import CephadmOrchestrator +from orchestrator import raise_if_exception, OrchResult, HostSpec, DaemonDescriptionStatus +from tests import mock + + +def get_ceph_option(_, key): + return __file__ + + +def get_module_option_ex(_, module, key, default=None): + if module == 'prometheus': + if key == 'server_port': + return 9283 + return None + + +def _run_cephadm(ret): + def foo(s, host, entity, cmd, e, **kwargs): + if cmd == 'gather-facts': + return '{}', '', 0 + return [ret], '', 0 + return foo + + +def match_glob(val, pat): + ok = fnmatch.fnmatchcase(val, pat) + if not ok: + assert pat in val + + +@contextmanager +def with_cephadm_module(module_options=None, store=None): + """ + :param module_options: Set opts as if they were set before module.__init__ is called + :param store: Set the store before module.__init__ is called + """ + with mock.patch("cephadm.module.CephadmOrchestrator.get_ceph_option", get_ceph_option),\ + mock.patch("cephadm.services.osd.RemoveUtil._run_mon_cmd"), \ + mock.patch('cephadm.module.CephadmOrchestrator.get_module_option_ex', get_module_option_ex),\ + mock.patch("cephadm.module.CephadmOrchestrator.get_osdmap"), \ + mock.patch("cephadm.module.CephadmOrchestrator.remote"), \ + mock.patch('cephadm.offline_watcher.OfflineHostWatcher.run'): + + m = CephadmOrchestrator.__new__(CephadmOrchestrator) + if module_options is not None: + for k, v in module_options.items(): + m._ceph_set_module_option('cephadm', k, v) + if store is None: + store = {} + if '_ceph_get/mon_map' not in store: + m.mock_store_set('_ceph_get', 'mon_map', { + 'modified': datetime_to_str(datetime_now()), + 'fsid': 'foobar', + }) + if '_ceph_get/mgr_map' not in store: + m.mock_store_set('_ceph_get', 'mgr_map', { + 'services': { + 'dashboard': 'http://[::1]:8080', + 'prometheus': 'http://[::1]:8081' + }, + 'modules': ['dashboard', 'prometheus'], + }) + for k, v in store.items(): + m._ceph_set_store(k, v) + + 
m.__init__('cephadm', 0, 0) + m._cluster_fsid = "fsid" + yield m + + +def wait(m: CephadmOrchestrator, c: OrchResult) -> Any: + return raise_if_exception(c) + + +@contextmanager +def with_host(m: CephadmOrchestrator, name, addr='1::4', refresh_hosts=True, rm_with_force=True): + with mock.patch("cephadm.utils.resolve_ip", return_value=addr): + wait(m, m.add_host(HostSpec(hostname=name))) + if refresh_hosts: + CephadmServe(m)._refresh_hosts_and_daemons() + yield + wait(m, m.remove_host(name, force=rm_with_force)) + + +def assert_rm_service(cephadm: CephadmOrchestrator, srv_name): + mon_or_mgr = cephadm.spec_store[srv_name].spec.service_type in ('mon', 'mgr') + if mon_or_mgr: + assert 'Unable' in wait(cephadm, cephadm.remove_service(srv_name)) + return + assert wait(cephadm, cephadm.remove_service(srv_name)) == f'Removed service {srv_name}' + assert cephadm.spec_store[srv_name].deleted is not None + CephadmServe(cephadm)._check_daemons() + CephadmServe(cephadm)._apply_all_services() + assert cephadm.spec_store[srv_name].deleted + unmanaged = cephadm.spec_store[srv_name].spec.unmanaged + CephadmServe(cephadm)._purge_deleted_services() + if not unmanaged: # cause then we're not deleting daemons + assert srv_name not in cephadm.spec_store, f'{cephadm.spec_store[srv_name]!r}' + + +@contextmanager +def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth=None, host: str = '', status_running=False) -> Iterator[List[str]]: + if spec.placement.is_empty() and host: + spec.placement = PlacementSpec(hosts=[host], count=1) + if meth is not None: + c = meth(cephadm_module, spec) + assert wait(cephadm_module, c) == f'Scheduled {spec.service_name()} update...' 
+ else: + c = cephadm_module.apply([spec]) + assert wait(cephadm_module, c) == [f'Scheduled {spec.service_name()} update...'] + + specs = [d.spec for d in wait(cephadm_module, cephadm_module.describe_service())] + assert spec in specs + + CephadmServe(cephadm_module)._apply_all_services() + + if status_running: + make_daemons_running(cephadm_module, spec.service_name()) + + dds = wait(cephadm_module, cephadm_module.list_daemons()) + own_dds = [dd for dd in dds if dd.service_name() == spec.service_name()] + if host and spec.service_type != 'osd': + assert own_dds + + yield [dd.name() for dd in own_dds] + + assert_rm_service(cephadm_module, spec.service_name()) + + +def make_daemons_running(cephadm_module, service_name): + own_dds = cephadm_module.cache.get_daemons_by_service(service_name) + for dd in own_dds: + dd.status = DaemonDescriptionStatus.running # We're changing the reference + + +def _deploy_cephadm_binary(host): + def foo(*args, **kwargs): + return True + return foo diff --git a/src/pybind/mgr/cephadm/tests/test_autotune.py b/src/pybind/mgr/cephadm/tests/test_autotune.py new file mode 100644 index 000000000..524da9c00 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_autotune.py @@ -0,0 +1,69 @@ +# Disable autopep8 for this file: + +# fmt: off + +import pytest + +from cephadm.autotune import MemoryAutotuner +from orchestrator import DaemonDescription + + +@pytest.mark.parametrize("total,daemons,config,result", + [ # noqa: E128 + ( + 128 * 1024 * 1024 * 1024, + [], + {}, + None, + ), + ( + 128 * 1024 * 1024 * 1024, + [ + DaemonDescription('osd', '1', 'host1'), + DaemonDescription('osd', '2', 'host1'), + ], + {}, + 64 * 1024 * 1024 * 1024, + ), + ( + 128 * 1024 * 1024 * 1024, + [ + DaemonDescription('osd', '1', 'host1'), + DaemonDescription('osd', '2', 'host1'), + DaemonDescription('osd', '3', 'host1'), + ], + { + 'osd.3': 16 * 1024 * 1024 * 1024, + }, + 56 * 1024 * 1024 * 1024, + ), + ( + 128 * 1024 * 1024 * 1024, + [ + DaemonDescription('mgr', 'a', 
'host1'), + DaemonDescription('osd', '1', 'host1'), + DaemonDescription('osd', '2', 'host1'), + ], + {}, + 62 * 1024 * 1024 * 1024, + ) + ]) +def test_autotune(total, daemons, config, result): + def fake_getter(who, opt): + if opt == 'osd_memory_target_autotune': + if who in config: + return False + else: + return True + if opt == 'osd_memory_target': + return config.get(who, 4 * 1024 * 1024 * 1024) + if opt == 'mds_cache_memory_limit': + return 16 * 1024 * 1024 * 1024 + + a = MemoryAutotuner( + total_mem=total, + daemons=daemons, + config_get=fake_getter, + ) + val, osds = a.tune() + assert val == result diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py new file mode 100644 index 000000000..a6850f6cb --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -0,0 +1,1805 @@ +import json +import logging +from contextlib import contextmanager + +import pytest + +from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection +from cephadm.serve import CephadmServe +from cephadm.services.osd import OSD, OSDRemovalQueue, OsdIdClaims + +try: + from typing import List +except ImportError: + pass + +from execnet.gateway_bootstrap import HostNotFound + +from ceph.deployment.service_spec import ServiceSpec, PlacementSpec, RGWSpec, \ + NFSServiceSpec, IscsiServiceSpec, HostPlacementSpec, CustomContainerSpec, MDSSpec +from ceph.deployment.drive_selection.selector import DriveSelection +from ceph.deployment.inventory import Devices, Device +from ceph.utils import datetime_to_str, datetime_now +from orchestrator import DaemonDescription, InventoryHost, \ + HostSpec, OrchestratorError, DaemonDescriptionStatus, OrchestratorEvent +from tests import mock +from .fixtures import wait, _run_cephadm, match_glob, with_host, \ + with_cephadm_module, with_service, _deploy_cephadm_binary, make_daemons_running +from cephadm.module import CephadmOrchestrator + +""" +TODOs: + There is really room for improvement here. 
I just quickly assembled theses tests. + I general, everything should be testes in Teuthology as well. Reasons for + also testing this here is the development roundtrip time. +""" + + +def assert_rm_daemon(cephadm: CephadmOrchestrator, prefix, host): + dds: List[DaemonDescription] = wait(cephadm, cephadm.list_daemons(host=host)) + d_names = [dd.name() for dd in dds if dd.name().startswith(prefix)] + assert d_names + # there should only be one daemon (if not match_glob will throw mismatch) + assert len(d_names) == 1 + + c = cephadm.remove_daemons(d_names) + [out] = wait(cephadm, c) + # picking the 1st element is needed, rather than passing the list when the daemon + # name contains '-' char. If not, the '-' is treated as a range i.e. cephadm-exporter + # is treated like a m-e range which is invalid. rbd-mirror (d-m) and node-exporter (e-e) + # are valid, so pass without incident! Also, match_gob acts on strings anyway! + match_glob(out, f"Removed {d_names[0]}* from host '{host}'") + + +@contextmanager +def with_daemon(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, host: str): + spec.placement = PlacementSpec(hosts=[host], count=1) + + c = cephadm_module.add_daemon(spec) + [out] = wait(cephadm_module, c) + match_glob(out, f"Deployed {spec.service_name()}.* on host '{host}'") + + dds = cephadm_module.cache.get_daemons_by_service(spec.service_name()) + for dd in dds: + if dd.hostname == host: + yield dd.daemon_id + assert_rm_daemon(cephadm_module, spec.service_name(), host) + return + + assert False, 'Daemon not found' + + +@contextmanager +def with_osd_daemon(cephadm_module: CephadmOrchestrator, _run_cephadm, host: str, osd_id: int, ceph_volume_lvm_list=None): + cephadm_module.mock_store_set('_ceph_get', 'osd_map', { + 'osds': [ + { + 'osd': 1, + 'up_from': 0, + 'up': True, + 'uuid': 'uuid' + } + ] + }) + + _run_cephadm.reset_mock(return_value=True, side_effect=True) + if ceph_volume_lvm_list: + _run_cephadm.side_effect = ceph_volume_lvm_list + else: + def 
_ceph_volume_list(s, host, entity, cmd, **kwargs): + logging.info(f'ceph-volume cmd: {cmd}') + if 'raw' in cmd: + return json.dumps({ + "21a4209b-f51b-4225-81dc-d2dca5b8b2f5": { + "ceph_fsid": cephadm_module._cluster_fsid, + "device": "/dev/loop0", + "osd_id": 21, + "osd_uuid": "21a4209b-f51b-4225-81dc-d2dca5b8b2f5", + "type": "bluestore" + }, + }), '', 0 + if 'lvm' in cmd: + return json.dumps({ + str(osd_id): [{ + 'tags': { + 'ceph.cluster_fsid': cephadm_module._cluster_fsid, + 'ceph.osd_fsid': 'uuid' + }, + 'type': 'data' + }] + }), '', 0 + return '{}', '', 0 + + _run_cephadm.side_effect = _ceph_volume_list + + assert cephadm_module._osd_activate( + [host]).stdout == f"Created osd(s) 1 on host '{host}'" + assert _run_cephadm.mock_calls == [ + mock.call(host, 'osd', 'ceph-volume', + ['--', 'lvm', 'list', '--format', 'json'], no_fsid=False, image=''), + mock.call(host, f'osd.{osd_id}', 'deploy', + ['--name', f'osd.{osd_id}', '--meta-json', mock.ANY, + '--config-json', '-', '--osd-fsid', 'uuid'], + stdin=mock.ANY, image=''), + mock.call(host, 'osd', 'ceph-volume', + ['--', 'raw', 'list', '--format', 'json'], no_fsid=False, image=''), + ] + dd = cephadm_module.cache.get_daemon(f'osd.{osd_id}', host=host) + assert dd.name() == f'osd.{osd_id}' + yield dd + cephadm_module._remove_daemons([(f'osd.{osd_id}', host)]) + + +class TestCephadm(object): + + def test_get_unique_name(self, cephadm_module): + # type: (CephadmOrchestrator) -> None + existing = [ + DaemonDescription(daemon_type='mon', daemon_id='a') + ] + new_mon = cephadm_module.get_unique_name('mon', 'myhost', existing) + match_glob(new_mon, 'myhost') + new_mgr = cephadm_module.get_unique_name('mgr', 'myhost', existing) + match_glob(new_mgr, 'myhost.*') + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_host(self, cephadm_module): + assert wait(cephadm_module, cephadm_module.get_hosts()) == [] + with with_host(cephadm_module, 'test'): + assert wait(cephadm_module, 
cephadm_module.get_hosts()) == [HostSpec('test', '1::4')] + + # Be careful with backward compatibility when changing things here: + assert json.loads(cephadm_module.get_store('inventory')) == \ + {"test": {"hostname": "test", "addr": "1::4", "labels": [], "status": ""}} + + with with_host(cephadm_module, 'second', '1.2.3.5'): + assert wait(cephadm_module, cephadm_module.get_hosts()) == [ + HostSpec('test', '1::4'), + HostSpec('second', '1.2.3.5') + ] + + assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', '1::4')] + assert wait(cephadm_module, cephadm_module.get_hosts()) == [] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + @mock.patch("cephadm.utils.resolve_ip") + def test_re_add_host_receive_loopback(self, resolve_ip, cephadm_module): + resolve_ip.side_effect = ['192.168.122.1', '127.0.0.1', '127.0.0.1'] + assert wait(cephadm_module, cephadm_module.get_hosts()) == [] + cephadm_module._add_host(HostSpec('test', '192.168.122.1')) + assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', '192.168.122.1')] + cephadm_module._add_host(HostSpec('test')) + assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', '192.168.122.1')] + with pytest.raises(OrchestratorError): + cephadm_module._add_host(HostSpec('test2')) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_service_ls(self, cephadm_module): + with with_host(cephadm_module, 'test'): + c = cephadm_module.list_daemons(refresh=True) + assert wait(cephadm_module, c) == [] + with with_service(cephadm_module, MDSSpec('mds', 'name', unmanaged=True)) as _, \ + with_daemon(cephadm_module, MDSSpec('mds', 'name'), 'test') as _: + + c = cephadm_module.list_daemons() + + def remove_id_events(dd): + out = dd.to_json() + del out['daemon_id'] + del out['events'] + del out['daemon_name'] + return out + + assert [remove_id_events(dd) for dd in wait(cephadm_module, c)] == [ + { + 'service_name': 
'mds.name', + 'daemon_type': 'mds', + 'hostname': 'test', + 'status': 2, + 'status_desc': 'starting', + 'is_active': False, + 'ports': [], + } + ] + + with with_service(cephadm_module, ServiceSpec('rgw', 'r.z'), + CephadmOrchestrator.apply_rgw, 'test', status_running=True): + make_daemons_running(cephadm_module, 'mds.name') + + c = cephadm_module.describe_service() + out = [dict(o.to_json()) for o in wait(cephadm_module, c)] + expected = [ + { + 'placement': {'count': 2}, + 'service_id': 'name', + 'service_name': 'mds.name', + 'service_type': 'mds', + 'status': {'created': mock.ANY, 'running': 1, 'size': 2}, + 'unmanaged': True + }, + { + 'placement': { + 'count': 1, + 'hosts': ["test"] + }, + 'service_id': 'r.z', + 'service_name': 'rgw.r.z', + 'service_type': 'rgw', + 'status': {'created': mock.ANY, 'running': 1, 'size': 1, + 'ports': [80]}, + } + ] + for o in out: + if 'events' in o: + del o['events'] # delete it, as it contains a timestamp + assert out == expected + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_service_ls_service_type_flag(self, cephadm_module): + with with_host(cephadm_module, 'host1'): + with with_host(cephadm_module, 'host2'): + with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)), + CephadmOrchestrator.apply_mgr, '', status_running=True): + with with_service(cephadm_module, MDSSpec('mds', 'test-id', placement=PlacementSpec(count=2)), + CephadmOrchestrator.apply_mds, '', status_running=True): + + # with no service-type. 
Should provide info for both services
                        c = cephadm_module.describe_service()
                        out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
                        expected = [
                            {
                                'placement': {'count': 2},
                                'service_name': 'mgr',
                                'service_type': 'mgr',
                                'status': {'created': mock.ANY,
                                           'running': 2,
                                           'size': 2}
                            },
                            {
                                'placement': {'count': 2},
                                'service_id': 'test-id',
                                'service_name': 'mds.test-id',
                                'service_type': 'mds',
                                'status': {'created': mock.ANY,
                                           'running': 2,
                                           'size': 2}
                            },
                        ]

                        for o in out:
                            if 'events' in o:
                                del o['events']  # delete it, as it contains a timestamp
                        assert out == expected

                        # with service-type. Should provide info for only mds
                        c = cephadm_module.describe_service(service_type='mds')
                        out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
                        expected = [
                            {
                                'placement': {'count': 2},
                                'service_id': 'test-id',
                                'service_name': 'mds.test-id',
                                'service_type': 'mds',
                                'status': {'created': mock.ANY,
                                           'running': 2,
                                           'size': 2}
                            },
                        ]

                        for o in out:
                            if 'events' in o:
                                del o['events']  # delete it, as it contains a timestamp
                        assert out == expected

                        # service-type should not match with service names
                        c = cephadm_module.describe_service(service_type='mds.test-id')
                        out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
                        assert out == []

    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
    def test_device_ls(self, cephadm_module):
        with with_host(cephadm_module, 'test'):
            c = cephadm_module.get_inventory()
            assert wait(cephadm_module, c) == [InventoryHost('test')]

    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(
        json.dumps([
            dict(
                name='rgw.myrgw.foobar',
                style='cephadm',
                fsid='fsid',
                container_id='container_id',
                version='version',
                state='running',
            ),
            dict(
                name='something.foo.bar',
                style='cephadm',
                fsid='fsid',
            ),
            dict(
                name='haproxy.test.bar',
                style='cephadm',
                fsid='fsid',
            ),

        ])
    ))
    def 
test_list_daemons(self, cephadm_module: CephadmOrchestrator): + cephadm_module.service_cache_timeout = 10 + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + dds = wait(cephadm_module, cephadm_module.list_daemons()) + assert {d.name() for d in dds} == {'rgw.myrgw.foobar', 'haproxy.test.bar'} + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_daemon_action(self, cephadm_module: CephadmOrchestrator): + cephadm_module.service_cache_timeout = 10 + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, RGWSpec(service_id='myrgw.foobar', unmanaged=True)) as _, \ + with_daemon(cephadm_module, RGWSpec(service_id='myrgw.foobar'), 'test') as daemon_id: + + d_name = 'rgw.' + daemon_id + + c = cephadm_module.daemon_action('redeploy', d_name) + assert wait(cephadm_module, + c) == f"Scheduled to redeploy rgw.{daemon_id} on host 'test'" + + for what in ('start', 'stop', 'restart'): + c = cephadm_module.daemon_action(what, d_name) + assert wait(cephadm_module, + c) == F"Scheduled to {what} {d_name} on host 'test'" + + # Make sure, _check_daemons does a redeploy due to monmap change: + cephadm_module._store['_ceph_get/mon_map'] = { + 'modified': datetime_to_str(datetime_now()), + 'fsid': 'foobar', + } + cephadm_module.notify('mon_map', None) + + CephadmServe(cephadm_module)._check_daemons() + + assert cephadm_module.events.get_for_daemon(d_name) == [ + OrchestratorEvent(mock.ANY, 'daemon', d_name, 'INFO', + f"Deployed {d_name} on host \'test\'"), + OrchestratorEvent(mock.ANY, 'daemon', d_name, 'INFO', + f"stop {d_name} from host \'test\'"), + ] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_daemon_action_fail(self, cephadm_module: CephadmOrchestrator): + cephadm_module.service_cache_timeout = 10 + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, RGWSpec(service_id='myrgw.foobar', unmanaged=True)) as _, 
\ + with_daemon(cephadm_module, RGWSpec(service_id='myrgw.foobar'), 'test') as daemon_id: + with mock.patch('ceph_module.BaseMgrModule._ceph_send_command') as _ceph_send_command: + + _ceph_send_command.side_effect = Exception("myerror") + + # Make sure, _check_daemons does a redeploy due to monmap change: + cephadm_module.mock_store_set('_ceph_get', 'mon_map', { + 'modified': datetime_to_str(datetime_now()), + 'fsid': 'foobar', + }) + cephadm_module.notify('mon_map', None) + + CephadmServe(cephadm_module)._check_daemons() + + evs = [e.message for e in cephadm_module.events.get_for_daemon( + f'rgw.{daemon_id}')] + + assert 'myerror' in ''.join(evs) + + @pytest.mark.parametrize( + "action", + [ + 'start', + 'stop', + 'restart', + 'reconfig', + 'redeploy' + ] + ) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_daemon_check(self, cephadm_module: CephadmOrchestrator, action): + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='grafana'), CephadmOrchestrator.apply_grafana, 'test') as d_names: + [daemon_name] = d_names + + cephadm_module._schedule_daemon_action(daemon_name, action) + + assert cephadm_module.cache.get_scheduled_daemon_action( + 'test', daemon_name) == action + + CephadmServe(cephadm_module)._check_daemons() + + assert cephadm_module.cache.get_scheduled_daemon_action('test', daemon_name) is None + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_daemon_check_extra_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.return_value = ('{}', '', 0) + + with with_host(cephadm_module, 'test'): + + # Also testing deploying mons without explicit network placement + cephadm_module.check_mon_command({ + 'prefix': 'config set', + 'who': 'mon', + 'name': 'public_network', + 'value': '127.0.0.0/8' + }) + + cephadm_module.cache.update_host_devices_networks( + 'test', + [], + { + "127.0.0.0/8": [ + "127.0.0.1" + ], + } + ) + + with 
with_service(cephadm_module, ServiceSpec(service_type='mon'), CephadmOrchestrator.apply_mon, 'test') as d_names: + [daemon_name] = d_names + + cephadm_module._set_extra_ceph_conf('[mon]\nk=v') + + CephadmServe(cephadm_module)._check_daemons() + + _run_cephadm.assert_called_with( + 'test', 'mon.test', 'deploy', [ + '--name', 'mon.test', + '--meta-json', '{"service_name": "mon", "ports": [], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}', + '--config-json', '-', + '--reconfig', + ], + stdin='{"config": "\\n\\n[mon]\\nk=v\\n[mon.test]\\npublic network = 127.0.0.0/8\\n", ' + + '"keyring": "", "files": {"config": "[mon.test]\\npublic network = 127.0.0.0/8\\n"}}', + image='') + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_extra_container_args(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.return_value = ('{}', '', 0) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='crash', extra_container_args=['--cpus=2', '--quiet']), CephadmOrchestrator.apply_crash): + _run_cephadm.assert_called_with( + 'test', 'crash.test', 'deploy', [ + '--name', 'crash.test', + '--meta-json', '{"service_name": "crash", "ports": [], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": ["--cpus=2", "--quiet"]}', + '--config-json', '-', + '--extra-container-args=--cpus=2', + '--extra-container-args=--quiet' + ], + stdin='{"config": "", "keyring": ""}', + image='', + ) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_daemon_check_post(self, cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='grafana'), CephadmOrchestrator.apply_grafana, 'test'): + + # Make sure, _check_daemons does a redeploy due to monmap change: + cephadm_module.mock_store_set('_ceph_get', 'mon_map', { + 
                    'modified': datetime_to_str(datetime_now()),
                    'fsid': 'foobar',
                })
                cephadm_module.notify('mon_map', None)
                cephadm_module.mock_store_set('_ceph_get', 'mgr_map', {
                    'modules': ['dashboard']
                })

                with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd:
                    CephadmServe(cephadm_module)._check_daemons()
                    _mon_cmd.assert_any_call(
                        {'prefix': 'dashboard set-grafana-api-url', 'value': 'https://[1::4]:3000'},
                        None)

    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
    @mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1.2.3.4')
    def test_iscsi_post_actions_with_missing_daemon_in_cache(self, cephadm_module: CephadmOrchestrator):
        # https://tracker.ceph.com/issues/52866
        with with_host(cephadm_module, 'test1'):
            with with_host(cephadm_module, 'test2'):
                with with_service(cephadm_module, IscsiServiceSpec(service_id='foobar', pool='pool', placement=PlacementSpec(host_pattern='*')), CephadmOrchestrator.apply_iscsi, 'test'):

                    CephadmServe(cephadm_module)._apply_all_services()
                    assert len(cephadm_module.cache.get_daemons_by_type('iscsi')) == 2

                    # grab daemons from the post-action list (a set, so pop order is arbitrary)
+ tempset = cephadm_module.requires_post_actions.copy() + tempdeamon1 = tempset.pop() + tempdeamon2 = tempset.pop() + + # make sure post actions has 2 daemons in it + assert len(cephadm_module.requires_post_actions) == 2 + + # replicate a host cache that is not in sync when check_daemons is called + tempdd1 = cephadm_module.cache.get_daemon(tempdeamon1) + tempdd2 = cephadm_module.cache.get_daemon(tempdeamon2) + host = 'test1' + if 'test1' not in tempdeamon1: + host = 'test2' + cephadm_module.cache.rm_daemon(host, tempdeamon1) + + # Make sure, _check_daemons does a redeploy due to monmap change: + cephadm_module.mock_store_set('_ceph_get', 'mon_map', { + 'modified': datetime_to_str(datetime_now()), + 'fsid': 'foobar', + }) + cephadm_module.notify('mon_map', None) + cephadm_module.mock_store_set('_ceph_get', 'mgr_map', { + 'modules': ['dashboard'] + }) + + with mock.patch("cephadm.module.IscsiService.config_dashboard") as _cfg_db: + CephadmServe(cephadm_module)._check_daemons() + _cfg_db.assert_called_once_with([tempdd2]) + + # post actions still has the other deamon in it and will run next _check_deamons + assert len(cephadm_module.requires_post_actions) == 1 + + # post actions was missed for a daemon + assert tempdeamon1 in cephadm_module.requires_post_actions + + # put the daemon back in the cache + cephadm_module.cache.add_daemon(host, tempdd1) + + _cfg_db.reset_mock() + # replicate serve loop running again + CephadmServe(cephadm_module)._check_daemons() + + # post actions should have been called again + _cfg_db.asset_called() + + # post actions is now empty + assert len(cephadm_module.requires_post_actions) == 0 + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_mon_add(self, cephadm_module): + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='mon', unmanaged=True)): + ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1) + c = cephadm_module.add_daemon(ServiceSpec('mon', 
placement=ps)) + assert wait(cephadm_module, c) == ["Deployed mon.a on host 'test'"] + + with pytest.raises(OrchestratorError, match="Must set public_network config option or specify a CIDR network,"): + ps = PlacementSpec(hosts=['test'], count=1) + c = cephadm_module.add_daemon(ServiceSpec('mon', placement=ps)) + wait(cephadm_module, c) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_mgr_update(self, cephadm_module): + with with_host(cephadm_module, 'test'): + ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1) + r = CephadmServe(cephadm_module)._apply_service(ServiceSpec('mgr', placement=ps)) + assert r + + assert_rm_daemon(cephadm_module, 'mgr.a', 'test') + + @mock.patch("cephadm.module.CephadmOrchestrator.mon_command") + def test_find_destroyed_osds(self, _mon_cmd, cephadm_module): + dict_out = { + "nodes": [ + { + "id": -1, + "name": "default", + "type": "root", + "type_id": 11, + "children": [ + -3 + ] + }, + { + "id": -3, + "name": "host1", + "type": "host", + "type_id": 1, + "pool_weights": {}, + "children": [ + 0 + ] + }, + { + "id": 0, + "device_class": "hdd", + "name": "osd.0", + "type": "osd", + "type_id": 0, + "crush_weight": 0.0243988037109375, + "depth": 2, + "pool_weights": {}, + "exists": 1, + "status": "destroyed", + "reweight": 1, + "primary_affinity": 1 + } + ], + "stray": [] + } + json_out = json.dumps(dict_out) + _mon_cmd.return_value = (0, json_out, '') + osd_claims = OsdIdClaims(cephadm_module) + assert osd_claims.get() == {'host1': ['0']} + assert osd_claims.filtered_by_host('host1') == ['0'] + assert osd_claims.filtered_by_host('host1.domain.com') == ['0'] + + @ pytest.mark.parametrize( + "ceph_services, cephadm_daemons, strays_expected, metadata", + # [ ([(daemon_type, daemon_id), ... ], [...], [...]), ... 
] + [ + ( + [('mds', 'a'), ('osd', '0'), ('mgr', 'x')], + [], + [('mds', 'a'), ('osd', '0'), ('mgr', 'x')], + {}, + ), + ( + [('mds', 'a'), ('osd', '0'), ('mgr', 'x')], + [('mds', 'a'), ('osd', '0'), ('mgr', 'x')], + [], + {}, + ), + ( + [('mds', 'a'), ('osd', '0'), ('mgr', 'x')], + [('mds', 'a'), ('osd', '0')], + [('mgr', 'x')], + {}, + ), + # https://tracker.ceph.com/issues/49573 + ( + [('rgw-nfs', '14649')], + [], + [('nfs', 'foo-rgw.host1')], + {'14649': {'id': 'nfs.foo-rgw.host1-rgw'}}, + ), + ( + [('rgw-nfs', '14649'), ('rgw-nfs', '14650')], + [('nfs', 'foo-rgw.host1'), ('nfs', 'foo2.host2')], + [], + {'14649': {'id': 'nfs.foo-rgw.host1-rgw'}, '14650': {'id': 'nfs.foo2.host2-rgw'}}, + ), + ( + [('rgw-nfs', '14649'), ('rgw-nfs', '14650')], + [('nfs', 'foo-rgw.host1')], + [('nfs', 'foo2.host2')], + {'14649': {'id': 'nfs.foo-rgw.host1-rgw'}, '14650': {'id': 'nfs.foo2.host2-rgw'}}, + ), + ] + ) + def test_check_for_stray_daemons( + self, + cephadm_module, + ceph_services, + cephadm_daemons, + strays_expected, + metadata + ): + # mock ceph service-map + services = [] + for service in ceph_services: + s = {'type': service[0], 'id': service[1]} + services.append(s) + ls = [{'hostname': 'host1', 'services': services}] + + with mock.patch.object(cephadm_module, 'list_servers', mock.MagicMock()) as list_servers: + list_servers.return_value = ls + list_servers.__iter__.side_effect = ls.__iter__ + + # populate cephadm daemon cache + dm = {} + for daemon_type, daemon_id in cephadm_daemons: + dd = DaemonDescription(daemon_type=daemon_type, daemon_id=daemon_id) + dm[dd.name()] = dd + cephadm_module.cache.update_host_daemons('host1', dm) + + def get_metadata_mock(svc_type, svc_id, default): + return metadata[svc_id] + + with mock.patch.object(cephadm_module, 'get_metadata', new_callable=lambda: get_metadata_mock): + + # test + CephadmServe(cephadm_module)._check_for_strays() + + # verify + strays = cephadm_module.health_checks.get('CEPHADM_STRAY_DAEMON') + if not strays: + 
assert len(strays_expected) == 0 + else: + for dt, di in strays_expected: + name = '%s.%s' % (dt, di) + for detail in strays['detail']: + if name in detail: + strays['detail'].remove(detail) + break + assert name in detail + assert len(strays['detail']) == 0 + assert strays['count'] == len(strays_expected) + + @mock.patch("cephadm.module.CephadmOrchestrator.mon_command") + def test_find_destroyed_osds_cmd_failure(self, _mon_cmd, cephadm_module): + _mon_cmd.return_value = (1, "", "fail_msg") + with pytest.raises(OrchestratorError): + OsdIdClaims(cephadm_module) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_apply_osd_save(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.return_value = ('{}', '', 0) + with with_host(cephadm_module, 'test'): + + spec = DriveGroupSpec( + service_id='foo', + placement=PlacementSpec( + host_pattern='*', + ), + data_devices=DeviceSelection( + all=True + ) + ) + + c = cephadm_module.apply([spec]) + assert wait(cephadm_module, c) == ['Scheduled osd.foo update...'] + + inventory = Devices([ + Device( + '/dev/sdb', + available=True + ), + ]) + + cephadm_module.cache.update_host_devices_networks('test', inventory.devices, {}) + + _run_cephadm.return_value = (['{}'], '', 0) + + assert CephadmServe(cephadm_module)._apply_all_services() is False + + _run_cephadm.assert_any_call( + 'test', 'osd', 'ceph-volume', + ['--config-json', '-', '--', 'lvm', 'batch', + '--no-auto', '/dev/sdb', '--yes', '--no-systemd'], + env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=foo'], error_ok=True, stdin='{"config": "", "keyring": ""}') + _run_cephadm.assert_any_call( + 'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False) + _run_cephadm.assert_any_call( + 'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_apply_osd_save_non_collocated(self, _run_cephadm, 
cephadm_module: CephadmOrchestrator): + _run_cephadm.return_value = ('{}', '', 0) + with with_host(cephadm_module, 'test'): + + spec = DriveGroupSpec( + service_id='noncollocated', + placement=PlacementSpec( + hosts=['test'] + ), + data_devices=DeviceSelection(paths=['/dev/sdb']), + db_devices=DeviceSelection(paths=['/dev/sdc']), + wal_devices=DeviceSelection(paths=['/dev/sdd']) + ) + + c = cephadm_module.apply([spec]) + assert wait(cephadm_module, c) == ['Scheduled osd.noncollocated update...'] + + inventory = Devices([ + Device('/dev/sdb', available=True), + Device('/dev/sdc', available=True), + Device('/dev/sdd', available=True) + ]) + + cephadm_module.cache.update_host_devices_networks('test', inventory.devices, {}) + + _run_cephadm.return_value = (['{}'], '', 0) + + assert CephadmServe(cephadm_module)._apply_all_services() is False + + _run_cephadm.assert_any_call( + 'test', 'osd', 'ceph-volume', + ['--config-json', '-', '--', 'lvm', 'batch', + '--no-auto', '/dev/sdb', '--db-devices', '/dev/sdc', + '--wal-devices', '/dev/sdd', '--yes', '--no-systemd'], + env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=noncollocated'], + error_ok=True, stdin='{"config": "", "keyring": ""}') + _run_cephadm.assert_any_call( + 'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False) + _run_cephadm.assert_any_call( + 'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("cephadm.module.SpecStore.save") + def test_apply_osd_save_placement(self, _save_spec, cephadm_module): + with with_host(cephadm_module, 'test'): + json_spec = {'service_type': 'osd', 'placement': {'host_pattern': 'test'}, + 'service_id': 'foo', 'data_devices': {'all': True}} + spec = ServiceSpec.from_json(json_spec) + assert isinstance(spec, DriveGroupSpec) + c = cephadm_module.apply([spec]) + assert wait(cephadm_module, c) == ['Scheduled osd.foo 
update...']
        _save_spec.assert_called_with(spec)

    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
    def test_create_osds(self, cephadm_module):
        with with_host(cephadm_module, 'test'):
            dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
                                data_devices=DeviceSelection(paths=['']))
            c = cephadm_module.create_osds(dg)
            out = wait(cephadm_module, c)
            assert out == "Created no osd(s) on host test; already created?"
            # host pattern matching nothing must surface a clear error
            bad_dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='invalid_hsot'),
                                    data_devices=DeviceSelection(paths=['']))
            c = cephadm_module.create_osds(bad_dg)
            out = wait(cephadm_module, c)
            assert "Invalid 'host:device' spec: host not found in cluster" in out

    # NOTE(review): this body is identical to the first half of test_create_osds
    # and never supplies db/wal devices — presumably it was meant to exercise a
    # non-collocated DriveGroupSpec; confirm intent before extending.
    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
    def test_create_noncollocated_osd(self, cephadm_module):
        with with_host(cephadm_module, 'test'):
            dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
                                data_devices=DeviceSelection(paths=['']))
            c = cephadm_module.create_osds(dg)
            out = wait(cephadm_module, c)
            assert out == "Created no osd(s) on host test; already created?"
+ + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch('cephadm.services.osd.OSDService._run_ceph_volume_command') + @mock.patch('cephadm.services.osd.OSDService.driveselection_to_ceph_volume') + @mock.patch('cephadm.services.osd.OsdIdClaims.refresh', lambda _: None) + @mock.patch('cephadm.services.osd.OsdIdClaims.get', lambda _: {}) + def test_limit_not_reached(self, d_to_cv, _run_cv_cmd, cephadm_module): + with with_host(cephadm_module, 'test'): + dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'), + data_devices=DeviceSelection(limit=5, rotational=1), + service_id='not_enough') + + disks_found = [ + '[{"data": "/dev/vdb", "data_size": "50.00 GB", "encryption": "None"}, {"data": "/dev/vdc", "data_size": "50.00 GB", "encryption": "None"}]'] + d_to_cv.return_value = 'foo' + _run_cv_cmd.return_value = (disks_found, '', 0) + preview = cephadm_module.osd_service.generate_previews([dg], 'test') + + for osd in preview: + assert 'notes' in osd + assert osd['notes'] == [ + 'NOTE: Did not find enough disks matching filter on host test to reach data device limit (Found: 2 | Limit: 5)'] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_prepare_drivegroup(self, cephadm_module): + with with_host(cephadm_module, 'test'): + dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'), + data_devices=DeviceSelection(paths=[''])) + out = cephadm_module.osd_service.prepare_drivegroup(dg) + assert len(out) == 1 + f1 = out[0] + assert f1[0] == 'test' + assert isinstance(f1[1], DriveSelection) + + @pytest.mark.parametrize( + "devices, preview, exp_commands", + [ + # no preview and only one disk, prepare is used due the hack that is in place. 
+ (['/dev/sda'], False, ["lvm batch --no-auto /dev/sda --yes --no-systemd"]), + # no preview and multiple disks, uses batch + (['/dev/sda', '/dev/sdb'], False, + ["CEPH_VOLUME_OSDSPEC_AFFINITY=test.spec lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd"]), + # preview and only one disk needs to use batch again to generate the preview + (['/dev/sda'], True, ["lvm batch --no-auto /dev/sda --yes --no-systemd --report --format json"]), + # preview and multiple disks work the same + (['/dev/sda', '/dev/sdb'], True, + ["CEPH_VOLUME_OSDSPEC_AFFINITY=test.spec lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd --report --format json"]), + ] + ) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_driveselection_to_ceph_volume(self, cephadm_module, devices, preview, exp_commands): + with with_host(cephadm_module, 'test'): + dg = DriveGroupSpec(service_id='test.spec', placement=PlacementSpec( + host_pattern='test'), data_devices=DeviceSelection(paths=devices)) + ds = DriveSelection(dg, Devices([Device(path) for path in devices])) + preview = preview + out = cephadm_module.osd_service.driveselection_to_ceph_volume(ds, [], preview) + assert all(any(cmd in exp_cmd for exp_cmd in exp_commands) for cmd in out), f'Expected cmds from f{out} in {exp_commands}' + + @pytest.mark.parametrize( + "devices, preview, exp_commands", + [ + # one data device, no preview + (['/dev/sda'], False, ["raw prepare --bluestore --data /dev/sda"]), + # multiple data devices, no preview + (['/dev/sda', '/dev/sdb'], False, + ["raw prepare --bluestore --data /dev/sda", "raw prepare --bluestore --data /dev/sdb"]), + # one data device, preview + (['/dev/sda'], True, ["raw prepare --bluestore --data /dev/sda --report --format json"]), + # multiple data devices, preview + (['/dev/sda', '/dev/sdb'], True, + ["raw prepare --bluestore --data /dev/sda --report --format json", "raw prepare --bluestore --data /dev/sdb --report --format json"]), + ] + ) + 
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_raw_driveselection_to_ceph_volume(self, cephadm_module, devices, preview, exp_commands): + with with_host(cephadm_module, 'test'): + dg = DriveGroupSpec(service_id='test.spec', method='raw', placement=PlacementSpec( + host_pattern='test'), data_devices=DeviceSelection(paths=devices)) + ds = DriveSelection(dg, Devices([Device(path) for path in devices])) + preview = preview + out = cephadm_module.osd_service.driveselection_to_ceph_volume(ds, [], preview) + assert all(any(cmd in exp_cmd for exp_cmd in exp_commands) for cmd in out), f'Expected cmds from f{out} in {exp_commands}' + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm( + json.dumps([ + dict( + name='osd.0', + style='cephadm', + fsid='fsid', + container_id='container_id', + version='version', + state='running', + ) + ]) + )) + @mock.patch("cephadm.services.osd.OSD.exists", True) + @mock.patch("cephadm.services.osd.RemoveUtil.get_pg_count", lambda _, __: 0) + def test_remove_osds(self, cephadm_module): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + c = cephadm_module.list_daemons() + wait(cephadm_module, c) + + c = cephadm_module.remove_daemons(['osd.0']) + out = wait(cephadm_module, c) + assert out == ["Removed osd.0 from host 'test'"] + + cephadm_module.to_remove_osds.enqueue(OSD(osd_id=0, + replace=False, + force=False, + hostname='test', + process_started_at=datetime_now(), + remove_util=cephadm_module.to_remove_osds.rm_util + )) + cephadm_module.to_remove_osds.process_removal_queue() + assert cephadm_module.to_remove_osds == OSDRemovalQueue(cephadm_module) + + c = cephadm_module.remove_osds_status() + out = wait(cephadm_module, c) + assert out == [] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_rgw_update(self, cephadm_module): + with with_host(cephadm_module, 'host1'): + with 
with_host(cephadm_module, 'host2'): + with with_service(cephadm_module, RGWSpec(service_id="foo", unmanaged=True)): + ps = PlacementSpec(hosts=['host1'], count=1) + c = cephadm_module.add_daemon( + RGWSpec(service_id="foo", placement=ps)) + [out] = wait(cephadm_module, c) + match_glob(out, "Deployed rgw.foo.* on host 'host1'") + + ps = PlacementSpec(hosts=['host1', 'host2'], count=2) + r = CephadmServe(cephadm_module)._apply_service( + RGWSpec(service_id="foo", placement=ps)) + assert r + + assert_rm_daemon(cephadm_module, 'rgw.foo', 'host1') + assert_rm_daemon(cephadm_module, 'rgw.foo', 'host2') + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm( + json.dumps([ + dict( + name='rgw.myrgw.myhost.myid', + style='cephadm', + fsid='fsid', + container_id='container_id', + version='version', + state='running', + ) + ]) + )) + def test_remove_daemon(self, cephadm_module): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + c = cephadm_module.list_daemons() + wait(cephadm_module, c) + c = cephadm_module.remove_daemons(['rgw.myrgw.myhost.myid']) + out = wait(cephadm_module, c) + assert out == ["Removed rgw.myrgw.myhost.myid from host 'test'"] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_remove_duplicate_osds(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.return_value = ('{}', '', 0) + with with_host(cephadm_module, 'host1'): + with with_host(cephadm_module, 'host2'): + with with_osd_daemon(cephadm_module, _run_cephadm, 'host1', 1) as dd1: # type: DaemonDescription + with with_osd_daemon(cephadm_module, _run_cephadm, 'host2', 1) as dd2: # type: DaemonDescription + CephadmServe(cephadm_module)._check_for_moved_osds() + # both are in status "starting" + assert len(cephadm_module.cache.get_daemons()) == 2 + + dd1.status = DaemonDescriptionStatus.running + dd2.status = DaemonDescriptionStatus.error + cephadm_module.cache.update_host_daemons(dd1.hostname, 
{dd1.name(): dd1}) + cephadm_module.cache.update_host_daemons(dd2.hostname, {dd2.name(): dd2}) + CephadmServe(cephadm_module)._check_for_moved_osds() + assert len(cephadm_module.cache.get_daemons()) == 1 + + assert cephadm_module.events.get_for_daemon('osd.1') == [ + OrchestratorEvent(mock.ANY, 'daemon', 'osd.1', 'INFO', + "Deployed osd.1 on host 'host1'"), + OrchestratorEvent(mock.ANY, 'daemon', 'osd.1', 'INFO', + "Deployed osd.1 on host 'host2'"), + OrchestratorEvent(mock.ANY, 'daemon', 'osd.1', 'INFO', + "Removed duplicated daemon on host 'host2'"), + ] + + with pytest.raises(AssertionError): + cephadm_module.assert_issued_mon_command({ + 'prefix': 'auth rm', + 'entity': 'osd.1', + }) + + cephadm_module.assert_issued_mon_command({ + 'prefix': 'auth rm', + 'entity': 'osd.1', + }) + + @pytest.mark.parametrize( + "spec", + [ + ServiceSpec('crash'), + ServiceSpec('prometheus'), + ServiceSpec('grafana'), + ServiceSpec('node-exporter'), + ServiceSpec('alertmanager'), + ServiceSpec('rbd-mirror'), + ServiceSpec('cephfs-mirror'), + ServiceSpec('mds', service_id='fsname'), + RGWSpec(rgw_realm='realm', rgw_zone='zone'), + RGWSpec(service_id="foo"), + ServiceSpec('cephadm-exporter'), + ] + ) + @mock.patch("cephadm.serve.CephadmServe._deploy_cephadm_binary", _deploy_cephadm_binary('test')) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_daemon_add(self, spec: ServiceSpec, cephadm_module): + unmanaged_spec = ServiceSpec.from_json(spec.to_json()) + unmanaged_spec.unmanaged = True + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, unmanaged_spec): + with with_daemon(cephadm_module, spec, 'test'): + pass + + @pytest.mark.parametrize( + "entity,success,spec", + [ + ('mgr.x', True, ServiceSpec( + service_type='mgr', + placement=PlacementSpec(hosts=[HostPlacementSpec('test', '', 'x')], count=1), + unmanaged=True) + ), # noqa: E124 + ('client.rgw.x', True, ServiceSpec( + service_type='rgw', + service_id='id', + 
placement=PlacementSpec(hosts=[HostPlacementSpec('test', '', 'x')], count=1), + unmanaged=True) + ), # noqa: E124 + ('client.nfs.x', True, ServiceSpec( + service_type='nfs', + service_id='id', + placement=PlacementSpec(hosts=[HostPlacementSpec('test', '', 'x')], count=1), + unmanaged=True) + ), # noqa: E124 + ('mon.', False, ServiceSpec( + service_type='mon', + placement=PlacementSpec( + hosts=[HostPlacementSpec('test', '127.0.0.0/24', 'x')], count=1), + unmanaged=True) + ), # noqa: E124 + ] + ) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock()) + @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock()) + @mock.patch("cephadm.services.nfs.NFSService.create_rados_config_obj", mock.MagicMock()) + def test_daemon_add_fail(self, _run_cephadm, entity, success, spec, cephadm_module): + _run_cephadm.return_value = '{}', '', 0 + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec): + _run_cephadm.side_effect = OrchestratorError('fail') + with pytest.raises(OrchestratorError): + wait(cephadm_module, cephadm_module.add_daemon(spec)) + if success: + cephadm_module.assert_issued_mon_command({ + 'prefix': 'auth rm', + 'entity': entity, + }) + else: + with pytest.raises(AssertionError): + cephadm_module.assert_issued_mon_command({ + 'prefix': 'auth rm', + 'entity': entity, + }) + assert cephadm_module.events.get_for_service(spec.service_name()) == [ + OrchestratorEvent(mock.ANY, 'service', spec.service_name(), 'INFO', + "service was created"), + OrchestratorEvent(mock.ANY, 'service', spec.service_name(), 'ERROR', + "fail"), + ] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_daemon_place_fail_health_warning(self, _run_cephadm, cephadm_module): + _run_cephadm.return_value = ('{}', '', 0) + with with_host(cephadm_module, 'test'): + _run_cephadm.side_effect = OrchestratorError('fail') + ps = PlacementSpec(hosts=['test:0.0.0.0=a'], 
count=1) + r = CephadmServe(cephadm_module)._apply_service(ServiceSpec('mgr', placement=ps)) + assert not r + assert cephadm_module.health_checks.get('CEPHADM_DAEMON_PLACE_FAIL') is not None + assert cephadm_module.health_checks['CEPHADM_DAEMON_PLACE_FAIL']['count'] == 1 + assert 'Failed to place 1 daemon(s)' in cephadm_module.health_checks['CEPHADM_DAEMON_PLACE_FAIL']['summary'] + assert 'Failed while placing mgr.a on test: fail' in cephadm_module.health_checks['CEPHADM_DAEMON_PLACE_FAIL']['detail'] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_apply_spec_fail_health_warning(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.return_value = ('{}', '', 0) + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._apply_all_services() + ps = PlacementSpec(hosts=['fail'], count=1) + r = CephadmServe(cephadm_module)._apply_service(ServiceSpec('mgr', placement=ps)) + assert not r + assert cephadm_module.apply_spec_fails + assert cephadm_module.health_checks.get('CEPHADM_APPLY_SPEC_FAIL') is not None + assert cephadm_module.health_checks['CEPHADM_APPLY_SPEC_FAIL']['count'] == 1 + assert 'Failed to apply 1 service(s)' in cephadm_module.health_checks['CEPHADM_APPLY_SPEC_FAIL']['summary'] + + @mock.patch("cephadm.module.CephadmOrchestrator.get_foreign_ceph_option") + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_invalid_config_option_health_warning(self, _run_cephadm, get_foreign_ceph_option, cephadm_module: CephadmOrchestrator): + _run_cephadm.return_value = ('{}', '', 0) + with with_host(cephadm_module, 'test'): + ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1) + get_foreign_ceph_option.side_effect = KeyError + CephadmServe(cephadm_module)._apply_service_config( + ServiceSpec('mgr', placement=ps, config={'test': 'foo'})) + assert cephadm_module.health_checks.get('CEPHADM_INVALID_CONFIG_OPTION') is not None + assert cephadm_module.health_checks['CEPHADM_INVALID_CONFIG_OPTION']['count'] 
== 1 + assert 'Ignoring 1 invalid config option(s)' in cephadm_module.health_checks[ + 'CEPHADM_INVALID_CONFIG_OPTION']['summary'] + assert 'Ignoring invalid mgr config option test' in cephadm_module.health_checks[ + 'CEPHADM_INVALID_CONFIG_OPTION']['detail'] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock()) + @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock()) + @mock.patch("cephadm.services.nfs.NFSService.create_rados_config_obj", mock.MagicMock()) + def test_nfs(self, cephadm_module): + with with_host(cephadm_module, 'test'): + ps = PlacementSpec(hosts=['test'], count=1) + spec = NFSServiceSpec( + service_id='name', + placement=ps) + unmanaged_spec = ServiceSpec.from_json(spec.to_json()) + unmanaged_spec.unmanaged = True + with with_service(cephadm_module, unmanaged_spec): + c = cephadm_module.add_daemon(spec) + [out] = wait(cephadm_module, c) + match_glob(out, "Deployed nfs.name.* on host 'test'") + + assert_rm_daemon(cephadm_module, 'nfs.name.test', 'test') + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("subprocess.run", None) + @mock.patch("cephadm.module.CephadmOrchestrator.rados", mock.MagicMock()) + @mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4') + def test_iscsi(self, cephadm_module): + with with_host(cephadm_module, 'test'): + ps = PlacementSpec(hosts=['test'], count=1) + spec = IscsiServiceSpec( + service_id='name', + pool='pool', + api_user='user', + api_password='password', + placement=ps) + unmanaged_spec = ServiceSpec.from_json(spec.to_json()) + unmanaged_spec.unmanaged = True + with with_service(cephadm_module, unmanaged_spec): + + c = cephadm_module.add_daemon(spec) + [out] = wait(cephadm_module, c) + match_glob(out, "Deployed iscsi.name.* on host 'test'") + + assert_rm_daemon(cephadm_module, 'iscsi.name.test', 'test') + + 
@pytest.mark.parametrize( + "on_bool", + [ + True, + False + ] + ) + @pytest.mark.parametrize( + "fault_ident", + [ + 'fault', + 'ident' + ] + ) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_blink_device_light(self, _run_cephadm, on_bool, fault_ident, cephadm_module): + _run_cephadm.return_value = '{}', '', 0 + with with_host(cephadm_module, 'test'): + c = cephadm_module.blink_device_light(fault_ident, on_bool, [('test', '', 'dev')]) + on_off = 'on' if on_bool else 'off' + assert wait(cephadm_module, c) == [f'Set {fault_ident} light for test: {on_off}'] + _run_cephadm.assert_called_with('test', 'osd', 'shell', [ + '--', 'lsmcli', f'local-disk-{fault_ident}-led-{on_off}', '--path', 'dev'], error_ok=True) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_blink_device_light_custom(self, _run_cephadm, cephadm_module): + _run_cephadm.return_value = '{}', '', 0 + with with_host(cephadm_module, 'test'): + cephadm_module.set_store('blink_device_light_cmd', 'echo hello') + c = cephadm_module.blink_device_light('ident', True, [('test', '', '/dev/sda')]) + assert wait(cephadm_module, c) == ['Set ident light for test: on'] + _run_cephadm.assert_called_with('test', 'osd', 'shell', [ + '--', 'echo', 'hello'], error_ok=True) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_blink_device_light_custom_per_host(self, _run_cephadm, cephadm_module): + _run_cephadm.return_value = '{}', '', 0 + with with_host(cephadm_module, 'mgr0'): + cephadm_module.set_store('mgr0/blink_device_light_cmd', + 'xyz --foo --{{ ident_fault }}={{\'on\' if on else \'off\'}} \'{{ path or dev }}\'') + c = cephadm_module.blink_device_light( + 'fault', True, [('mgr0', 'SanDisk_X400_M.2_2280_512GB_162924424784', '')]) + assert wait(cephadm_module, c) == [ + 'Set fault light for mgr0:SanDisk_X400_M.2_2280_512GB_162924424784 on'] + _run_cephadm.assert_called_with('mgr0', 'osd', 'shell', [ + '--', 'xyz', '--foo', '--fault=on', 
'SanDisk_X400_M.2_2280_512GB_162924424784' + ], error_ok=True) + + @pytest.mark.parametrize( + "spec, meth", + [ + (ServiceSpec('mgr'), CephadmOrchestrator.apply_mgr), + (ServiceSpec('crash'), CephadmOrchestrator.apply_crash), + (ServiceSpec('prometheus'), CephadmOrchestrator.apply_prometheus), + (ServiceSpec('grafana'), CephadmOrchestrator.apply_grafana), + (ServiceSpec('node-exporter'), CephadmOrchestrator.apply_node_exporter), + (ServiceSpec('alertmanager'), CephadmOrchestrator.apply_alertmanager), + (ServiceSpec('rbd-mirror'), CephadmOrchestrator.apply_rbd_mirror), + (ServiceSpec('cephfs-mirror'), CephadmOrchestrator.apply_rbd_mirror), + (ServiceSpec('mds', service_id='fsname'), CephadmOrchestrator.apply_mds), + (ServiceSpec( + 'mds', service_id='fsname', + placement=PlacementSpec( + hosts=[HostPlacementSpec( + hostname='test', + name='fsname', + network='' + )] + ) + ), CephadmOrchestrator.apply_mds), + (RGWSpec(service_id='foo'), CephadmOrchestrator.apply_rgw), + (RGWSpec( + service_id='bar', + rgw_realm='realm', rgw_zone='zone', + placement=PlacementSpec( + hosts=[HostPlacementSpec( + hostname='test', + name='bar', + network='' + )] + ) + ), CephadmOrchestrator.apply_rgw), + (NFSServiceSpec( + service_id='name', + ), CephadmOrchestrator.apply_nfs), + (IscsiServiceSpec( + service_id='name', + pool='pool', + api_user='user', + api_password='password' + ), CephadmOrchestrator.apply_iscsi), + (CustomContainerSpec( + service_id='hello-world', + image='docker.io/library/hello-world:latest', + uid=65534, + gid=65534, + dirs=['foo/bar'], + files={ + 'foo/bar/xyz.conf': 'aaa\nbbb' + }, + bind_mounts=[[ + 'type=bind', + 'source=lib/modules', + 'destination=/lib/modules', + 'ro=true' + ]], + volume_mounts={ + 'foo/bar': '/foo/bar:Z' + }, + args=['--no-healthcheck'], + envs=['SECRET=password'], + ports=[8080, 8443] + ), CephadmOrchestrator.apply_container), + (ServiceSpec('cephadm-exporter'), CephadmOrchestrator.apply_cephadm_exporter), + ] + ) + 
@mock.patch("cephadm.serve.CephadmServe._deploy_cephadm_binary", _deploy_cephadm_binary('test')) + @mock.patch("subprocess.run", None) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock()) + @mock.patch("cephadm.services.nfs.NFSService.create_rados_config_obj", mock.MagicMock()) + @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock()) + @mock.patch("subprocess.run", mock.MagicMock()) + def test_apply_save(self, spec: ServiceSpec, meth, cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec, meth, 'test'): + pass + + @mock.patch("cephadm.serve.CephadmServe._deploy_cephadm_binary", _deploy_cephadm_binary('test')) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_mds_config_purge(self, cephadm_module: CephadmOrchestrator): + spec = MDSSpec('mds', service_id='fsname', config={'test': 'foo'}) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec, host='test'): + ret, out, err = cephadm_module.check_mon_command({ + 'prefix': 'config get', + 'who': spec.service_name(), + 'key': 'mds_join_fs', + }) + assert out == 'fsname' + ret, out, err = cephadm_module.check_mon_command({ + 'prefix': 'config get', + 'who': spec.service_name(), + 'key': 'mds_join_fs', + }) + assert not out + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + @mock.patch("cephadm.services.cephadmservice.CephadmService.ok_to_stop") + def test_daemon_ok_to_stop(self, ok_to_stop, cephadm_module: CephadmOrchestrator): + spec = MDSSpec( + 'mds', + service_id='fsname', + placement=PlacementSpec(hosts=['host1', 'host2']), + config={'test': 'foo'} + ) + with with_host(cephadm_module, 'host1'), with_host(cephadm_module, 'host2'): + c = cephadm_module.apply_mds(spec) + out = wait(cephadm_module, c) + match_glob(out, "Scheduled mds.fsname 
update...") + CephadmServe(cephadm_module)._apply_all_services() + + [daemon] = cephadm_module.cache.daemons['host1'].keys() + + spec.placement.set_hosts(['host2']) + + ok_to_stop.side_effect = False + + c = cephadm_module.apply_mds(spec) + out = wait(cephadm_module, c) + match_glob(out, "Scheduled mds.fsname update...") + CephadmServe(cephadm_module)._apply_all_services() + + ok_to_stop.assert_called_with([daemon[4:]], force=True) + + assert_rm_daemon(cephadm_module, spec.service_name(), 'host1') # verifies ok-to-stop + assert_rm_daemon(cephadm_module, spec.service_name(), 'host2') + + @mock.patch("cephadm.module.CephadmOrchestrator._get_connection") + @mock.patch("remoto.process.check") + def test_offline(self, _check, _get_connection, cephadm_module): + _check.return_value = '{}', '', 0 + _get_connection.return_value = mock.Mock(), mock.Mock() + with with_host(cephadm_module, 'test'): + _get_connection.side_effect = HostNotFound + code, out, err = cephadm_module.check_host('test') + assert out == '' + assert "Host 'test' not found" in err + + out = wait(cephadm_module, cephadm_module.get_hosts())[0].to_json() + assert out == HostSpec('test', '1::4', status='Offline').to_json() + + _get_connection.side_effect = None + assert CephadmServe(cephadm_module)._check_host('test') is None + out = wait(cephadm_module, cephadm_module.get_hosts())[0].to_json() + assert out == HostSpec('test', '1::4').to_json() + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_dont_touch_offline_or_maintenance_host_daemons(self, cephadm_module): + # test daemons on offline/maint hosts not removed when applying specs + # test daemons not added to hosts in maint/offline state + with with_host(cephadm_module, 'test1'): + with with_host(cephadm_module, 'test2'): + with with_host(cephadm_module, 'test3'): + with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(host_pattern='*'))): + # should get a mgr on all 3 hosts + # 
CephadmServe(cephadm_module)._apply_all_services() + assert len(cephadm_module.cache.get_daemons_by_type('mgr')) == 3 + + # put one host in offline state and one host in maintenance state + cephadm_module.offline_hosts = {'test2'} + cephadm_module.inventory._inventory['test3']['status'] = 'maintenance' + cephadm_module.inventory.save() + + # being in offline/maint mode should disqualify hosts from being + # candidates for scheduling + candidates = [ + h.hostname for h in cephadm_module._schedulable_hosts()] + assert 'test2' in candidates + assert 'test3' in candidates + + unreachable = [h.hostname for h in cephadm_module._unreachable_hosts()] + assert 'test2' in unreachable + assert 'test3' in unreachable + + with with_service(cephadm_module, ServiceSpec('crash', placement=PlacementSpec(host_pattern='*'))): + # re-apply services. No mgr should be removed from maint/offline hosts + # crash daemon should only be on host not in maint/offline mode + CephadmServe(cephadm_module)._apply_all_services() + assert len(cephadm_module.cache.get_daemons_by_type('mgr')) == 3 + assert len(cephadm_module.cache.get_daemons_by_type('crash')) == 1 + + cephadm_module.offline_hosts = {} + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.CephadmOrchestrator._host_ok_to_stop") + @mock.patch("cephadm.module.HostCache.get_daemon_types") + @mock.patch("cephadm.module.HostCache.get_hosts") + def test_maintenance_enter_success(self, _hosts, _get_daemon_types, _host_ok, _run_cephadm, cephadm_module: CephadmOrchestrator): + hostname = 'host1' + _run_cephadm.return_value = [''], ['something\nsuccess - systemd target xxx disabled'], 0 + _host_ok.return_value = 0, 'it is okay' + _get_daemon_types.return_value = ['crash'] + _hosts.return_value = [hostname, 'other_host'] + cephadm_module.inventory.add_host(HostSpec(hostname)) + # should not raise an error + retval = cephadm_module.enter_host_maintenance(hostname) + assert retval.result_str().startswith('Daemons for 
Ceph cluster') + assert not retval.exception_str + assert cephadm_module.inventory._inventory[hostname]['status'] == 'maintenance' + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.CephadmOrchestrator._host_ok_to_stop") + @mock.patch("cephadm.module.HostCache.get_daemon_types") + @mock.patch("cephadm.module.HostCache.get_hosts") + def test_maintenance_enter_failure(self, _hosts, _get_daemon_types, _host_ok, _run_cephadm, cephadm_module: CephadmOrchestrator): + hostname = 'host1' + _run_cephadm.return_value = [''], ['something\nfailed - disable the target'], 0 + _host_ok.return_value = 0, 'it is okay' + _get_daemon_types.return_value = ['crash'] + _hosts.return_value = [hostname, 'other_host'] + cephadm_module.inventory.add_host(HostSpec(hostname)) + + with pytest.raises(OrchestratorError, match='Failed to place host1 into maintenance for cluster fsid'): + cephadm_module.enter_host_maintenance(hostname) + + assert not cephadm_module.inventory._inventory[hostname]['status'] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.module.HostCache.get_daemon_types") + @mock.patch("cephadm.module.HostCache.get_hosts") + def test_maintenance_exit_success(self, _hosts, _get_daemon_types, _run_cephadm, cephadm_module: CephadmOrchestrator): + hostname = 'host1' + _run_cephadm.return_value = [''], [ + 'something\nsuccess - systemd target xxx enabled and started'], 0 + _get_daemon_types.return_value = ['crash'] + _hosts.return_value = [hostname, 'other_host'] + cephadm_module.inventory.add_host(HostSpec(hostname, status='maintenance')) + # should not raise an error + retval = cephadm_module.exit_host_maintenance(hostname) + assert retval.result_str().startswith('Ceph cluster') + assert not retval.exception_str + assert not cephadm_module.inventory._inventory[hostname]['status'] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + @mock.patch("cephadm.module.HostCache.get_daemon_types") + 
@mock.patch("cephadm.module.HostCache.get_hosts") + def test_maintenance_exit_failure(self, _hosts, _get_daemon_types, _run_cephadm, cephadm_module: CephadmOrchestrator): + hostname = 'host1' + _run_cephadm.return_value = [''], ['something\nfailed - unable to enable the target'], 0 + _get_daemon_types.return_value = ['crash'] + _hosts.return_value = [hostname, 'other_host'] + cephadm_module.inventory.add_host(HostSpec(hostname, status='maintenance')) + + with pytest.raises(OrchestratorError, match='Failed to exit maintenance state for host host1, cluster fsid'): + cephadm_module.exit_host_maintenance(hostname) + + assert cephadm_module.inventory._inventory[hostname]['status'] == 'maintenance' + + def test_stale_connections(self, cephadm_module): + class Connection(object): + """ + A mocked connection class that only allows the use of the connection + once. If you attempt to use it again via a _check, it'll explode (go + boom!). + + The old code triggers the boom. The new code checks the has_connection + and will recreate the connection. + """ + fuse = False + + @ staticmethod + def has_connection(): + return False + + def import_module(self, *args, **kargs): + return mock.Mock() + + @ staticmethod + def exit(): + pass + + def _check(conn, *args, **kargs): + if conn.fuse: + raise Exception("boom: connection is dead") + else: + conn.fuse = True + return '{}', [], 0 + with mock.patch("remoto.Connection", side_effect=[Connection(), Connection(), Connection()]): + with mock.patch("remoto.process.check", _check): + with with_host(cephadm_module, 'test', refresh_hosts=False): + code, out, err = cephadm_module.check_host('test') + # First should succeed. + assert err == '' + + # On second it should attempt to reuse the connection, where the + # connection is "down" so will recreate the connection. The old + # code will blow up here triggering the BOOM! 
+ code, out, err = cephadm_module.check_host('test') + assert err == '' + + @mock.patch("cephadm.module.CephadmOrchestrator._get_connection") + @mock.patch("remoto.process.check") + @mock.patch("cephadm.module.CephadmServe._write_remote_file") + def test_etc_ceph(self, _write_file, _check, _get_connection, cephadm_module): + _get_connection.return_value = mock.Mock(), mock.Mock() + _check.return_value = '{}', '', 0 + _write_file.return_value = None + + assert cephadm_module.manage_etc_ceph_ceph_conf is False + + with with_host(cephadm_module, 'test'): + assert '/etc/ceph/ceph.conf' not in cephadm_module.cache.get_host_client_files('test') + + with with_host(cephadm_module, 'test'): + cephadm_module.set_module_option('manage_etc_ceph_ceph_conf', True) + cephadm_module.config_notify() + assert cephadm_module.manage_etc_ceph_ceph_conf is True + + CephadmServe(cephadm_module)._refresh_hosts_and_daemons() + _write_file.assert_called_with('test', '/etc/ceph/ceph.conf', b'', + 0o644, 0, 0) + + assert '/etc/ceph/ceph.conf' in cephadm_module.cache.get_host_client_files('test') + + # set extra config and expect that we deploy another ceph.conf + cephadm_module._set_extra_ceph_conf('[mon]\nk=v') + CephadmServe(cephadm_module)._refresh_hosts_and_daemons() + _write_file.assert_called_with('test', '/etc/ceph/ceph.conf', + b'\n\n[mon]\nk=v\n', 0o644, 0, 0) + + # reload + cephadm_module.cache.last_client_files = {} + cephadm_module.cache.load() + + assert '/etc/ceph/ceph.conf' in cephadm_module.cache.get_host_client_files('test') + + # Make sure, _check_daemons does a redeploy due to monmap change: + before_digest = cephadm_module.cache.get_host_client_files('test')[ + '/etc/ceph/ceph.conf'][0] + cephadm_module._set_extra_ceph_conf('[mon]\nk2=v2') + CephadmServe(cephadm_module)._refresh_hosts_and_daemons() + after_digest = cephadm_module.cache.get_host_client_files('test')[ + '/etc/ceph/ceph.conf'][0] + assert before_digest != after_digest + + def test_etc_ceph_init(self): + with 
with_cephadm_module({'manage_etc_ceph_ceph_conf': True}) as m: + assert m.manage_etc_ceph_ceph_conf is True + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_registry_login(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + def check_registry_credentials(url, username, password): + assert json.loads(cephadm_module.get_store('registry_credentials')) == { + 'url': url, 'username': username, 'password': password} + + _run_cephadm.return_value = '{}', '', 0 + with with_host(cephadm_module, 'test'): + # test successful login with valid args + code, out, err = cephadm_module.registry_login('test-url', 'test-user', 'test-password') + assert out == 'registry login scheduled' + assert err == '' + check_registry_credentials('test-url', 'test-user', 'test-password') + + # test bad login attempt with invalid args + code, out, err = cephadm_module.registry_login('bad-args') + assert err == ("Invalid arguments. Please provide arguments <url> <username> <password> " + "or -i <login credentials json file>") + check_registry_credentials('test-url', 'test-user', 'test-password') + + # test bad login using invalid json file + code, out, err = cephadm_module.registry_login( + None, None, None, '{"bad-json": "bad-json"}') + assert err == ("json provided for custom registry login did not include all necessary fields. 
" + "Please setup json file as\n" + "{\n" + " \"url\": \"REGISTRY_URL\",\n" + " \"username\": \"REGISTRY_USERNAME\",\n" + " \"password\": \"REGISTRY_PASSWORD\"\n" + "}\n") + check_registry_credentials('test-url', 'test-user', 'test-password') + + # test good login using valid json file + good_json = ("{\"url\": \"" + "json-url" + "\", \"username\": \"" + "json-user" + "\", " + " \"password\": \"" + "json-pass" + "\"}") + code, out, err = cephadm_module.registry_login(None, None, None, good_json) + assert out == 'registry login scheduled' + assert err == '' + check_registry_credentials('json-url', 'json-user', 'json-pass') + + # test bad login where args are valid but login command fails + _run_cephadm.return_value = '{}', 'error', 1 + code, out, err = cephadm_module.registry_login('fail-url', 'fail-user', 'fail-password') + assert err == 'Host test failed to login to fail-url as fail-user with given password' + check_registry_credentials('json-url', 'json-user', 'json-pass') + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({ + 'image_id': 'image_id', + 'repo_digests': ['image@repo_digest'], + }))) + @pytest.mark.parametrize("use_repo_digest", + [ + False, + True + ]) + def test_upgrade_run(self, use_repo_digest, cephadm_module: CephadmOrchestrator): + cephadm_module.use_repo_digest = use_repo_digest + + with with_host(cephadm_module, 'test', refresh_hosts=False): + cephadm_module.set_container_image('global', 'image') + + if use_repo_digest: + + CephadmServe(cephadm_module).convert_tags_to_repo_digest() + + _, image, _ = cephadm_module.check_mon_command({ + 'prefix': 'config get', + 'who': 'global', + 'key': 'container_image', + }) + if use_repo_digest: + assert image == 'image@repo_digest' + else: + assert image == 'image' + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_ceph_volume_no_filter_for_batch(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.return_value = ('{}', '', 0) + + 
error_message = """cephadm exited with an error code: 1, stderr:/usr/bin/podman:stderr usage: ceph-volume inventory [-h] [--format {plain,json,json-pretty}] [path]/usr/bin/podman:stderr ceph-volume inventory: error: unrecognized arguments: --filter-for-batch +Traceback (most recent call last): + File "<stdin>", line 6112, in <module> + File "<stdin>", line 1299, in _infer_fsid + File "<stdin>", line 1382, in _infer_image + File "<stdin>", line 3612, in command_ceph_volume + File "<stdin>", line 1061, in call_throws""" + + with with_host(cephadm_module, 'test'): + _run_cephadm.reset_mock() + _run_cephadm.side_effect = OrchestratorError(error_message) + + s = CephadmServe(cephadm_module)._refresh_host_devices('test') + assert s == 'host test `cephadm ceph-volume` failed: ' + error_message + + assert _run_cephadm.mock_calls == [ + mock.call('test', 'osd', 'ceph-volume', + ['--', 'inventory', '--format=json-pretty', '--filter-for-batch'], image='', + no_fsid=False), + mock.call('test', 'osd', 'ceph-volume', + ['--', 'inventory', '--format=json-pretty'], image='', + no_fsid=False), + ] + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_osd_activate_datadevice(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.return_value = ('{}', '', 0) + with with_host(cephadm_module, 'test', refresh_hosts=False): + with with_osd_daemon(cephadm_module, _run_cephadm, 'test', 1): + pass + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_osd_activate_datadevice_fail(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.return_value = ('{}', '', 0) + with with_host(cephadm_module, 'test', refresh_hosts=False): + cephadm_module.mock_store_set('_ceph_get', 'osd_map', { + 'osds': [ + { + 'osd': 1, + 'up_from': 0, + 'uuid': 'uuid' + } + ] + }) + + ceph_volume_lvm_list = { + '1': [{ + 'tags': { + 'ceph.cluster_fsid': cephadm_module._cluster_fsid, + 'ceph.osd_fsid': 'uuid' + }, + 'type': 'data' + }] + } + 
_run_cephadm.reset_mock(return_value=True) + + def _r_c(*args, **kwargs): + if 'ceph-volume' in args: + return (json.dumps(ceph_volume_lvm_list), '', 0) + else: + assert 'deploy' in args + raise OrchestratorError("let's fail somehow") + _run_cephadm.side_effect = _r_c + assert cephadm_module._osd_activate( + ['test']).stderr == "let's fail somehow" + with pytest.raises(AssertionError): + cephadm_module.assert_issued_mon_command({ + 'prefix': 'auth rm', + 'entity': 'osd.1', + }) + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_osd_activate_datadevice_dbdevice(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.return_value = ('{}', '', 0) + with with_host(cephadm_module, 'test', refresh_hosts=False): + + def _ceph_volume_list(s, host, entity, cmd, **kwargs): + logging.info(f'ceph-volume cmd: {cmd}') + if 'raw' in cmd: + return json.dumps({ + "21a4209b-f51b-4225-81dc-d2dca5b8b2f5": { + "ceph_fsid": "64c84f19-fe1d-452a-a731-ab19dc144aa8", + "device": "/dev/loop0", + "osd_id": 21, + "osd_uuid": "21a4209b-f51b-4225-81dc-d2dca5b8b2f5", + "type": "bluestore" + }, + }), '', 0 + if 'lvm' in cmd: + return json.dumps({ + '1': [{ + 'tags': { + 'ceph.cluster_fsid': cephadm_module._cluster_fsid, + 'ceph.osd_fsid': 'uuid' + }, + 'type': 'data' + }, { + 'tags': { + 'ceph.cluster_fsid': cephadm_module._cluster_fsid, + 'ceph.osd_fsid': 'uuid' + }, + 'type': 'db' + }] + }), '', 0 + return '{}', '', 0 + + with with_osd_daemon(cephadm_module, _run_cephadm, 'test', 1, ceph_volume_lvm_list=_ceph_volume_list): + pass + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_osd_count(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.return_value = ('{}', '', 0) + dg = DriveGroupSpec(service_id='', data_devices=DeviceSelection(all=True)) + with with_host(cephadm_module, 'test', refresh_hosts=False): + with with_service(cephadm_module, dg, host='test'): + with with_osd_daemon(cephadm_module, _run_cephadm, 'test', 
1): + assert wait(cephadm_module, cephadm_module.describe_service())[0].size == 1 + + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) + def test_host_rm_last_admin(self, cephadm_module: CephadmOrchestrator): + with pytest.raises(OrchestratorError): + with with_host(cephadm_module, 'test', refresh_hosts=False, rm_with_force=False): + cephadm_module.inventory.add_label('test', '_admin') + pass + assert False + with with_host(cephadm_module, 'test1', refresh_hosts=False, rm_with_force=True): + with with_host(cephadm_module, 'test2', refresh_hosts=False, rm_with_force=False): + cephadm_module.inventory.add_label('test2', '_admin') diff --git a/src/pybind/mgr/cephadm/tests/test_completion.py b/src/pybind/mgr/cephadm/tests/test_completion.py new file mode 100644 index 000000000..327c12d2a --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_completion.py @@ -0,0 +1,40 @@ +import pytest + +from ..module import forall_hosts + + +class TestCompletion(object): + + @pytest.mark.parametrize("input,expected", [ + ([], []), + ([1], ["(1,)"]), + (["hallo"], ["('hallo',)"]), + ("hi", ["('h',)", "('i',)"]), + (list(range(5)), [str((x, )) for x in range(5)]), + ([(1, 2), (3, 4)], ["(1, 2)", "(3, 4)"]), + ]) + def test_async_map(self, input, expected, cephadm_module): + @forall_hosts + def run_forall(*args): + return str(args) + assert run_forall(input) == expected + + @pytest.mark.parametrize("input,expected", [ + ([], []), + ([1], ["(1,)"]), + (["hallo"], ["('hallo',)"]), + ("hi", ["('h',)", "('i',)"]), + (list(range(5)), [str((x, )) for x in range(5)]), + ([(1, 2), (3, 4)], ["(1, 2)", "(3, 4)"]), + ]) + def test_async_map_self(self, input, expected, cephadm_module): + class Run(object): + def __init__(self): + self.attr = 1 + + @forall_hosts + def run_forall(self, *args): + assert self.attr == 1 + return str(args) + + assert Run().run_forall(input) == expected diff --git a/src/pybind/mgr/cephadm/tests/test_configchecks.py 
b/src/pybind/mgr/cephadm/tests/test_configchecks.py new file mode 100644 index 000000000..3cae0a27d --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_configchecks.py @@ -0,0 +1,668 @@ +import copy +import json +import logging +import ipaddress +import pytest +import uuid + +from time import time as now + +from ..configchecks import CephadmConfigChecks +from ..inventory import HostCache +from ..upgrade import CephadmUpgrade, UpgradeState +from orchestrator import DaemonDescription + +from typing import List, Dict, Any, Optional + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + +host_sample = { + "arch": "x86_64", + "bios_date": "04/01/2014", + "bios_version": "F2", + "cpu_cores": 16, + "cpu_count": 2, + "cpu_load": { + "15min": 0.0, + "1min": 0.01, + "5min": 0.01 + }, + "cpu_model": "Intel® Xeon® Processor E5-2698 v3", + "cpu_threads": 64, + "flash_capacity": "4.0TB", + "flash_capacity_bytes": 4000797868032, + "flash_count": 2, + "flash_list": [ + { + "description": "ATA CT2000MX500SSD1 (2.0TB)", + "dev_name": "sda", + "disk_size_bytes": 2000398934016, + "model": "CT2000MX500SSD1", + "rev": "023", + "vendor": "ATA", + "wwid": "t10.ATA CT2000MX500SSD1 193023156DE0" + }, + { + "description": "ATA CT2000MX500SSD1 (2.0TB)", + "dev_name": "sdb", + "disk_size_bytes": 2000398934016, + "model": "CT2000MX500SSD1", + "rev": "023", + "vendor": "ATA", + "wwid": "t10.ATA CT2000MX500SSD1 193023156DE0" + }, + ], + "hdd_capacity": "16.0TB", + "hdd_capacity_bytes": 16003148120064, + "hdd_count": 4, + "hdd_list": [ + { + "description": "ST4000VN008-2DR1 (4.0TB)", + "dev_name": "sdc", + "disk_size_bytes": 4000787030016, + "model": "ST4000VN008-2DR1", + "rev": "SC60", + "vendor": "ATA", + "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ" + }, + { + "description": "ST4000VN008-2DR1 (4.0TB)", + "dev_name": "sdd", + "disk_size_bytes": 4000787030016, + "model": "ST4000VN008-2DR1", + "rev": "SC60", + "vendor": "ATA", + "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ" + }, + 
{ + "description": "ST4000VN008-2DR1 (4.0TB)", + "dev_name": "sde", + "disk_size_bytes": 4000787030016, + "model": "ST4000VN008-2DR1", + "rev": "SC60", + "vendor": "ATA", + "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ" + }, + { + "description": "ST4000VN008-2DR1 (4.0TB)", + "dev_name": "sdf", + "disk_size_bytes": 4000787030016, + "model": "ST4000VN008-2DR1", + "rev": "SC60", + "vendor": "ATA", + "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ" + }, + ], + "hostname": "dummy", + "interfaces": { + "eth0": { + "driver": "e1000e", + "iftype": "physical", + "ipv4_address": "10.7.17.1/24", + "ipv6_address": "fe80::215:17ff:feab:50e2/64", + "lower_devs_list": [], + "mtu": 9000, + "nic_type": "ethernet", + "operstate": "up", + "speed": 1000, + "upper_devs_list": [], + }, + "eth1": { + "driver": "e1000e", + "iftype": "physical", + "ipv4_address": "10.7.18.1/24", + "ipv6_address": "fe80::215:17ff:feab:50e2/64", + "lower_devs_list": [], + "mtu": 9000, + "nic_type": "ethernet", + "operstate": "up", + "speed": 1000, + "upper_devs_list": [], + }, + "eth2": { + "driver": "r8169", + "iftype": "physical", + "ipv4_address": "10.7.19.1/24", + "ipv6_address": "fe80::76d4:35ff:fe58:9a79/64", + "lower_devs_list": [], + "mtu": 1500, + "nic_type": "ethernet", + "operstate": "up", + "speed": 1000, + "upper_devs_list": [] + }, + }, + "kernel": "4.18.0-240.10.1.el8_3.x86_64", + "kernel_parameters": { + "net.ipv4.ip_nonlocal_bind": "0", + }, + "kernel_security": { + "SELINUX": "enforcing", + "SELINUXTYPE": "targeted", + "description": "SELinux: Enabled(enforcing, targeted)", + "type": "SELinux" + }, + "memory_available_kb": 19489212, + "memory_free_kb": 245164, + "memory_total_kb": 32900916, + "model": "StorageHeavy", + "nic_count": 3, + "operating_system": "Red Hat Enterprise Linux 8.3 (Ootpa)", + "subscribed": "Yes", + "system_uptime": 777600.0, + "timestamp": now(), + "vendor": "Ceph Servers Inc", +} + + +def role_list(n: int) -> List[str]: + if n == 1: + return ['mon', 'mgr', 'osd'] + if n in 
[2, 3]: + return ['mon', 'mds', 'osd'] + + return ['osd'] + + +def generate_testdata(count: int = 10, public_network: str = '10.7.17.0/24', cluster_network: str = '10.7.18.0/24'): + # public network = eth0, cluster_network = eth1 + assert count > 3 + assert public_network + num_disks = host_sample['hdd_count'] + hosts = {} + daemons = {} + daemon_to_host = {} + osd_num = 0 + public_netmask = public_network.split('/')[1] + cluster_ip_list = [] + cluster_netmask = '' + + public_ip_list = [str(i) for i in list(ipaddress.ip_network(public_network).hosts())] + if cluster_network: + cluster_ip_list = [str(i) for i in list(ipaddress.ip_network(cluster_network).hosts())] + cluster_netmask = cluster_network.split('/')[1] + + for n in range(1, count + 1, 1): + + new_host = copy.deepcopy(host_sample) + hostname = f"node-{n}.ceph.com" + + new_host['hostname'] = hostname + new_host['interfaces']['eth0']['ipv4_address'] = f"{public_ip_list.pop(0)}/{public_netmask}" + if cluster_ip_list: + new_host['interfaces']['eth1']['ipv4_address'] = f"{cluster_ip_list.pop(0)}/{cluster_netmask}" + else: + new_host['interfaces']['eth1']['ipv4_address'] = '' + + hosts[hostname] = new_host + daemons[hostname] = {} + for r in role_list(n): + name = '' + if r == 'osd': + for n in range(num_disks): + osd = DaemonDescription( + hostname=hostname, daemon_type='osd', daemon_id=osd_num) + name = f"osd.{osd_num}" + daemons[hostname][name] = osd + daemon_to_host[name] = hostname + osd_num += 1 + else: + name = f"{r}.{hostname}" + daemons[hostname][name] = DaemonDescription( + hostname=hostname, daemon_type=r, daemon_id=hostname) + daemon_to_host[name] = hostname + + logger.debug(f"daemon to host lookup - {json.dumps(daemon_to_host)}") + return hosts, daemons, daemon_to_host + + +@pytest.fixture() +def mgr(): + """Provide a fake ceph mgr object preloaded with a configuration""" + mgr = FakeMgr() + mgr.cache.facts, mgr.cache.daemons, mgr.daemon_to_host = \ + generate_testdata(public_network='10.9.64.0/24', 
cluster_network='') + mgr.module_option.update({ + "config_checks_enabled": True, + }) + yield mgr + + +class FakeMgr: + + def __init__(self): + self.datastore = {} + self.module_option = {} + self.health_checks = {} + self.default_version = 'quincy' + self.version_overrides = {} + self.daemon_to_host = {} + + self.cache = HostCache(self) + self.upgrade = CephadmUpgrade(self) + + def set_health_checks(self, checks: dict): + return + + def get_module_option(self, keyname: str) -> Optional[str]: + return self.module_option.get(keyname, None) + + def set_module_option(self, keyname: str, value: str) -> None: + return None + + def get_store(self, keyname: str, default=None) -> Optional[str]: + return self.datastore.get(keyname, None) + + def set_store(self, keyname: str, value: str) -> None: + self.datastore[keyname] = value + return None + + def _ceph_get_server(self) -> None: + pass + + def get_metadata(self, daemon_type: str, daemon_id: str) -> Dict[str, Any]: + key = f"{daemon_type}.{daemon_id}" + if key in self.version_overrides: + logger.debug(f"override applied for {key}") + version_str = self.version_overrides[key] + else: + version_str = self.default_version + + return {"ceph_release": version_str, "hostname": self.daemon_to_host[key]} + + def list_servers(self) -> List[Dict[str, List[Dict[str, str]]]]: + num_disks = host_sample['hdd_count'] + osd_num = 0 + service_map = [] + + for hostname in self.cache.facts: + + host_num = int(hostname.split('.')[0].split('-')[1]) + svc_list = [] + for r in role_list(host_num): + if r == 'osd': + for _n in range(num_disks): + svc_list.append({ + "type": "osd", + "id": osd_num, + }) + osd_num += 1 + else: + svc_list.append({ + "type": r, + "id": hostname, + }) + + service_map.append({"services": svc_list}) + logger.debug(f"services map - {json.dumps(service_map)}") + return service_map + + def use_repo_digest(self) -> None: + return None + + +class TestConfigCheck: + + def test_to_json(self, mgr): + checker = 
CephadmConfigChecks(mgr) + out = checker.to_json() + assert out + assert len(out) == len(checker.health_checks) + + def test_lookup_check(self, mgr): + checker = CephadmConfigChecks(mgr) + check = checker.lookup_check('osd_mtu_size') + logger.debug(json.dumps(check.to_json())) + assert check + assert check.healthcheck_name == "CEPHADM_CHECK_MTU" + + def test_old_checks_removed(self, mgr): + mgr.datastore.update({ + "config_checks": '{"bogus_one": "enabled", "bogus_two": "enabled", ' + '"kernel_security": "enabled", "public_network": "enabled", ' + '"kernel_version": "enabled", "network_missing": "enabled", ' + '"osd_mtu_size": "enabled", "osd_linkspeed": "enabled", ' + '"os_subscription": "enabled", "ceph_release": "enabled"}' + }) + checker = CephadmConfigChecks(mgr) + raw = mgr.get_store('config_checks') + checks = json.loads(raw) + assert "bogus_one" not in checks + assert "bogus_two" not in checks + assert len(checks) == len(checker.health_checks) + + def test_new_checks(self, mgr): + mgr.datastore.update({ + "config_checks": '{"kernel_security": "enabled", "public_network": "enabled", ' + '"osd_mtu_size": "enabled", "osd_linkspeed": "enabled"}' + }) + checker = CephadmConfigChecks(mgr) + raw = mgr.get_store('config_checks') + checks = json.loads(raw) + assert len(checks) == len(checker.health_checks) + + def test_no_issues(self, mgr): + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + checker.run_checks() + + assert not mgr.health_checks + + def test_no_public_network(self, mgr): + bad_node = mgr.cache.facts['node-1.ceph.com'] + bad_node['interfaces']['eth0']['ipv4_address'] = "192.168.1.20/24" + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + checker.run_checks() + logger.debug(mgr.health_checks) + assert len(mgr.health_checks) == 1 + assert 'CEPHADM_CHECK_PUBLIC_MEMBERSHIP' in mgr.health_checks + assert 
mgr.health_checks['CEPHADM_CHECK_PUBLIC_MEMBERSHIP']['detail'][0] == \ + 'node-1.ceph.com does not have an interface on any public network' + + def test_missing_networks(self, mgr): + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.66.0/24'] + checker.run_checks() + + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert len(mgr.health_checks) == 1 + assert 'CEPHADM_CHECK_NETWORK_MISSING' in mgr.health_checks + assert mgr.health_checks['CEPHADM_CHECK_NETWORK_MISSING']['detail'][0] == \ + "10.9.66.0/24 not found on any host in the cluster" + + def test_bad_mtu_single(self, mgr): + + bad_node = mgr.cache.facts['node-1.ceph.com'] + bad_node['interfaces']['eth0']['mtu'] = 1500 + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert "CEPHADM_CHECK_MTU" in mgr.health_checks and len(mgr.health_checks) == 1 + assert mgr.health_checks['CEPHADM_CHECK_MTU']['detail'][0] == \ + 'host node-1.ceph.com(eth0) is using MTU 1500 on 10.9.64.0/24, NICs on other hosts use 9000' + + def test_bad_mtu_multiple(self, mgr): + + for n in [1, 5]: + bad_node = mgr.cache.facts[f'node-{n}.ceph.com'] + bad_node['interfaces']['eth0']['mtu'] = 1500 + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert "CEPHADM_CHECK_MTU" in mgr.health_checks and len(mgr.health_checks) == 1 + assert mgr.health_checks['CEPHADM_CHECK_MTU']['count'] == 2 + + def test_bad_linkspeed_single(self, mgr): + + bad_node = mgr.cache.facts['node-1.ceph.com'] + bad_node['interfaces']['eth0']['speed'] = 100 + + checker = CephadmConfigChecks(mgr) + 
checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert mgr.health_checks + assert "CEPHADM_CHECK_LINKSPEED" in mgr.health_checks and len(mgr.health_checks) == 1 + assert mgr.health_checks['CEPHADM_CHECK_LINKSPEED']['detail'][0] == \ + 'host node-1.ceph.com(eth0) has linkspeed of 100 on 10.9.64.0/24, NICs on other hosts use 1000' + + def test_super_linkspeed_single(self, mgr): + + bad_node = mgr.cache.facts['node-1.ceph.com'] + bad_node['interfaces']['eth0']['speed'] = 10000 + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert not mgr.health_checks + + def test_release_mismatch_single(self, mgr): + + mgr.version_overrides = { + "osd.1": "pacific", + } + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + assert mgr.health_checks + assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and len(mgr.health_checks) == 1 + assert mgr.health_checks['CEPHADM_CHECK_CEPH_RELEASE']['detail'][0] == \ + 'osd.1 is running pacific (majority of cluster is using quincy)' + + def test_release_mismatch_multi(self, mgr): + + mgr.version_overrides = { + "osd.1": "pacific", + "osd.5": "octopus", + } + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + assert mgr.health_checks + assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and len(mgr.health_checks) == 1 + assert len(mgr.health_checks['CEPHADM_CHECK_CEPH_RELEASE']['detail']) == 2 + + def test_kernel_mismatch(self, mgr): 
+ + bad_host = mgr.cache.facts['node-1.ceph.com'] + bad_host['kernel'] = "5.10.18.0-241.10.1.el8.x86_64" + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + assert len(mgr.health_checks) == 1 + assert 'CEPHADM_CHECK_KERNEL_VERSION' in mgr.health_checks + assert mgr.health_checks['CEPHADM_CHECK_KERNEL_VERSION']['detail'][0] == \ + "host node-1.ceph.com running kernel 5.10, majority of hosts(9) running 4.18" + assert mgr.health_checks['CEPHADM_CHECK_KERNEL_VERSION']['count'] == 1 + + def test_inconsistent_subscription(self, mgr): + + bad_host = mgr.cache.facts['node-5.ceph.com'] + bad_host['subscribed'] = "no" + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + assert len(mgr.health_checks) == 1 + assert "CEPHADM_CHECK_SUBSCRIPTION" in mgr.health_checks + assert mgr.health_checks['CEPHADM_CHECK_SUBSCRIPTION']['detail'][0] == \ + "node-5.ceph.com does not have an active subscription" + + def test_kernel_security_inconsistent(self, mgr): + + bad_node = mgr.cache.facts['node-3.ceph.com'] + bad_node['kernel_security'] = { + "SELINUX": "permissive", + "SELINUXTYPE": "targeted", + "description": "SELinux: Enabled(permissive, targeted)", + "type": "SELinux" + } + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + assert len(mgr.health_checks) == 1 + assert 'CEPHADM_CHECK_KERNEL_LSM' in mgr.health_checks + assert mgr.health_checks['CEPHADM_CHECK_KERNEL_LSM']['detail'][0] == \ + "node-3.ceph.com has inconsistent KSM settings compared to the majority of hosts(9) in the cluster" + + def test_release_and_bad_mtu(self, mgr): + + mgr.version_overrides = { + 
"osd.1": "pacific", + } + bad_node = mgr.cache.facts['node-1.ceph.com'] + bad_node['interfaces']['eth0']['mtu'] = 1500 + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert mgr.health_checks + assert len(mgr.health_checks) == 2 + assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and \ + "CEPHADM_CHECK_MTU" in mgr.health_checks + + def test_release_mtu_LSM(self, mgr): + + mgr.version_overrides = { + "osd.1": "pacific", + } + bad_node1 = mgr.cache.facts['node-1.ceph.com'] + bad_node1['interfaces']['eth0']['mtu'] = 1500 + bad_node2 = mgr.cache.facts['node-3.ceph.com'] + bad_node2['kernel_security'] = { + "SELINUX": "permissive", + "SELINUXTYPE": "targeted", + "description": "SELinux: Enabled(permissive, targeted)", + "type": "SELinux" + } + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert mgr.health_checks + assert len(mgr.health_checks) == 3 + assert \ + "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and \ + "CEPHADM_CHECK_MTU" in mgr.health_checks and \ + "CEPHADM_CHECK_KERNEL_LSM" in mgr.health_checks + + def test_release_mtu_LSM_subscription(self, mgr): + + mgr.version_overrides = { + "osd.1": "pacific", + } + bad_node1 = mgr.cache.facts['node-1.ceph.com'] + bad_node1['interfaces']['eth0']['mtu'] = 1500 + bad_node1['subscribed'] = "no" + bad_node2 = mgr.cache.facts['node-3.ceph.com'] + bad_node2['kernel_security'] = { + "SELINUX": "permissive", + "SELINUXTYPE": "targeted", + "description": "SELinux: Enabled(permissive, targeted)", + "type": "SELinux" + } + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + 
checker.run_checks() + logger.info(json.dumps(mgr.health_checks)) + logger.info(checker.subnet_lookup) + assert mgr.health_checks + assert len(mgr.health_checks) == 4 + assert \ + "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and \ + "CEPHADM_CHECK_MTU" in mgr.health_checks and \ + "CEPHADM_CHECK_KERNEL_LSM" in mgr.health_checks and \ + "CEPHADM_CHECK_SUBSCRIPTION" in mgr.health_checks + + def test_skip_release_during_upgrade(self, mgr): + mgr.upgrade.upgrade_state = UpgradeState.from_json({ + 'target_name': 'wah', + 'progress_id': str(uuid.uuid4()), + 'target_id': 'wah', + 'error': '', + 'paused': False, + }) + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(f"{checker.skipped_checks_count} skipped check(s): {checker.skipped_checks}") + assert checker.skipped_checks_count == 1 + assert 'ceph_release' in checker.skipped_checks + + def test_skip_when_disabled(self, mgr): + mgr.module_option.update({ + "config_checks_enabled": "false" + }) + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(checker.active_checks) + logger.info(checker.defined_checks) + assert checker.active_checks_count == 0 + + def test_skip_mtu_checks(self, mgr): + mgr.datastore.update({ + 'config_checks': '{"osd_mtu_size": "disabled"}' + }) + + checker = CephadmConfigChecks(mgr) + checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(checker.active_checks) + logger.info(checker.defined_checks) + assert 'osd_mtu_size' not in checker.active_checks + assert checker.defined_checks == 8 and checker.active_checks_count == 7 + + def test_skip_mtu_lsm_checks(self, mgr): + mgr.datastore.update({ + 'config_checks': '{"osd_mtu_size": "disabled", "kernel_security": "disabled"}' + }) + + checker = CephadmConfigChecks(mgr) + 
checker.cluster_network_list = [] + checker.public_network_list = ['10.9.64.0/24'] + + checker.run_checks() + logger.info(checker.active_checks) + logger.info(checker.defined_checks) + assert 'osd_mtu_size' not in checker.active_checks and \ + 'kernel_security' not in checker.active_checks + assert checker.defined_checks == 8 and checker.active_checks_count == 6 + assert not mgr.health_checks diff --git a/src/pybind/mgr/cephadm/tests/test_facts.py b/src/pybind/mgr/cephadm/tests/test_facts.py new file mode 100644 index 000000000..6c33f5368 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_facts.py @@ -0,0 +1,10 @@ +from ..import CephadmOrchestrator + +from .fixtures import wait + + +def test_facts(cephadm_module: CephadmOrchestrator): + facts = {'node-1.ceph.com': {'bios_version': 'F2', 'cpu_cores': 16}} + cephadm_module.cache.facts = facts + ret_facts = cephadm_module.get_facts('node-1.ceph.com') + assert wait(cephadm_module, ret_facts) == [{'bios_version': 'F2', 'cpu_cores': 16}] diff --git a/src/pybind/mgr/cephadm/tests/test_migration.py b/src/pybind/mgr/cephadm/tests/test_migration.py new file mode 100644 index 000000000..b95f54f7c --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_migration.py @@ -0,0 +1,229 @@ +import json + +from ceph.deployment.service_spec import PlacementSpec, ServiceSpec, HostPlacementSpec +from ceph.utils import datetime_to_str, datetime_now +from cephadm import CephadmOrchestrator +from cephadm.inventory import SPEC_STORE_PREFIX +from cephadm.migrations import LAST_MIGRATION +from cephadm.tests.fixtures import _run_cephadm, wait, with_host +from cephadm.serve import CephadmServe +from tests import mock + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_scheduler(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1', refresh_hosts=False): + with with_host(cephadm_module, 'host2', refresh_hosts=False): + + # emulate the old scheduler: + c = 
cephadm_module.apply_rgw( + ServiceSpec('rgw', 'r.z', placement=PlacementSpec(host_pattern='*', count=2)) + ) + assert wait(cephadm_module, c) == 'Scheduled rgw.r.z update...' + + # with pytest.raises(OrchestratorError, match="cephadm migration still ongoing. Please wait, until the migration is complete."): + CephadmServe(cephadm_module)._apply_all_services() + + cephadm_module.migration_current = 0 + cephadm_module.migration.migrate() + # assert we need all daemons. + assert cephadm_module.migration_current == 0 + + CephadmServe(cephadm_module)._refresh_hosts_and_daemons() + cephadm_module.migration.migrate() + + CephadmServe(cephadm_module)._apply_all_services() + + out = {o.hostname for o in wait(cephadm_module, cephadm_module.list_daemons())} + assert out == {'host1', 'host2'} + + c = cephadm_module.apply_rgw( + ServiceSpec('rgw', 'r.z', placement=PlacementSpec(host_pattern='host1', count=2)) + ) + assert wait(cephadm_module, c) == 'Scheduled rgw.r.z update...' + + # Sorry, for this hack, but I need to make sure, Migration thinks, + # we have updated all daemons already. 
+ cephadm_module.cache.last_daemon_update['host1'] = datetime_now() + cephadm_module.cache.last_daemon_update['host2'] = datetime_now() + + cephadm_module.migration_current = 0 + cephadm_module.migration.migrate() + assert cephadm_module.migration_current >= 2 + + out = [o.spec.placement for o in wait( + cephadm_module, cephadm_module.describe_service())] + assert out == [PlacementSpec(count=2, hosts=[HostPlacementSpec( + hostname='host1', network='', name=''), HostPlacementSpec(hostname='host2', network='', name='')])] + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_service_id_mon_one(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + cephadm_module.set_store(SPEC_STORE_PREFIX + 'mon.wrong', json.dumps({ + 'spec': { + 'service_type': 'mon', + 'service_id': 'wrong', + 'placement': { + 'hosts': ['host1'] + } + }, + 'created': datetime_to_str(datetime_now()), + }, sort_keys=True), + ) + + cephadm_module.spec_store.load() + + assert len(cephadm_module.spec_store.all_specs) == 1 + assert cephadm_module.spec_store.all_specs['mon.wrong'].service_name() == 'mon' + + cephadm_module.migration_current = 1 + cephadm_module.migration.migrate() + assert cephadm_module.migration_current >= 2 + + assert len(cephadm_module.spec_store.all_specs) == 1 + assert cephadm_module.spec_store.all_specs['mon'] == ServiceSpec( + service_type='mon', + unmanaged=True, + placement=PlacementSpec(hosts=['host1']) + ) + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_service_id_mon_two(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + cephadm_module.set_store(SPEC_STORE_PREFIX + 'mon', json.dumps({ + 'spec': { + 'service_type': 'mon', + 'placement': { + 'count': 5, + } + }, + 'created': datetime_to_str(datetime_now()), + }, sort_keys=True), + ) + cephadm_module.set_store(SPEC_STORE_PREFIX + 'mon.wrong', json.dumps({ + 'spec': { + 
'service_type': 'mon', + 'service_id': 'wrong', + 'placement': { + 'hosts': ['host1'] + } + }, + 'created': datetime_to_str(datetime_now()), + }, sort_keys=True), + ) + + cephadm_module.spec_store.load() + + assert len(cephadm_module.spec_store.all_specs) == 2 + assert cephadm_module.spec_store.all_specs['mon.wrong'].service_name() == 'mon' + assert cephadm_module.spec_store.all_specs['mon'].service_name() == 'mon' + + cephadm_module.migration_current = 1 + cephadm_module.migration.migrate() + assert cephadm_module.migration_current >= 2 + + assert len(cephadm_module.spec_store.all_specs) == 1 + assert cephadm_module.spec_store.all_specs['mon'] == ServiceSpec( + service_type='mon', + unmanaged=True, + placement=PlacementSpec(count=5) + ) + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_service_id_mds_one(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + cephadm_module.set_store(SPEC_STORE_PREFIX + 'mds', json.dumps({ + 'spec': { + 'service_type': 'mds', + 'placement': { + 'hosts': ['host1'] + } + }, + 'created': datetime_to_str(datetime_now()), + }, sort_keys=True), + ) + + cephadm_module.spec_store.load() + + # there is nothing to migrate, as the spec is gone now. 
+ assert len(cephadm_module.spec_store.all_specs) == 0 + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_nfs_initial(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + cephadm_module.set_store( + SPEC_STORE_PREFIX + 'mds', + json.dumps({ + 'spec': { + 'service_type': 'nfs', + 'service_id': 'foo', + 'placement': { + 'hosts': ['host1'] + }, + 'spec': { + 'pool': 'mypool', + 'namespace': 'foons', + }, + }, + 'created': datetime_to_str(datetime_now()), + }, sort_keys=True), + ) + cephadm_module.migration_current = 1 + cephadm_module.spec_store.load() + + ls = json.loads(cephadm_module.get_store('nfs_migration_queue')) + assert ls == [['foo', 'mypool', 'foons']] + + cephadm_module.migration.migrate(True) + assert cephadm_module.migration_current == 2 + + cephadm_module.migration.migrate() + assert cephadm_module.migration_current == LAST_MIGRATION + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_nfs_initial_octopus(cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + cephadm_module.set_store( + SPEC_STORE_PREFIX + 'mds', + json.dumps({ + 'spec': { + 'service_type': 'nfs', + 'service_id': 'ganesha-foo', + 'placement': { + 'hosts': ['host1'] + }, + 'spec': { + 'pool': 'mypool', + 'namespace': 'foons', + }, + }, + 'created': datetime_to_str(datetime_now()), + }, sort_keys=True), + ) + cephadm_module.migration_current = 1 + cephadm_module.spec_store.load() + + ls = json.loads(cephadm_module.get_store('nfs_migration_queue')) + assert ls == [['ganesha-foo', 'mypool', 'foons']] + + cephadm_module.migration.migrate(True) + assert cephadm_module.migration_current == 2 + + cephadm_module.migration.migrate() + assert cephadm_module.migration_current == LAST_MIGRATION + + +@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')) +def test_migrate_admin_client_keyring(cephadm_module: CephadmOrchestrator): + 
assert 'client.admin' not in cephadm_module.keys.keys + + cephadm_module.migration_current = 3 + cephadm_module.migration.migrate() + assert cephadm_module.migration_current == LAST_MIGRATION + + assert cephadm_module.keys.keys['client.admin'].placement.label == '_admin' diff --git a/src/pybind/mgr/cephadm/tests/test_osd_removal.py b/src/pybind/mgr/cephadm/tests/test_osd_removal.py new file mode 100644 index 000000000..6685fcb2a --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_osd_removal.py @@ -0,0 +1,298 @@ +import json + +from cephadm.services.osd import OSDRemovalQueue, OSD +import pytest +from tests import mock +from .fixtures import with_cephadm_module +from datetime import datetime + + +class MockOSD: + + def __init__(self, osd_id): + self.osd_id = osd_id + + +class TestOSDRemoval: + + @pytest.mark.parametrize( + "osd_id, osd_df, expected", + [ + # missing 'nodes' key + (1, dict(nodes=[]), -1), + # missing 'pgs' key + (1, dict(nodes=[dict(id=1)]), -1), + # id != osd_id + (1, dict(nodes=[dict(id=999, pgs=1)]), -1), + # valid + (1, dict(nodes=[dict(id=1, pgs=1)]), 1), + ] + ) + def test_get_pg_count(self, rm_util, osd_id, osd_df, expected): + with mock.patch("cephadm.services.osd.RemoveUtil.osd_df", return_value=osd_df): + assert rm_util.get_pg_count(osd_id) == expected + + @pytest.mark.parametrize( + "osds, ok_to_stop, expected", + [ + # no osd_ids provided + ([], [False], []), + # all osds are ok_to_stop + ([1, 2], [True], [1, 2]), + # osds are ok_to_stop after the second iteration + ([1, 2], [False, True], [2]), + # osds are never ok_to_stop, (taking the sample size `(len(osd_ids))` into account), + # expected to get False + ([1, 2], [False, False], []), + ] + ) + def test_find_stop_threshold(self, rm_util, osds, ok_to_stop, expected): + with mock.patch("cephadm.services.osd.RemoveUtil.ok_to_stop", side_effect=ok_to_stop): + assert rm_util.find_osd_stop_threshold(osds) == expected + + def test_process_removal_queue(self, rm_util): + # TODO: ! 
+ # rm_util.process_removal_queue() + pass + + @pytest.mark.parametrize( + "max_osd_draining_count, draining_osds, idling_osds, ok_to_stop, expected", + [ + # drain one at a time, one already draining + (1, [1], [1], [True], 0), + # drain one at a time, none draining yet + (1, [], [1, 2, 3], [True, True, True], 1), + # drain one at a time, one already draining, none ok-to-stop + (1, [1], [1], [False], 0), + # drain one at a time, none draining, one ok-to-stop + (1, [], [1, 2, 3], [False, False, True], 1), + # drain three at a time, one already draining, all ok-to-stop + (3, [1], [1, 2, 3], [True, True, True], 2), + # drain two at a time, none already draining, none ok-to-stop + (2, [], [1, 2, 3], [False, False, False], 0), + # drain two at a time, none already draining, none idling + (2, [], [], [], 0), + ] + ) + def test_ready_to_drain_osds(self, max_osd_draining_count, draining_osds, idling_osds, ok_to_stop, expected): + with with_cephadm_module({'max_osd_draining_count': max_osd_draining_count}) as m: + with mock.patch("cephadm.services.osd.OSDRemovalQueue.draining_osds", return_value=draining_osds): + with mock.patch("cephadm.services.osd.OSDRemovalQueue.idling_osds", return_value=idling_osds): + with mock.patch("cephadm.services.osd.RemoveUtil.ok_to_stop", side_effect=ok_to_stop): + removal_queue = OSDRemovalQueue(m) + assert len(removal_queue._ready_to_drain_osds()) == expected + + def test_ok_to_stop(self, rm_util): + rm_util.ok_to_stop([MockOSD(1)]) + rm_util._run_mon_cmd.assert_called_with({'prefix': 'osd ok-to-stop', 'ids': ['1']}, + error_ok=True) + + def test_safe_to_destroy(self, rm_util): + rm_util.safe_to_destroy([1]) + rm_util._run_mon_cmd.assert_called_with({'prefix': 'osd safe-to-destroy', + 'ids': ['1']}, error_ok=True) + + def test_destroy_osd(self, rm_util): + rm_util.destroy_osd(1) + rm_util._run_mon_cmd.assert_called_with( + {'prefix': 'osd destroy-actual', 'id': 1, 'yes_i_really_mean_it': True}) + + def test_purge_osd(self, rm_util): + 
rm_util.purge_osd(1) + rm_util._run_mon_cmd.assert_called_with( + {'prefix': 'osd purge-actual', 'id': 1, 'yes_i_really_mean_it': True}) + + def test_load(self, cephadm_module, rm_util): + data = json.dumps([ + { + "osd_id": 35, + "started": True, + "draining": True, + "stopped": False, + "replace": False, + "force": False, + "zap": False, + "nodename": "node2", + "drain_started_at": "2020-09-14T11:41:53.960463", + "drain_stopped_at": None, + "drain_done_at": None, + "process_started_at": "2020-09-14T11:41:52.245832" + } + ]) + cephadm_module.set_store('osd_remove_queue', data) + cephadm_module.to_remove_osds.load_from_store() + + expected = OSDRemovalQueue(cephadm_module) + expected.osds.add(OSD(osd_id=35, remove_util=rm_util, draining=True)) + assert cephadm_module.to_remove_osds == expected + + +class TestOSD: + + def test_start(self, osd_obj): + assert osd_obj.started is False + osd_obj.start() + assert osd_obj.started is True + assert osd_obj.stopped is False + + def test_start_draining_purge(self, osd_obj): + assert osd_obj.draining is False + assert osd_obj.drain_started_at is None + ret = osd_obj.start_draining() + osd_obj.rm_util.reweight_osd.assert_called_with(osd_obj, 0.0) + assert isinstance(osd_obj.drain_started_at, datetime) + assert osd_obj.draining is True + assert osd_obj.replace is False + assert ret is True + + def test_start_draining_replace(self, osd_obj): + assert osd_obj.draining is False + assert osd_obj.drain_started_at is None + osd_obj.replace = True + ret = osd_obj.start_draining() + osd_obj.rm_util.set_osd_flag.assert_called_with([osd_obj], 'out') + assert isinstance(osd_obj.drain_started_at, datetime) + assert osd_obj.draining is True + assert osd_obj.replace is True + assert ret is True + + def test_start_draining_stopped(self, osd_obj): + osd_obj.stopped = True + ret = osd_obj.start_draining() + assert osd_obj.drain_started_at is None + assert ret is False + assert osd_obj.draining is False + + def test_stop_draining_replace(self, 
osd_obj): + osd_obj.replace = True + ret = osd_obj.stop_draining() + osd_obj.rm_util.set_osd_flag.assert_called_with([osd_obj], 'in') + assert isinstance(osd_obj.drain_stopped_at, datetime) + assert osd_obj.draining is False + assert ret is True + + def test_stop_draining_purge(self, osd_obj): + osd_obj.original_weight = 1.0 + ret = osd_obj.stop_draining() + osd_obj.rm_util.reweight_osd.assert_called_with(osd_obj, 1.0) + assert isinstance(osd_obj.drain_stopped_at, datetime) + assert osd_obj.draining is False + assert ret is True + + @mock.patch('cephadm.services.osd.OSD.stop_draining') + def test_stop(self, stop_draining_mock, osd_obj): + osd_obj.stop() + assert osd_obj.started is False + assert osd_obj.stopped is True + stop_draining_mock.assert_called_once() + + @pytest.mark.parametrize( + "draining, empty, expected", + [ + # must be !draining! and !not empty! to yield True + (True, not True, True), + # not draining and not empty + (False, not True, False), + # not draining and empty + (False, True, False), + # draining and empty + (True, True, False), + ] + ) + def test_is_draining(self, osd_obj, draining, empty, expected): + with mock.patch("cephadm.services.osd.OSD.is_empty", new_callable=mock.PropertyMock(return_value=empty)): + osd_obj.draining = draining + assert osd_obj.is_draining is expected + + @mock.patch("cephadm.services.osd.RemoveUtil.ok_to_stop") + def test_is_ok_to_stop(self, _, osd_obj): + osd_obj.is_ok_to_stop + osd_obj.rm_util.ok_to_stop.assert_called_once() + + @pytest.mark.parametrize( + "pg_count, expected", + [ + (0, True), + (1, False), + (9999, False), + (-1, False), + ] + ) + def test_is_empty(self, osd_obj, pg_count, expected): + with mock.patch("cephadm.services.osd.OSD.get_pg_count", return_value=pg_count): + assert osd_obj.is_empty is expected + + @mock.patch("cephadm.services.osd.RemoveUtil.safe_to_destroy") + def test_safe_to_destroy(self, _, osd_obj): + osd_obj.safe_to_destroy() + 
osd_obj.rm_util.safe_to_destroy.assert_called_once() + + @mock.patch("cephadm.services.osd.RemoveUtil.set_osd_flag") + def test_down(self, _, osd_obj): + osd_obj.down() + osd_obj.rm_util.set_osd_flag.assert_called_with([osd_obj], 'down') + + @mock.patch("cephadm.services.osd.RemoveUtil.destroy_osd") + def test_destroy_osd(self, _, osd_obj): + osd_obj.destroy() + osd_obj.rm_util.destroy_osd.assert_called_once() + + @mock.patch("cephadm.services.osd.RemoveUtil.purge_osd") + def test_purge(self, _, osd_obj): + osd_obj.purge() + osd_obj.rm_util.purge_osd.assert_called_once() + + @mock.patch("cephadm.services.osd.RemoveUtil.get_pg_count") + def test_pg_count(self, _, osd_obj): + osd_obj.get_pg_count() + osd_obj.rm_util.get_pg_count.assert_called_once() + + def test_drain_status_human_not_started(self, osd_obj): + assert osd_obj.drain_status_human() == 'not started' + + def test_drain_status_human_started(self, osd_obj): + osd_obj.started = True + assert osd_obj.drain_status_human() == 'started' + + def test_drain_status_human_draining(self, osd_obj): + osd_obj.started = True + osd_obj.draining = True + assert osd_obj.drain_status_human() == 'draining' + + def test_drain_status_human_done(self, osd_obj): + osd_obj.started = True + osd_obj.draining = False + osd_obj.drain_done_at = datetime.utcnow() + assert osd_obj.drain_status_human() == 'done, waiting for purge' + + +class TestOSDRemovalQueue: + + def test_queue_size(self, osd_obj): + q = OSDRemovalQueue(mock.Mock()) + assert q.queue_size() == 0 + q.osds.add(osd_obj) + assert q.queue_size() == 1 + + @mock.patch("cephadm.services.osd.OSD.start") + @mock.patch("cephadm.services.osd.OSD.exists") + def test_enqueue(self, exist, start, osd_obj): + q = OSDRemovalQueue(mock.Mock()) + q.enqueue(osd_obj) + osd_obj.start.assert_called_once() + + @mock.patch("cephadm.services.osd.OSD.stop") + @mock.patch("cephadm.services.osd.OSD.exists") + def test_rm_raise(self, exist, stop, osd_obj): + q = OSDRemovalQueue(mock.Mock()) + with 
pytest.raises(KeyError): + q.rm(osd_obj) + osd_obj.stop.assert_called_once() + + @mock.patch("cephadm.services.osd.OSD.stop") + @mock.patch("cephadm.services.osd.OSD.exists") + def test_rm(self, exist, stop, osd_obj): + q = OSDRemovalQueue(mock.Mock()) + q.osds.add(osd_obj) + q.rm(osd_obj) + osd_obj.stop.assert_called_once() diff --git a/src/pybind/mgr/cephadm/tests/test_scheduling.py b/src/pybind/mgr/cephadm/tests/test_scheduling.py new file mode 100644 index 000000000..2454dc0d1 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_scheduling.py @@ -0,0 +1,1591 @@ +# Disable autopep8 for this file: + +# fmt: off + +from typing import NamedTuple, List, Dict, Optional +import pytest + +from ceph.deployment.hostspec import HostSpec +from ceph.deployment.service_spec import ServiceSpec, PlacementSpec, IngressSpec +from ceph.deployment.hostspec import SpecValidationError + +from cephadm.module import HostAssignment +from cephadm.schedule import DaemonPlacement +from orchestrator import DaemonDescription, OrchestratorValidationError, OrchestratorError + + +def wrapper(func): + # some odd thingy to revert the order or arguments + def inner(*args): + def inner2(expected): + func(expected, *args) + return inner2 + return inner + + +@wrapper +def none(expected): + assert expected == [] + + +@wrapper +def one_of(expected, *hosts): + if not isinstance(expected, list): + assert False, str(expected) + assert len(expected) == 1, f'one_of failed len({expected}) != 1' + assert expected[0] in hosts + + +@wrapper +def two_of(expected, *hosts): + if not isinstance(expected, list): + assert False, str(expected) + assert len(expected) == 2, f'one_of failed len({expected}) != 2' + matches = 0 + for h in hosts: + matches += int(h in expected) + if matches != 2: + assert False, f'two of {hosts} not in {expected}' + + +@wrapper +def exactly(expected, *hosts): + assert expected == list(hosts) + + +@wrapper +def error(expected, kind, match): + assert isinstance(expected, kind), 
def _or(*inners):
    # Matcher that succeeds when at least one of the wrapped matchers does.
    # Written in explicitly curried form; behaviourally identical to the
    # two-stage form the sibling matchers get from the `wrapper` decorator.
    def check(expected):
        def catch(inner):
            try:
                inner(expected)
            except AssertionError as e:
                return e
        outcomes = [catch(i) for i in inners]
        if None not in outcomes:
            assert False, f"_or failed: {expected}"
    return check


def _always_true(_):
    # Placeholder matcher: accept any outcome.
    pass


def k(s):
    """Split a table key like '12 123 1 *' into its non-empty fields."""
    return [field for field in s.split(' ') if field]


def get_result(key, results):
    """Return the first expectation whose pattern matches *key*.

    A pattern field of '*' matches anything; the first matching row wins
    (raises IndexError when no row matches, which the caller treats as
    'expectation missing from the table').
    """
    def matches(pattern):
        return all(p == field or p == '*' for p, field in zip(pattern, key))
    return [v for pattern, v in results if matches(pattern)][0]


def mk_spec_and_host(spec_section, hosts, explicit_key, explicit, count):
    """Build (spec factory, HostSpec list) for one scheduler-table scenario.

    *spec_section* selects how the placement is expressed (explicit hosts,
    label, or host pattern); hosts named in *explicit* additionally get the
    'mylabel' label so the label-based scenarios resolve to the same hosts.
    """
    if spec_section == 'hosts':
        mk_spec = lambda: ServiceSpec('mgr', placement=PlacementSpec(  # noqa: E731
            hosts=explicit,
            count=count,
        ))
    elif spec_section == 'label':
        mk_spec = lambda: ServiceSpec('mgr', placement=PlacementSpec(  # noqa: E731
            label='mylabel',
            count=count,
        ))
    elif spec_section == 'host_pattern':
        pattern = {
            'e': 'notfound',
            '1': '1',
            '12': '[1-2]',
            '123': '*',
        }[explicit_key]
        mk_spec = lambda: ServiceSpec('mgr', placement=PlacementSpec(  # noqa: E731
            host_pattern=pattern,
            count=count,
        ))
    else:
        assert False

    hosts = [
        HostSpec(h, labels=['mylabel']) if h in explicit else HostSpec(h)
        for h in hosts
    ]

    return mk_spec, hosts


def run_scheduler_test(results, mk_spec, hosts, daemons, key_elems):
    """Run HostAssignment for one scenario and check it against *results*.

    When the scenario key is missing from the table, run the scheduler once
    anyway and fail with a ready-to-paste table entry describing the actual
    outcome.
    """
    key = ' '.join('N' if e is None else str(e) for e in key_elems)
    try:
        assert_res = get_result(k(key), results)
    except IndexError:
        try:
            spec = mk_spec()
            host_res, to_add, to_remove = HostAssignment(
                spec=spec,
                hosts=hosts,
                unreachable_hosts=[],
                daemons=daemons,
            ).place()
            if isinstance(host_res, list):
                e = ', '.join(repr(h.hostname) for h in host_res)
                assert False, f'`(k("{key}"), exactly({e})),` not found'
            assert False, f'`(k("{key}"), ...),` not found'
        except OrchestratorError as e:
            assert False, f'`(k("{key}"), error({type(e).__name__}, {repr(str(e))})),` not found'

    for _ in range(10):  # scheduler has a random component
        try:
            spec = mk_spec()
            host_res, to_add, to_remove = HostAssignment(
                spec=spec,
                hosts=hosts,
                unreachable_hosts=[],
                daemons=daemons
            ).place()

            assert_res(sorted([h.hostname for h in host_res]))
        except Exception as e:
            # Errors are outcomes too: hand them to the matcher (e.g. error()).
            assert_res(e)
@pytest.mark.parametrize("dp,n,result",
                         [   # noqa: E128
                             (
                                 DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80]),
                                 0,
                                 DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80]),
                             ),
                             (
                                 DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80]),
                                 2,
                                 DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[82]),
                             ),
                             (
                                 DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80, 90]),
                                 2,
                                 DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[82, 92]),
                             ),
                         ])
def test_daemon_placement_renumber(dp, n, result):
    # Shifting a placement by n must offset every port by n.
    assert dp.renumber_ports(n) == result


@pytest.mark.parametrize(
    'dp,dd,result',
    [
        (
            DaemonPlacement(daemon_type='mgr', hostname='host1'),
            DaemonDescription('mgr', 'a', 'host1'),
            True
        ),
        (
            DaemonPlacement(daemon_type='mgr', hostname='host1', name='a'),
            DaemonDescription('mgr', 'a', 'host1'),
            True
        ),
        (
            DaemonPlacement(daemon_type='mon', hostname='host1', name='a'),
            DaemonDescription('mgr', 'a', 'host1'),
            False
        ),
        (
            DaemonPlacement(daemon_type='mgr', hostname='host1', name='a'),
            DaemonDescription('mgr', 'b', 'host1'),
            False
        ),
    ])
def test_daemon_placement_match(dp, dd, result):
    # A placement matches a daemon only when type, host and (if set) name agree.
    assert dp.matches_daemon(dd) == result


# Expectation table for test_explicit_scheduler.
# * first match from the top wins
# * where e=[], *=any
#
# + list of known hosts available for scheduling (host_key)
# | + hosts used for explicit placement (explicit_key)
# | | + count
# | | | + section (host, label, pattern)
# | | | | + expected result
# | | | | |
test_explicit_scheduler_results = [
    (k("* * 0 *"), error(SpecValidationError, 'num/count must be >= 1')),
    (k("* e N l"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr>: No matching hosts for label mylabel')),
    (k("* e N p"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr>: No matching hosts')),
    (k("* e N h"), error(OrchestratorValidationError, 'placement spec is empty: no hosts, no label, no pattern, no count')),
    (k("* e * *"), none),
    (k("1 12 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 2: Unknown hosts")),
    (k("1 123 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 2, 3: Unknown hosts")),
    (k("1 * * *"), exactly('1')),
    (k("12 1 * *"), exactly('1')),
    (k("12 12 1 *"), one_of('1', '2')),
    (k("12 12 * *"), exactly('1', '2')),
    (k("12 123 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 3: Unknown hosts")),
    (k("12 123 1 *"), one_of('1', '2', '3')),
    (k("12 123 * *"), two_of('1', '2', '3')),
    (k("123 1 * *"), exactly('1')),
    (k("123 12 1 *"), one_of('1', '2')),
    (k("123 12 * *"), exactly('1', '2')),
    (k("123 123 1 *"), one_of('1', '2', '3')),
    (k("123 123 2 *"), two_of('1', '2', '3')),
    (k("123 123 * *"), exactly('1', '2', '3')),
]


@pytest.mark.parametrize("spec_section_key,spec_section",
                         [  # noqa: E128
                             ('h', 'hosts'),
                             ('l', 'label'),
                             ('p', 'host_pattern'),
                         ])
@pytest.mark.parametrize("count",
                         [  # noqa: E128
                             None,
                             0,
                             1,
                             2,
                             3,
                         ])
@pytest.mark.parametrize("explicit_key, explicit",
                         [  # noqa: E128
                             ('e', []),
                             ('1', ['1']),
                             ('12', ['1', '2']),
                             ('123', ['1', '2', '3']),
                         ])
@pytest.mark.parametrize("host_key, hosts",
                         [  # noqa: E128
                             ('1', ['1']),
                             ('12', ['1', '2']),
                             ('123', ['1', '2', '3']),
                         ])
def test_explicit_scheduler(host_key, hosts,
                            explicit_key, explicit,
                            count,
                            spec_section_key, spec_section):
    # Exhaustive cross-product of host set x explicit placement x count x
    # placement style, checked against the table above.
    mk_spec, hosts = mk_spec_and_host(spec_section, hosts, explicit_key, explicit, count)
    run_scheduler_test(
        results=test_explicit_scheduler_results,
        mk_spec=mk_spec,
        hosts=hosts,
        daemons=[],
        key_elems=(host_key, explicit_key, count, spec_section_key)
    )


# Expectation table for test_scheduler_daemons (same key scheme as above,
# with an extra column for pre-existing daemons).
# * first match from the top wins
# * where e=[], *=any
#
# + list of known hosts available for scheduling (host_key)
# | + hosts used for explicit placement (explicit_key)
# | | + count
# | | | + existing daemons
# | | | | + section (host, label, pattern)
# | | | | | + expected result
# | | | | | |
test_scheduler_daemons_results = [
    (k("* 1 * * *"), exactly('1')),
    (k("1 123 * * h"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr> on 2, 3: Unknown hosts')),
    (k("1 123 * * *"), exactly('1')),
    (k("12 123 * * h"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr> on 3: Unknown hosts')),
    (k("12 123 N * *"), exactly('1', '2')),
    (k("12 123 1 * *"), one_of('1', '2')),
    (k("12 123 2 * *"), exactly('1', '2')),
    (k("12 123 3 * *"), exactly('1', '2')),
    (k("123 123 N * *"), exactly('1', '2', '3')),
    (k("123 123 1 e *"), one_of('1', '2', '3')),
    (k("123 123 1 1 *"), exactly('1')),
    (k("123 123 1 3 *"), exactly('3')),
    (k("123 123 1 12 *"), one_of('1', '2')),
    (k("123 123 1 112 *"), one_of('1', '2')),
    (k("123 123 1 23 *"), one_of('2', '3')),
    (k("123 123 1 123 *"), one_of('1', '2', '3')),
    (k("123 123 2 e *"), two_of('1', '2', '3')),
    (k("123 123 2 1 *"), _or(exactly('1', '2'), exactly('1', '3'))),
    (k("123 123 2 3 *"), _or(exactly('1', '3'), exactly('2', '3'))),
    (k("123 123 2 12 *"), exactly('1', '2')),
    (k("123 123 2 112 *"), exactly('1', '2')),
    (k("123 123 2 23 *"), exactly('2', '3')),
    (k("123 123 2 123 *"), two_of('1', '2', '3')),
    (k("123 123 3 * *"), exactly('1', '2', '3')),
]
daemons", + [ # noqa: E128 + ('e', []), + ('1', ['1']), + ('3', ['3']), + ('12', ['1', '2']), + ('112', ['1', '1', '2']), # deal with existing co-located daemons + ('23', ['2', '3']), + ('123', ['1', '2', '3']), + ]) +@pytest.mark.parametrize("count", + [ # noqa: E128 + None, + 1, + 2, + 3, + ]) +@pytest.mark.parametrize("explicit_key, explicit", + [ # noqa: E128 + ('1', ['1']), + ('123', ['1', '2', '3']), + ]) +@pytest.mark.parametrize("host_key, hosts", + [ # noqa: E128 + ('1', ['1']), + ('12', ['1', '2']), + ('123', ['1', '2', '3']), + ]) +def test_scheduler_daemons(host_key, hosts, + explicit_key, explicit, + count, + daemons_key, daemons, + spec_section_key, spec_section): + mk_spec, hosts = mk_spec_and_host(spec_section, hosts, explicit_key, explicit, count) + dds = [ + DaemonDescription('mgr', d, d) + for d in daemons + ] + run_scheduler_test( + results=test_scheduler_daemons_results, + mk_spec=mk_spec, + hosts=hosts, + daemons=dds, + key_elems=(host_key, explicit_key, count, daemons_key, spec_section_key) + ) + + +# ========================= + + +class NodeAssignmentTest(NamedTuple): + service_type: str + placement: PlacementSpec + hosts: List[str] + daemons: List[DaemonDescription] + rank_map: Optional[Dict[int, Dict[int, Optional[str]]]] + post_rank_map: Optional[Dict[int, Dict[int, Optional[str]]]] + expected: List[str] + expected_add: List[str] + expected_remove: List[DaemonDescription] + + +@pytest.mark.parametrize("service_type,placement,hosts,daemons,rank_map,post_rank_map,expected,expected_add,expected_remove", + [ # noqa: E128 + # just hosts + NodeAssignmentTest( + 'mgr', + PlacementSpec(hosts=['smithi060']), + ['smithi060'], + [], + None, None, + ['mgr:smithi060'], ['mgr:smithi060'], [] + ), + # all_hosts + NodeAssignmentTest( + 'mgr', + PlacementSpec(host_pattern='*'), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + ], + None, None, + ['mgr:host1', 'mgr:host2', 'mgr:host3'], 
+ ['mgr:host3'], + [] + ), + # all_hosts + count_per_host + NodeAssignmentTest( + 'mds', + PlacementSpec(host_pattern='*', count_per_host=2), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mds', 'a', 'host1'), + DaemonDescription('mds', 'b', 'host2'), + ], + None, None, + ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], + ['mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], + [] + ), + # count that is bigger than the amount of hosts. Truncate to len(hosts) + # mgr should not be co-located to each other. + NodeAssignmentTest( + 'mgr', + PlacementSpec(count=4), + 'host1 host2 host3'.split(), + [], + None, None, + ['mgr:host1', 'mgr:host2', 'mgr:host3'], + ['mgr:host1', 'mgr:host2', 'mgr:host3'], + [] + ), + # count that is bigger than the amount of hosts; wrap around. + NodeAssignmentTest( + 'mds', + PlacementSpec(count=6), + 'host1 host2 host3'.split(), + [], + None, None, + ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], + ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], + [] + ), + # count + partial host list + NodeAssignmentTest( + 'mgr', + PlacementSpec(count=3, hosts=['host3']), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + ], + None, None, + ['mgr:host3'], + ['mgr:host3'], + ['mgr.a', 'mgr.b'] + ), + # count + partial host list (with colo) + NodeAssignmentTest( + 'mds', + PlacementSpec(count=3, hosts=['host3']), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mds', 'a', 'host1'), + DaemonDescription('mds', 'b', 'host2'), + ], + None, None, + ['mds:host3', 'mds:host3', 'mds:host3'], + ['mds:host3', 'mds:host3', 'mds:host3'], + ['mds.a', 'mds.b'] + ), + # count 1 + partial host list + NodeAssignmentTest( + 'mgr', + PlacementSpec(count=1, hosts=['host3']), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + ], + 
None, None, + ['mgr:host3'], + ['mgr:host3'], + ['mgr.a', 'mgr.b'] + ), + # count + partial host list + existing + NodeAssignmentTest( + 'mgr', + PlacementSpec(count=2, hosts=['host3']), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + ], + None, None, + ['mgr:host3'], + ['mgr:host3'], + ['mgr.a'] + ), + # count + partial host list + existing (deterministic) + NodeAssignmentTest( + 'mgr', + PlacementSpec(count=2, hosts=['host1']), + 'host1 host2'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + ], + None, None, + ['mgr:host1'], + [], + [] + ), + # count + partial host list + existing (deterministic) + NodeAssignmentTest( + 'mgr', + PlacementSpec(count=2, hosts=['host1']), + 'host1 host2'.split(), + [ + DaemonDescription('mgr', 'a', 'host2'), + ], + None, None, + ['mgr:host1'], + ['mgr:host1'], + ['mgr.a'] + ), + # label only + NodeAssignmentTest( + 'mgr', + PlacementSpec(label='foo'), + 'host1 host2 host3'.split(), + [], + None, None, + ['mgr:host1', 'mgr:host2', 'mgr:host3'], + ['mgr:host1', 'mgr:host2', 'mgr:host3'], + [] + ), + # label + count (truncate to host list) + NodeAssignmentTest( + 'mgr', + PlacementSpec(count=4, label='foo'), + 'host1 host2 host3'.split(), + [], + None, None, + ['mgr:host1', 'mgr:host2', 'mgr:host3'], + ['mgr:host1', 'mgr:host2', 'mgr:host3'], + [] + ), + # label + count (with colo) + NodeAssignmentTest( + 'mds', + PlacementSpec(count=6, label='foo'), + 'host1 host2 host3'.split(), + [], + None, None, + ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], + ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], + [] + ), + # label only + count_per_hst + NodeAssignmentTest( + 'mds', + PlacementSpec(label='foo', count_per_host=3), + 'host1 host2 host3'.split(), + [], + None, None, + ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3', + 'mds:host1', 'mds:host2', 'mds:host3'], + ['mds:host1', 'mds:host2', 'mds:host3', 
'mds:host1', 'mds:host2', 'mds:host3', + 'mds:host1', 'mds:host2', 'mds:host3'], + [] + ), + # host_pattern + NodeAssignmentTest( + 'mgr', + PlacementSpec(host_pattern='mgr*'), + 'mgrhost1 mgrhost2 datahost'.split(), + [], + None, None, + ['mgr:mgrhost1', 'mgr:mgrhost2'], + ['mgr:mgrhost1', 'mgr:mgrhost2'], + [] + ), + # host_pattern + count_per_host + NodeAssignmentTest( + 'mds', + PlacementSpec(host_pattern='mds*', count_per_host=3), + 'mdshost1 mdshost2 datahost'.split(), + [], + None, None, + ['mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2'], + ['mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2'], + [] + ), + # label + count_per_host + ports + NodeAssignmentTest( + 'rgw', + PlacementSpec(count=6, label='foo'), + 'host1 host2 host3'.split(), + [], + None, None, + ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)', + 'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'], + ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)', + 'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'], + [] + ), + # label + count_per_host + ports (+ xisting) + NodeAssignmentTest( + 'rgw', + PlacementSpec(count=6, label='foo'), + 'host1 host2 host3'.split(), + [ + DaemonDescription('rgw', 'a', 'host1', ports=[81]), + DaemonDescription('rgw', 'b', 'host2', ports=[80]), + DaemonDescription('rgw', 'c', 'host1', ports=[82]), + ], + None, None, + ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)', + 'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'], + ['rgw:host1(*:80)', 'rgw:host3(*:80)', + 'rgw:host2(*:81)', 'rgw:host3(*:81)'], + ['rgw.c'] + ), + # cephadm.py teuth case + NodeAssignmentTest( + 'mgr', + PlacementSpec(count=3, hosts=['host1=y', 'host2=x']), + 'host1 host2'.split(), + [ + DaemonDescription('mgr', 'y', 'host1'), + DaemonDescription('mgr', 'x', 'host2'), + ], + None, None, + ['mgr:host1(name=y)', 'mgr:host2(name=x)'], + [], [] + ), + + # note: host -> rank 
mapping is psuedo-random based on svc name, so these + # host/rank pairs may seem random but they match the nfs.mynfs seed used by + # the test. + + # ranked, fresh + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [], + {}, + {0: {0: None}, 1: {0: None}, 2: {0: None}}, + ['nfs:host3(rank=0.0)', 'nfs:host2(rank=1.0)', 'nfs:host1(rank=2.0)'], + ['nfs:host3(rank=0.0)', 'nfs:host2(rank=1.0)', 'nfs:host1(rank=2.0)'], + [] + ), + # 21: ranked, exist + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1), + ], + {0: {1: '0.1'}}, + {0: {1: '0.1'}, 1: {0: None}, 2: {0: None}}, + ['nfs:host1(rank=0.1)', 'nfs:host3(rank=1.0)', 'nfs:host2(rank=2.0)'], + ['nfs:host3(rank=1.0)', 'nfs:host2(rank=2.0)'], + [] + ), + # ranked, exist, different ranks + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1), + DaemonDescription('nfs', '1.1', 'host2', rank=1, rank_generation=1), + ], + {0: {1: '0.1'}, 1: {1: '1.1'}}, + {0: {1: '0.1'}, 1: {1: '1.1'}, 2: {0: None}}, + ['nfs:host1(rank=0.1)', 'nfs:host2(rank=1.1)', 'nfs:host3(rank=2.0)'], + ['nfs:host3(rank=2.0)'], + [] + ), + # ranked, exist, different ranks (2) + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1), + DaemonDescription('nfs', '1.1', 'host3', rank=1, rank_generation=1), + ], + {0: {1: '0.1'}, 1: {1: '1.1'}}, + {0: {1: '0.1'}, 1: {1: '1.1'}, 2: {0: None}}, + ['nfs:host1(rank=0.1)', 'nfs:host3(rank=1.1)', 'nfs:host2(rank=2.0)'], + ['nfs:host2(rank=2.0)'], + [] + ), + # ranked, exist, extra ranks + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5), + 
DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5), + DaemonDescription('nfs', '4.5', 'host2', rank=4, rank_generation=5), + ], + {0: {5: '0.5'}, 1: {5: '1.5'}}, + {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {0: None}}, + ['nfs:host1(rank=0.5)', 'nfs:host2(rank=1.5)', 'nfs:host3(rank=2.0)'], + ['nfs:host3(rank=2.0)'], + ['nfs.4.5'] + ), + # 25: ranked, exist, extra ranks (scale down: kill off high rank) + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=2), + 'host3 host2 host1'.split(), + [ + DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5), + DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5), + DaemonDescription('nfs', '2.5', 'host3', rank=2, rank_generation=5), + ], + {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}}, + {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}}, + ['nfs:host1(rank=0.5)', 'nfs:host2(rank=1.5)'], + [], + ['nfs.2.5'] + ), + # ranked, exist, extra ranks (scale down hosts) + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=2), + 'host1 host3'.split(), + [ + DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5), + DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5), + DaemonDescription('nfs', '2.5', 'host3', rank=4, rank_generation=5), + ], + {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}}, + {0: {5: '0.5'}, 1: {5: '1.5', 6: None}, 2: {5: '2.5'}}, + ['nfs:host1(rank=0.5)', 'nfs:host3(rank=1.6)'], + ['nfs:host3(rank=1.6)'], + ['nfs.2.5', 'nfs.1.5'] + ), + # ranked, exist, duplicate rank + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.0', 'host1', rank=0, rank_generation=0), + DaemonDescription('nfs', '1.1', 'host2', rank=1, rank_generation=1), + DaemonDescription('nfs', '1.2', 'host3', rank=1, rank_generation=2), + ], + {0: {0: '0.0'}, 1: {2: '1.2'}}, + {0: {0: '0.0'}, 1: {2: '1.2'}, 2: {0: None}}, + ['nfs:host1(rank=0.0)', 'nfs:host3(rank=1.2)', 'nfs:host2(rank=2.0)'], + ['nfs:host2(rank=2.0)'], + 
['nfs.1.1'] + ), + # 28: ranked, all gens stale (failure during update cycle) + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=2), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2), + DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2), + ], + {0: {2: '0.2'}, 1: {2: '1.2', 3: '1.3'}}, + {0: {2: '0.2'}, 1: {2: '1.2', 3: '1.3', 4: None}}, + ['nfs:host1(rank=0.2)', 'nfs:host3(rank=1.4)'], + ['nfs:host3(rank=1.4)'], + ['nfs.1.2'] + ), + # ranked, not enough hosts + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=4), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2), + DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2), + ], + {0: {2: '0.2'}, 1: {2: '1.2'}}, + {0: {2: '0.2'}, 1: {2: '1.2'}, 2: {0: None}}, + ['nfs:host1(rank=0.2)', 'nfs:host2(rank=1.2)', 'nfs:host3(rank=2.0)'], + ['nfs:host3(rank=2.0)'], + [] + ), + # ranked, scale down + NodeAssignmentTest( + 'nfs', + PlacementSpec(hosts=['host2']), + 'host1 host2'.split(), + [ + DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2), + DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2), + DaemonDescription('nfs', '2.2', 'host3', rank=2, rank_generation=2), + ], + {0: {2: '0.2'}, 1: {2: '1.2'}, 2: {2: '2.2'}}, + {0: {2: '0.2', 3: None}, 1: {2: '1.2'}, 2: {2: '2.2'}}, + ['nfs:host2(rank=0.3)'], + ['nfs:host2(rank=0.3)'], + ['nfs.0.2', 'nfs.1.2', 'nfs.2.2'] + ), + + ]) +def test_node_assignment(service_type, placement, hosts, daemons, rank_map, post_rank_map, + expected, expected_add, expected_remove): + spec = None + service_id = None + allow_colo = False + if service_type == 'rgw': + service_id = 'realm.zone' + allow_colo = True + elif service_type == 'mds': + service_id = 'myfs' + allow_colo = True + elif service_type == 'nfs': + service_id = 'mynfs' + spec = ServiceSpec(service_type=service_type, + service_id=service_id, + 
placement=placement) + + if not spec: + spec = ServiceSpec(service_type=service_type, + service_id=service_id, + placement=placement) + + all_slots, to_add, to_remove = HostAssignment( + spec=spec, + hosts=[HostSpec(h, labels=['foo']) for h in hosts], + unreachable_hosts=[], + daemons=daemons, + allow_colo=allow_colo, + rank_map=rank_map, + ).place() + + assert rank_map == post_rank_map + + got = [str(p) for p in all_slots] + num_wildcard = 0 + for i in expected: + if i == '*': + num_wildcard += 1 + else: + assert i in got + got.remove(i) + assert num_wildcard == len(got) + + got = [str(p) for p in to_add] + num_wildcard = 0 + for i in expected_add: + if i == '*': + num_wildcard += 1 + else: + assert i in got + got.remove(i) + assert num_wildcard == len(got) + + assert sorted([d.name() for d in to_remove]) == sorted(expected_remove) + + +class NodeAssignmentTest5(NamedTuple): + service_type: str + placement: PlacementSpec + available_hosts: List[str] + candidates_hosts: List[str] + + +@pytest.mark.parametrize("service_type, placement, available_hosts, expected_candidates", + [ # noqa: E128 + NodeAssignmentTest5( + 'alertmanager', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host3 host1 host4 host2'.split(), + ), + NodeAssignmentTest5( + 'prometheus', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host3 host2 host4 host1'.split(), + ), + NodeAssignmentTest5( + 'grafana', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host1 host2 host4 host3'.split(), + ), + NodeAssignmentTest5( + 'mgr', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host4 host2 host1 host3'.split(), + ), + NodeAssignmentTest5( + 'mon', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host1 host3 host4 host2'.split(), + ), + NodeAssignmentTest5( + 
'rgw', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host1 host3 host2 host4'.split(), + ), + NodeAssignmentTest5( + 'cephfs-mirror', + PlacementSpec(hosts='host1 host2 host3 host4'.split()), + 'host1 host2 host3 host4'.split(), + 'host4 host3 host1 host2'.split(), + ), + ]) +def test_node_assignment_random_shuffle(service_type, placement, available_hosts, expected_candidates): + spec = None + service_id = None + allow_colo = False + spec = ServiceSpec(service_type=service_type, + service_id=service_id, + placement=placement) + + candidates = HostAssignment( + spec=spec, + hosts=[HostSpec(h, labels=['foo']) for h in available_hosts], + unreachable_hosts=[], + daemons=[], + allow_colo=allow_colo, + ).get_candidates() + + candidates_hosts = [h.hostname for h in candidates] + assert candidates_hosts == expected_candidates + + +class NodeAssignmentTest2(NamedTuple): + service_type: str + placement: PlacementSpec + hosts: List[str] + daemons: List[DaemonDescription] + expected_len: int + in_set: List[str] + + +@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected_len,in_set", + [ # noqa: E128 + # just count + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=1), + 'host1 host2 host3'.split(), + [], + 1, + ['host1', 'host2', 'host3'], + ), + + # hosts + (smaller) count + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=1, hosts='host1 host2'.split()), + 'host1 host2'.split(), + [], + 1, + ['host1', 'host2'], + ), + # hosts + (smaller) count, existing + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=1, hosts='host1 host2 host3'.split()), + 'host1 host2 host3'.split(), + [DaemonDescription('mgr', 'mgr.a', 'host1')], + 1, + ['host1', 'host2', 'host3'], + ), + # hosts + (smaller) count, (more) existing + NodeAssignmentTest2( + 'mgr', + PlacementSpec(count=1, hosts='host1 host2 host3'.split()), + 'host1 host2 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 
'b', 'host2'),
            ],
            1,
            ['host1', 'host2']
        ),
        # count + partial host list
        NodeAssignmentTest2(
            'mgr',
            PlacementSpec(count=2, hosts=['host3']),
            'host1 host2 host3'.split(),
            [],
            1,
            ['host1', 'host2', 'host3']
        ),
        # label + count
        NodeAssignmentTest2(
            'mgr',
            PlacementSpec(count=1, label='foo'),
            'host1 host2 host3'.split(),
            [],
            1,
            ['host1', 'host2', 'host3']
        ),
    ])
def test_node_assignment2(service_type, placement, hosts,
                          daemons, expected_len, in_set):
    # Placement is not fully deterministic for these cases, so only the
    # *number* of chosen hosts and their membership in `in_set` is checked.
    hosts, to_add, to_remove = HostAssignment(
        spec=ServiceSpec(service_type, placement=placement),
        hosts=[HostSpec(h, labels=['foo']) for h in hosts],
        unreachable_hosts=[],
        daemons=daemons,
    ).place()
    assert len(hosts) == expected_len
    for h in [h.hostname for h in hosts]:
        assert h in in_set


@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected_len,must_have",
    [  # noqa: E128
        # hosts + (smaller) count, (more) existing
        NodeAssignmentTest2(
            'mgr',
            PlacementSpec(count=3, hosts='host3'.split()),
            'host1 host2 host3'.split(),
            [],
            1,
            ['host3']
        ),
        # count + partial host list
        NodeAssignmentTest2(
            'mgr',
            PlacementSpec(count=2, hosts=['host3']),
            'host1 host2 host3'.split(),
            [],
            1,
            ['host3']
        ),
    ])
def test_node_assignment3(service_type, placement, hosts,
                          daemons, expected_len, must_have):
    # Like test_node_assignment2, but asserts that a specific subset of
    # hosts (`must_have`) is always part of the chosen placement.
    hosts, to_add, to_remove = HostAssignment(
        spec=ServiceSpec(service_type, placement=placement),
        hosts=[HostSpec(h) for h in hosts],
        unreachable_hosts=[],
        daemons=daemons,
    ).place()
    assert len(hosts) == expected_len
    for h in must_have:
        assert h in [h.hostname for h in hosts]


class NodeAssignmentTest4(NamedTuple):
    # Test-case record for network/port-aware placements (rgw, ingress).
    spec: ServiceSpec
    networks: Dict[str, Dict[str, Dict[str, List[str]]]]
    daemons: List[DaemonDescription]
    expected: List[str]
    expected_add: List[str]
    expected_remove: List[DaemonDescription]


@pytest.mark.parametrize("spec,networks,daemons,expected,expected_add,expected_remove",
    [  # noqa: E128
        # rgw: count=6 with colocation -> three ports on each of the two
        # hosts that actually have an address in the requested network.
        NodeAssignmentTest4(
            ServiceSpec(
                service_type='rgw',
                service_id='foo',
                placement=PlacementSpec(count=6, label='foo'),
                networks=['10.0.0.0/8'],
            ),
            {
                'host1': {'10.0.0.0/8': {'eth0': ['10.0.0.1']}},
                'host2': {'10.0.0.0/8': {'eth0': ['10.0.0.2']}},
                'host3': {'192.168.0.0/16': {'eth0': ['192.168.0.1']}},
            },
            [],
            ['rgw:host1(10.0.0.1:80)', 'rgw:host2(10.0.0.2:80)',
             'rgw:host1(10.0.0.1:81)', 'rgw:host2(10.0.0.2:81)',
             'rgw:host1(10.0.0.1:82)', 'rgw:host2(10.0.0.2:82)'],
            ['rgw:host1(10.0.0.1:80)', 'rgw:host2(10.0.0.2:80)',
             'rgw:host1(10.0.0.1:81)', 'rgw:host2(10.0.0.2:81)',
             'rgw:host1(10.0.0.1:82)', 'rgw:host2(10.0.0.2:82)'],
            []
        ),
        # ingress: haproxy on network-matching hosts, keepalived per host.
        NodeAssignmentTest4(
            IngressSpec(
                service_type='ingress',
                service_id='rgw.foo',
                frontend_port=443,
                monitor_port=8888,
                virtual_ip='10.0.0.20/8',
                backend_service='rgw.foo',
                placement=PlacementSpec(label='foo'),
                networks=['10.0.0.0/8'],
            ),
            {
                'host1': {'10.0.0.0/8': {'eth0': ['10.0.0.1']}},
                'host2': {'10.0.0.0/8': {'eth1': ['10.0.0.2']}},
                'host3': {'192.168.0.0/16': {'eth2': ['192.168.0.1']}},
            },
            [],
            ['haproxy:host1(10.0.0.1:443,8888)', 'haproxy:host2(10.0.0.2:443,8888)',
             'keepalived:host1', 'keepalived:host2'],
            ['haproxy:host1(10.0.0.1:443,8888)', 'haproxy:host2(10.0.0.2:443,8888)',
             'keepalived:host1', 'keepalived:host2'],
            []
        ),
        # ingress with pre-existing daemons: keepalived on host3 (no
        # matching network) must be removed.
        NodeAssignmentTest4(
            IngressSpec(
                service_type='ingress',
                service_id='rgw.foo',
                frontend_port=443,
                monitor_port=8888,
                virtual_ip='10.0.0.20/8',
                backend_service='rgw.foo',
                placement=PlacementSpec(label='foo'),
                networks=['10.0.0.0/8'],
            ),
            {
                'host1': {'10.0.0.0/8': {'eth0': ['10.0.0.1']}},
                'host2': {'10.0.0.0/8': {'eth1': ['10.0.0.2']}},
                'host3': {'192.168.0.0/16': {'eth2': ['192.168.0.1']}},
            },
            [
                DaemonDescription('haproxy', 'a', 'host1', ip='10.0.0.1',
                                  ports=[443, 8888]),
                DaemonDescription('keepalived', 'b', 'host2'),
                DaemonDescription('keepalived', 'c', 'host3'),
            ],
            ['haproxy:host1(10.0.0.1:443,8888)', 'haproxy:host2(10.0.0.2:443,8888)',
             'keepalived:host1', 'keepalived:host2'],
            ['haproxy:host2(10.0.0.2:443,8888)',
             'keepalived:host1'],
            ['keepalived.c']
        ),
    ])
def test_node_assignment4(spec, networks, daemons,
                          expected, expected_add, expected_remove):
    # Exercises network-aware placement with colocation; '*' entries in
    # the expectation lists act as wildcards for slots whose exact value
    # is not pinned down.
    all_slots, to_add, to_remove = HostAssignment(
        spec=spec,
        hosts=[HostSpec(h, labels=['foo']) for h in networks.keys()],
        unreachable_hosts=[],
        daemons=daemons,
        allow_colo=True,
        networks=networks,
        primary_daemon_type='haproxy' if spec.service_type == 'ingress' else spec.service_type,
        per_host_daemon_type='keepalived' if spec.service_type == 'ingress' else None,
    ).place()

    got = [str(p) for p in all_slots]
    num_wildcard = 0
    for i in expected:
        if i == '*':
            num_wildcard += 1
        else:
            assert i in got
            got.remove(i)
    # whatever is left over must be covered by wildcards
    assert num_wildcard == len(got)

    got = [str(p) for p in to_add]
    num_wildcard = 0
    for i in expected_add:
        if i == '*':
            num_wildcard += 1
        else:
            assert i in got
            got.remove(i)
    assert num_wildcard == len(got)

    assert sorted([d.name() for d in to_remove]) == sorted(expected_remove)


@pytest.mark.parametrize("placement",
    [  # noqa: E128
        ('1 *'),
        ('* label:foo'),
        ('* host1 host2'),
        ('hostname12hostname12hostname12hostname12hostname12hostname12hostname12'),  # > 63 chars
    ])
def test_bad_placements(placement):
    # Each of these placement strings is invalid and must be rejected.
    # NOTE(review): pytest.raises(SpecValidationError) would be the more
    # idiomatic formulation of this try/except.
    try:
        PlacementSpec.from_string(placement.split(' '))
        assert False
    except SpecValidationError:
        pass


class NodeAssignmentTestBadSpec(NamedTuple):
    # Test-case record for specs that cannot be placed; `expected` is the
    # exact error message of the raised OrchestratorValidationError.
    service_type: str
    placement: PlacementSpec
    hosts: List[str]
    daemons: List[DaemonDescription]
    expected: str


@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected",
    [  # noqa: E128
        # unknown host
        NodeAssignmentTestBadSpec(
            'mgr',
            PlacementSpec(hosts=['unknownhost']),
            ['knownhost'],
            [],
            "Cannot place <ServiceSpec for service_name=mgr> on unknownhost: Unknown hosts"
        ),
        # unknown host pattern
        NodeAssignmentTestBadSpec(
            'mgr',
            PlacementSpec(host_pattern='unknownhost'),
            ['knownhost'],
            [],
            "Cannot place <ServiceSpec for service_name=mgr>: No matching hosts"
        ),
        # unknown label
        NodeAssignmentTestBadSpec(
            'mgr',
            PlacementSpec(label='unknownlabel'),
            [],
            [],
            "Cannot place <ServiceSpec for service_name=mgr>: No matching hosts for label unknownlabel"
        ),
    ])
def test_bad_specs(service_type, placement, hosts, daemons, expected):
    # Unplaceable specs must raise with the exact expected message.
    with pytest.raises(OrchestratorValidationError) as e:
        hosts, to_add, to_remove = HostAssignment(
            spec=ServiceSpec(service_type, placement=placement),
            hosts=[HostSpec(h) for h in hosts],
            unreachable_hosts=[],
            daemons=daemons,
        ).place()
    assert str(e.value) == expected


class ActiveAssignmentTest(NamedTuple):
    # Test-case record for placements that should prefer keeping the
    # daemon marked is_active=True. Each expectation is a list of
    # acceptable outcomes, since ties may be broken either way.
    service_type: str
    placement: PlacementSpec
    hosts: List[str]
    daemons: List[DaemonDescription]
    expected: List[List[str]]
    expected_add: List[List[str]]
    expected_remove: List[List[str]]


@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected,expected_add,expected_remove",
    [
        ActiveAssignmentTest(
            'mgr',
            PlacementSpec(count=2),
            'host1 host2 host3'.split(),
            [
                DaemonDescription('mgr', 'a', 'host1', is_active=True),
                DaemonDescription('mgr', 'b', 'host2'),
                DaemonDescription('mgr', 'c', 'host3'),
            ],
            [['host1', 'host2'], ['host1', 'host3']],
            [[]],
            [['mgr.b'], ['mgr.c']]
        ),
        ActiveAssignmentTest(
            'mgr',
            PlacementSpec(count=2),
            'host1 host2 host3'.split(),
            [
                DaemonDescription('mgr', 'a', 'host1'),
                DaemonDescription('mgr', 'b', 'host2'),
                DaemonDescription('mgr', 'c', 'host3', is_active=True),
            ],
            [['host1', 'host3'], ['host2', 'host3']],
            [[]],
            [['mgr.a'], ['mgr.b']]
        ),
        ActiveAssignmentTest(
            'mgr',
            PlacementSpec(count=1),
            'host1 host2 host3'.split(),
            [
                DaemonDescription('mgr', 'a', 'host1'),
                DaemonDescription('mgr', 'b', 'host2', is_active=True),
                DaemonDescription('mgr', 'c', 'host3'),
            ],
            [['host2']],
            [[]],
            [['mgr.a', 'mgr.c']]
        ),
        ActiveAssignmentTest(
            'mgr',
            PlacementSpec(count=1),
            'host1 host2 host3'.split(),
            [
                DaemonDescription('mgr', 'a', 'host1'),
                DaemonDescription('mgr', 'b', 'host2'),
                DaemonDescription('mgr', 'c', 'host3', is_active=True),
            ],
            [['host3']],
            [[]],
            [['mgr.a', 'mgr.b']]
        ),
        ActiveAssignmentTest(
            'mgr',
            PlacementSpec(count=1),
            'host1 host2 host3'.split(),
            [
                DaemonDescription('mgr', 'a', 'host1', is_active=True),
                DaemonDescription('mgr', 'b', 'host2'),
                DaemonDescription('mgr', 'c', 'host3', is_active=True),
            ],
            [['host1'], ['host3']],
            [[]],
            [['mgr.a', 'mgr.b'], ['mgr.b', 'mgr.c']]
        ),
        ActiveAssignmentTest(
            'mgr',
            PlacementSpec(count=2),
            'host1 host2 host3'.split(),
            [
                DaemonDescription('mgr', 'a', 'host1'),
                DaemonDescription('mgr', 'b', 'host2', is_active=True),
                DaemonDescription('mgr', 'c', 'host3', is_active=True),
            ],
            [['host2', 'host3']],
            [[]],
            [['mgr.a']]
        ),
        ActiveAssignmentTest(
            'mgr',
            PlacementSpec(count=1),
            'host1 host2 host3'.split(),
            [
                DaemonDescription('mgr', 'a', 'host1', is_active=True),
                DaemonDescription('mgr', 'b', 'host2', is_active=True),
                DaemonDescription('mgr', 'c', 'host3', is_active=True),
            ],
            [['host1'], ['host2'], ['host3']],
            [[]],
            [['mgr.a', 'mgr.b'], ['mgr.b', 'mgr.c'], ['mgr.a', 'mgr.c']]
        ),
        ActiveAssignmentTest(
            'mgr',
            PlacementSpec(count=1),
            'host1 host2 host3'.split(),
            [
                DaemonDescription('mgr', 'a', 'host1', is_active=True),
                DaemonDescription('mgr', 'a2', 'host1'),
                DaemonDescription('mgr', 'b', 'host2'),
                DaemonDescription('mgr', 'c', 'host3'),
            ],
            [['host1']],
            [[]],
            [['mgr.a2', 'mgr.b', 'mgr.c']]
        ),
        ActiveAssignmentTest(
            'mgr',
            PlacementSpec(count=1),
            'host1 host2 host3'.split(),
            [
                DaemonDescription('mgr', 'a', 'host1', is_active=True),
                DaemonDescription('mgr', 'a2', 'host1', is_active=True),
                DaemonDescription('mgr', 'b', 'host2'),
                DaemonDescription('mgr', 'c', 'host3'),
            ],
            [['host1']],
            [[]],
            [['mgr.a', 'mgr.b', 'mgr.c'], ['mgr.a2', 'mgr.b', 'mgr.c']]
        ),
        ActiveAssignmentTest(
            'mgr',
            PlacementSpec(count=2),
            'host1 host2 host3'.split(),
            [
                DaemonDescription('mgr', 'a', 'host1', is_active=True),
                DaemonDescription('mgr', 'a2', 'host1'),
                DaemonDescription('mgr', 'b', 'host2'),
                DaemonDescription('mgr', 'c', 'host3', is_active=True),
            ],
            [['host1', 'host3']],
            [[]],
            [['mgr.a2', 'mgr.b']]
        ),
        # Explicit placement should override preference for active daemon
        ActiveAssignmentTest(
            'mgr',
            PlacementSpec(count=1, hosts=['host1']),
            'host1 host2 host3'.split(),
            [
                DaemonDescription('mgr', 'a', 'host1'),
                DaemonDescription('mgr', 'b', 'host2'),
                DaemonDescription('mgr', 'c', 'host3', is_active=True),
            ],
            [['host1']],
            [[]],
            [['mgr.b', 'mgr.c']]
        ),

    ])
def test_active_assignment(service_type, placement, hosts, daemons, expected, expected_add, expected_remove):
    # The scheduler may legally pick any of the listed outcomes, so each
    # result is checked for membership (`in`) rather than equality.
    spec = ServiceSpec(service_type=service_type,
                       service_id=None,
                       placement=placement)

    hosts, to_add, to_remove = HostAssignment(
        spec=spec,
        hosts=[HostSpec(h) for h in hosts],
        unreachable_hosts=[],
        daemons=daemons,
    ).place()
    assert sorted([h.hostname for h in hosts]) in expected
    assert sorted([h.hostname for h in to_add]) in expected_add
    assert sorted([h.name() for h in to_remove]) in expected_remove


class UnreachableHostsTest(NamedTuple):
    # Test-case record for placements in the presence of unreachable hosts.
    service_type: str
    placement: PlacementSpec
    hosts: List[str]
    # NOTE(review): field name has a typo ('unreachables_hosts'); it is
    # only ever filled positionally here, so the typo is harmless.
    unreachables_hosts: List[str]
    daemons: List[DaemonDescription]
    expected_add: List[List[str]]
    expected_remove: List[List[str]]


@pytest.mark.parametrize("service_type,placement,hosts,unreachable_hosts,daemons,expected_add,expected_remove",
    [
        UnreachableHostsTest(
            'mgr',
            PlacementSpec(count=3),
            'host1 host2 host3'.split(),
            ['host2'],
            [],
            [['host1', 'host3']],
            [[]],
        ),
        UnreachableHostsTest(
            'mgr',
            PlacementSpec(hosts=['host3']),
            'host1 host2 host3'.split(),
            ['host1'],
            [
                DaemonDescription('mgr', 'a', 'host1'),
                DaemonDescription('mgr', 'b', 'host2'),
                DaemonDescription('mgr', 'c', 'host3', is_active=True),
            ],
            [[]],
            [['mgr.b']],
        ),
        UnreachableHostsTest(
            'mgr',
            PlacementSpec(count=3),
            'host1 host2 host3 host4'.split(),
            ['host1'],
            [
                DaemonDescription('mgr', 'a', 'host1'),
                DaemonDescription('mgr', 'b', 'host2'),
                DaemonDescription('mgr', 'c', 'host3', is_active=True),
            ],
            [[]],
            [[]],
        ),
        UnreachableHostsTest(
            'mgr',
            PlacementSpec(count=1),
            'host1 host2 host3 host4'.split(),
            'host1 host3'.split(),
            [
                DaemonDescription('mgr', 'a', 'host1'),
                DaemonDescription('mgr', 'b', 'host2'),
                DaemonDescription('mgr', 'c', 'host3', is_active=True),
            ],
            [[]],
            [['mgr.b']],
        ),
        UnreachableHostsTest(
            'mgr',
            PlacementSpec(count=3),
            'host1 host2 host3 host4'.split(),
            ['host2'],
            [],
            [['host1', 'host3', 'host4']],
            [[]],
        ),
        UnreachableHostsTest(
            'mgr',
            PlacementSpec(count=3),
            'host1 host2 host3 host4'.split(),
            'host1 host4'.split(),
            [],
            [['host2', 'host3']],
            [[]],
        ),

    ])
def test_unreachable_host(service_type, placement, hosts, unreachable_hosts, daemons, expected_add, expected_remove):
    # Unreachable hosts must never be scheduled onto, and daemons on them
    # are not removed just because the host cannot currently be reached.
    spec = ServiceSpec(service_type=service_type,
                       service_id=None,
                       placement=placement)

    hosts, to_add, to_remove = HostAssignment(
        spec=spec,
        hosts=[HostSpec(h) for h in hosts],
        unreachable_hosts=[HostSpec(h) for h in unreachable_hosts],
        daemons=daemons,
    ).place()
    assert sorted([h.hostname for h in to_add]) in expected_add
    assert sorted([h.name() for h in to_remove]) in expected_remove


class RescheduleFromOfflineTest(NamedTuple):
    # Test-case record distinguishing 'offline' hosts (daemons should be
    # rescheduled away for stateless services) from 'maintenance' hosts
    # (daemons stay put).
    service_type: str
    placement: PlacementSpec
    hosts: List[str]
    maintenance_hosts: List[str]
    offline_hosts: List[str]
    daemons: List[DaemonDescription]
    expected_add: List[List[str]]
    expected_remove: List[List[str]]

@pytest.mark.parametrize("service_type,placement,hosts,maintenance_hosts,offline_hosts,daemons,expected_add,expected_remove",
    [
        # nfs (stateless): daemon on an offline host is rescheduled to host3
        RescheduleFromOfflineTest(
            'nfs',
            PlacementSpec(count=2),
            'host1 host2 host3'.split(),
            [],
            ['host2'],
            [
                DaemonDescription('nfs', 'a', 'host1'),
                DaemonDescription('nfs', 'b', 'host2'),
            ],
            [['host3']],
            [[]],
        ),
        # nfs on a host in maintenance: no rescheduling
        RescheduleFromOfflineTest(
            'nfs',
            PlacementSpec(count=2),
            'host1 host2 host3'.split(),
            ['host2'],
            [],
            [
                DaemonDescription('nfs', 'a', 'host1'),
                DaemonDescription('nfs', 'b', 'host2'),
            ],
            [[]],
            [[]],
        ),
        # mon: never rescheduled away from an offline host
        RescheduleFromOfflineTest(
            'mon',
            PlacementSpec(count=2),
            'host1 host2 host3'.split(),
            [],
            ['host2'],
            [
                DaemonDescription('mon', 'a', 'host1'),
                DaemonDescription('mon', 'b', 'host2'),
            ],
            [[]],
            [[]],
        ),
    ])
def test_remove_from_offline(service_type, placement, hosts, maintenance_hosts, offline_hosts, daemons, expected_add, expected_remove):
    # Marks hosts offline/maintenance via HostSpec.status, feeds them in as
    # unreachable, and checks the resulting add/remove decisions.
    spec = ServiceSpec(service_type=service_type,
                       service_id='test',
                       placement=placement)

    host_specs = [HostSpec(h) for h in hosts]
    for h in host_specs:
        if h.hostname in offline_hosts:
            h.status = 'offline'
        if h.hostname in maintenance_hosts:
            h.status = 'maintenance'

    hosts, to_add, to_remove = HostAssignment(
        spec=spec,
        hosts=host_specs,
        # any host with a non-empty status is treated as unreachable
        unreachable_hosts=[h for h in host_specs if h.status],
        daemons=daemons,
    ).place()
    assert sorted([h.hostname for h in to_add]) in expected_add
    assert sorted([h.name() for h in to_remove]) in expected_remove
diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py
new file mode 100644
index 000000000..57cd12456
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_services.py
@@ -0,0 +1,1031 @@
from textwrap import dedent
import json
import yaml

import pytest

from unittest.mock import MagicMock, call, patch, ANY

from cephadm.serve import CephadmServe
from cephadm.services.cephadmservice
import MonService, MgrService, MdsService, RgwService, \
    RbdMirrorService, CrashService, CephadmDaemonDeploySpec
from cephadm.services.iscsi import IscsiService
from cephadm.services.nfs import NFSService
from cephadm.services.osd import OSDService
from cephadm.services.monitoring import GrafanaService, AlertmanagerService, PrometheusService, \
    NodeExporterService
from cephadm.services.exporter import CephadmExporter
from cephadm.module import CephadmOrchestrator
from ceph.deployment.service_spec import IscsiServiceSpec, MonitoringSpec, AlertManagerSpec, \
    ServiceSpec, RGWSpec, GrafanaSpec, SNMPGatewaySpec, IngressSpec, PlacementSpec
from cephadm.tests.fixtures import with_host, with_service, _run_cephadm

from orchestrator import OrchestratorError
from orchestrator._interface import DaemonDescription


class FakeInventory:
    # Minimal inventory stand-in: every host resolves to a fixed address.
    def get_addr(self, name: str) -> str:
        return '1.2.3.4'


class FakeMgr:
    # Minimal mgr stand-in that records mon commands; 'get-cmd'/'set-cmd'
    # read and write self.config so tests can observe dashboard updates.
    def __init__(self):
        self.config = ''
        self.check_mon_command = MagicMock(side_effect=self._check_mon_command)
        self.mon_command = MagicMock(side_effect=self._check_mon_command)
        self.template = MagicMock()
        self.log = MagicMock()
        self.inventory = FakeInventory()

    def _check_mon_command(self, cmd_dict, inbuf=None):
        prefix = cmd_dict.get('prefix')
        if prefix == 'get-cmd':
            return 0, self.config, ''
        if prefix == 'set-cmd':
            self.config = cmd_dict.get('value')
            return 0, 'value set', ''
        return -1, '', 'error'

    def get_minimal_ceph_conf(self) -> str:
        return ''

    def get_mgr_ip(self) -> str:
        return '1.2.3.4'


class TestCephadmService:
    def test_set_service_url_on_dashboard(self):
        # pylint: disable=protected-access
        mgr = FakeMgr()
        service_url = 'http://svc:1000'
        service = GrafanaService(mgr)
        service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url)
        assert mgr.config == service_url

        # set-cmd should not be called if value doesn't change
        mgr.check_mon_command.reset_mock()
        service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url)
        mgr.check_mon_command.assert_called_once_with({'prefix': 'get-cmd'})

    def _get_services(self, mgr):
        # Build one instance of every cephadm service type, keyed by name.
        # services:
        osd_service = OSDService(mgr)
        nfs_service = NFSService(mgr)
        mon_service = MonService(mgr)
        mgr_service = MgrService(mgr)
        mds_service = MdsService(mgr)
        rgw_service = RgwService(mgr)
        rbd_mirror_service = RbdMirrorService(mgr)
        grafana_service = GrafanaService(mgr)
        alertmanager_service = AlertmanagerService(mgr)
        prometheus_service = PrometheusService(mgr)
        node_exporter_service = NodeExporterService(mgr)
        crash_service = CrashService(mgr)
        iscsi_service = IscsiService(mgr)
        cephadm_exporter_service = CephadmExporter(mgr)
        cephadm_services = {
            'mon': mon_service,
            'mgr': mgr_service,
            'osd': osd_service,
            'mds': mds_service,
            'rgw': rgw_service,
            'rbd-mirror': rbd_mirror_service,
            'nfs': nfs_service,
            'grafana': grafana_service,
            'alertmanager': alertmanager_service,
            'prometheus': prometheus_service,
            'node-exporter': node_exporter_service,
            'crash': crash_service,
            'iscsi': iscsi_service,
            'cephadm-exporter': cephadm_exporter_service,
        }
        return cephadm_services

    def test_get_auth_entity(self):
        mgr = FakeMgr()
        cephadm_services = self._get_services(mgr)

        # client-keyed services include the daemon id regardless of host
        for daemon_type in ['rgw', 'rbd-mirror', 'nfs', "iscsi"]:
            assert "client.%s.id1" % (daemon_type) == \
                cephadm_services[daemon_type].get_auth_entity("id1", "host")
            assert "client.%s.id1" % (daemon_type) == \
                cephadm_services[daemon_type].get_auth_entity("id1", "")
            assert "client.%s.id1" % (daemon_type) == \
                cephadm_services[daemon_type].get_auth_entity("id1")

        # crash is keyed by host, so a host is mandatory
        assert "client.crash.host" == \
            cephadm_services["crash"].get_auth_entity("id1", "host")
        with pytest.raises(OrchestratorError):
            cephadm_services["crash"].get_auth_entity("id1", "")
            cephadm_services["crash"].get_auth_entity("id1")

        assert "mon." == cephadm_services["mon"].get_auth_entity("id1", "host")
        assert "mon." == cephadm_services["mon"].get_auth_entity("id1", "")
        assert "mon." == cephadm_services["mon"].get_auth_entity("id1")

        assert "mgr.id1" == cephadm_services["mgr"].get_auth_entity("id1", "host")
        assert "mgr.id1" == cephadm_services["mgr"].get_auth_entity("id1", "")
        assert "mgr.id1" == cephadm_services["mgr"].get_auth_entity("id1")

        for daemon_type in ["osd", "mds"]:
            assert "%s.id1" % daemon_type == \
                cephadm_services[daemon_type].get_auth_entity("id1", "host")
            assert "%s.id1" % daemon_type == \
                cephadm_services[daemon_type].get_auth_entity("id1", "")
            assert "%s.id1" % daemon_type == \
                cephadm_services[daemon_type].get_auth_entity("id1")

        # services based on CephadmService shouldn't have get_auth_entity
        with pytest.raises(AttributeError):
            for daemon_type in ['grafana', 'alertmanager', 'prometheus', 'node-exporter', 'cephadm-exporter']:
                cephadm_services[daemon_type].get_auth_entity("id1", "host")
                cephadm_services[daemon_type].get_auth_entity("id1", "")
                cephadm_services[daemon_type].get_auth_entity("id1")


class TestISCSIService:

    # Shared fixtures built at class-definition time: a fake mgr whose
    # spec store always hands back the iscsi spec below.
    mgr = FakeMgr()
    iscsi_service = IscsiService(mgr)

    iscsi_spec = IscsiServiceSpec(service_type='iscsi', service_id="a")
    iscsi_spec.daemon_type = "iscsi"
    iscsi_spec.daemon_id = "a"
    iscsi_spec.spec = MagicMock()
    iscsi_spec.spec.daemon_type = "iscsi"
    iscsi_spec.spec.ssl_cert = ''
    iscsi_spec.api_user = "user"
    iscsi_spec.api_password = "password"
    iscsi_spec.api_port = 5000
    iscsi_spec.api_secure = False
    iscsi_spec.ssl_cert = "cert"
    iscsi_spec.ssl_key = "key"

    mgr.spec_store = MagicMock()
    mgr.spec_store.all_specs.get.return_value = iscsi_spec

    def test_iscsi_client_caps(self):

        iscsi_daemon_spec = CephadmDaemonDeploySpec(
            host='host', daemon_id='a', service_name=self.iscsi_spec.service_name())

        self.iscsi_service.prepare_create(iscsi_daemon_spec)

        expected_caps = ['mon',
                         'profile rbd, allow command "osd blocklist", allow command "config-key get" with "key" prefix "iscsi/"',
                         'mgr', 'allow command "service status"',
                         'osd', 'allow rwx']

        expected_call = call({'prefix': 'auth get-or-create',
                              'entity': 'client.iscsi.a',
                              'caps': expected_caps})
        expected_call2 = call({'prefix': 'auth caps',
                               'entity': 'client.iscsi.a',
                               'caps': expected_caps})

        assert expected_call in self.mgr.mon_command.mock_calls
        assert expected_call2 in self.mgr.mon_command.mock_calls

    @patch('cephadm.utils.resolve_ip')
    def test_iscsi_dashboard_config(self, mock_resolve_ip):

        self.mgr.check_mon_command = MagicMock()
        self.mgr.check_mon_command.return_value = ('', '{"gateways": {}}', '')

        # Case 1: use IPV4 address
        id1 = DaemonDescription(daemon_type='iscsi', hostname="testhost1",
                                daemon_id="a", ip='192.168.1.1')
        daemon_list = [id1]
        mock_resolve_ip.return_value = '192.168.1.1'

        self.iscsi_service.config_dashboard(daemon_list)

        dashboard_expected_call = call({'prefix': 'dashboard iscsi-gateway-add',
                                        'name': 'testhost1'},
                                       'http://user:password@192.168.1.1:5000')

        assert dashboard_expected_call in self.mgr.check_mon_command.mock_calls

        # Case 2: use IPV6 address
        self.mgr.check_mon_command.reset_mock()

        id1 = DaemonDescription(daemon_type='iscsi', hostname="testhost1",
                                daemon_id="a", ip='FEDC:BA98:7654:3210:FEDC:BA98:7654:3210')
        mock_resolve_ip.return_value = 'FEDC:BA98:7654:3210:FEDC:BA98:7654:3210'

        self.iscsi_service.config_dashboard(daemon_list)

        dashboard_expected_call = call({'prefix': 'dashboard iscsi-gateway-add',
                                        'name': 'testhost1'},
                                       'http://user:password@[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:5000')

        assert dashboard_expected_call in self.mgr.check_mon_command.mock_calls

        # Case 3: IPV6 Address . Secure protocol
        self.mgr.check_mon_command.reset_mock()

        self.iscsi_spec.api_secure = True

        self.iscsi_service.config_dashboard(daemon_list)

        dashboard_expected_call = call({'prefix': 'dashboard iscsi-gateway-add',
                                        'name': 'testhost1'},
                                       'https://user:password@[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:5000')

        assert dashboard_expected_call in self.mgr.check_mon_command.mock_calls


class TestMonitoring:
    def _get_config(self, url: str) -> str:
        # Expected alertmanager.yml body for a given dashboard URL.
        return f"""
        # This file is generated by cephadm.
        # See https://prometheus.io/docs/alerting/configuration/ for documentation.

        global:
          resolve_timeout: 5m
          http_config:
            tls_config:
              insecure_skip_verify: true

        route:
          receiver: 'default'
          routes:
            - group_by: ['alertname']
              group_wait: 10s
              group_interval: 10s
              repeat_interval: 1h
              receiver: 'ceph-dashboard'

        receivers:
        - name: 'default'
          webhook_configs:
        - name: 'ceph-dashboard'
          webhook_configs:
          - url: '{url}/api/prometheus_receiver'
        """

    @patch("cephadm.serve.CephadmServe._run_cephadm")
    @patch("mgr_module.MgrModule.get")
    def test_alertmanager_config(self, mock_get, _run_cephadm,
                                 cephadm_module: CephadmOrchestrator):
        _run_cephadm.return_value = ('{}', '', 0)
        mock_get.return_value = {"services": {"dashboard": "http://[::1]:8080"}}

        with with_host(cephadm_module, 'test'):
            with with_service(cephadm_module, AlertManagerSpec()):
                # loopback dashboard addresses get rewritten to localhost
                y = dedent(self._get_config('http://localhost:8080')).lstrip()
                _run_cephadm.assert_called_with(
                    'test',
                    'alertmanager.test',
                    'deploy',
                    [
                        '--name', 'alertmanager.test',
                        '--meta-json', '{"service_name": "alertmanager", "ports": [9093, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
                        '--config-json', '-', '--tcp-ports', '9093 9094'
                    ],
                    stdin=json.dumps({"files": {"alertmanager.yml": y}, "peers": []}),
                    image='')

    @patch("cephadm.serve.CephadmServe._run_cephadm")
    @patch("mgr_module.MgrModule.get")
    # (decorators continue: the decorated test_alertmanager_config_v6 follows)
    def test_alertmanager_config_v6(self, mock_get, _run_cephadm,
                                    cephadm_module: CephadmOrchestrator):
        # A non-loopback IPv6 dashboard URL is used verbatim.
        dashboard_url = "http://[2001:db8:4321:0000:0000:0000:0000:0000]:8080"
        _run_cephadm.return_value = ('{}', '', 0)
        mock_get.return_value = {"services": {"dashboard": dashboard_url}}

        with with_host(cephadm_module, 'test'):
            with with_service(cephadm_module, AlertManagerSpec()):
                y = dedent(self._get_config(dashboard_url)).lstrip()
                _run_cephadm.assert_called_with(
                    'test',
                    'alertmanager.test',
                    'deploy',
                    [
                        '--name', 'alertmanager.test',
                        '--meta-json',
                        '{"service_name": "alertmanager", "ports": [9093, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
                        '--config-json', '-', '--tcp-ports', '9093 9094'
                    ],
                    stdin=json.dumps({"files": {"alertmanager.yml": y}, "peers": []}),
                    image='')

    @patch("cephadm.serve.CephadmServe._run_cephadm")
    @patch("mgr_module.MgrModule.get")
    @patch("socket.getfqdn")
    def test_alertmanager_config_v6_fqdn(self, mock_getfqdn, mock_get, _run_cephadm,
                                         cephadm_module: CephadmOrchestrator):
        # IPv6 dashboard URL plus an FQDN mgr: the FQDN wins.
        _run_cephadm.return_value = ('{}', '', 0)
        mock_getfqdn.return_value = "mgr.test.fqdn"
        mock_get.return_value = {"services": {
            "dashboard": "http://[2001:db8:4321:0000:0000:0000:0000:0000]:8080"}}

        with with_host(cephadm_module, 'test'):
            with with_service(cephadm_module, AlertManagerSpec()):
                y = dedent(self._get_config("http://mgr.test.fqdn:8080")).lstrip()
                _run_cephadm.assert_called_with(
                    'test',
                    'alertmanager.test',
                    'deploy',
                    [
                        '--name', 'alertmanager.test',
                        '--meta-json',
                        '{"service_name": "alertmanager", "ports": [9093, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
                        '--config-json', '-', '--tcp-ports', '9093 9094'
                    ],
                    stdin=json.dumps({"files": {"alertmanager.yml": y}, "peers": []}),
                    image='')

    @patch("cephadm.serve.CephadmServe._run_cephadm")
    @patch("mgr_module.MgrModule.get")
    def test_alertmanager_config_v4(self, mock_get, _run_cephadm, cephadm_module: CephadmOrchestrator):
        # Plain IPv4 dashboard URL is used verbatim.
        dashboard_url = "http://192.168.0.123:8080"
        _run_cephadm.return_value = ('{}', '', 0)
        mock_get.return_value = {"services": {"dashboard": dashboard_url}}

        with with_host(cephadm_module, 'test'):
            with with_service(cephadm_module, AlertManagerSpec()):
                y = dedent(self._get_config(dashboard_url)).lstrip()
                _run_cephadm.assert_called_with(
                    'test',
                    'alertmanager.test',
                    'deploy',
                    [
                        '--name', 'alertmanager.test',
                        '--meta-json', '{"service_name": "alertmanager", "ports": [9093, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
                        '--config-json', '-', '--tcp-ports', '9093 9094'
                    ],
                    stdin=json.dumps({"files": {"alertmanager.yml": y}, "peers": []}),
                    image='')

    @patch("cephadm.serve.CephadmServe._run_cephadm")
    @patch("mgr_module.MgrModule.get")
    @patch("socket.getfqdn")
    def test_alertmanager_config_v4_fqdn(self, mock_getfqdn, mock_get, _run_cephadm,
                                         cephadm_module: CephadmOrchestrator):
        # IPv4 dashboard URL plus an FQDN mgr: the FQDN wins.
        _run_cephadm.return_value = ('{}', '', 0)
        mock_getfqdn.return_value = "mgr.test.fqdn"
        mock_get.return_value = {"services": {"dashboard": "http://192.168.0.123:8080"}}

        with with_host(cephadm_module, 'test'):
            with with_service(cephadm_module, AlertManagerSpec()):
                y = dedent(self._get_config("http://mgr.test.fqdn:8080")).lstrip()
                _run_cephadm.assert_called_with(
                    'test',
                    'alertmanager.test',
                    'deploy',
                    [
                        '--name', 'alertmanager.test',
                        '--meta-json',
                        '{"service_name": "alertmanager", "ports": [9093, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
                        '--config-json', '-', '--tcp-ports', '9093 9094'
                    ],
                    stdin=json.dumps({"files": {"alertmanager.yml": y}, "peers": []}),
                    image='')

    @patch("cephadm.serve.CephadmServe._run_cephadm")
    def test_prometheus_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
        _run_cephadm.return_value = ('{}', '', 0)

        with with_host(cephadm_module, 'test'):
            with with_service(cephadm_module, MonitoringSpec('node-exporter')) as _, \
                    with_service(cephadm_module, MonitoringSpec('prometheus')) as _:

                # expected prometheus.yml: ceph mgr target plus the
                # node-exporter deployed above
                y = dedent("""
                # This file is generated by cephadm.
                global:
                  scrape_interval: 10s
                  evaluation_interval: 10s
                rule_files:
                  - /etc/prometheus/alerting/*
                scrape_configs:
                  - job_name: 'ceph'
                    honor_labels: true
                    static_configs:
                    - targets:
                      - '[::1]:9283'

                  - job_name: 'node'
                    static_configs:
                    - targets: ['[1::4]:9100']
                      labels:
                        instance: 'test'

                """).lstrip()

                _run_cephadm.assert_called_with(
                    'test',
                    'prometheus.test',
                    'deploy',
                    [
                        '--name', 'prometheus.test',
                        '--meta-json',
                        '{"service_name": "prometheus", "ports": [9095], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
                        '--config-json', '-',
                        '--tcp-ports', '9095'
                    ],
                    stdin=json.dumps({"files": {"prometheus.yml": y,
                                                "/etc/prometheus/alerting/custom_alerts.yml": ""},
                                      'retention_time': '15d'}),
                    image='')

    @patch("cephadm.serve.CephadmServe._run_cephadm")
    @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4')
    @patch("cephadm.services.monitoring.verify_tls", lambda *_: None)
    def test_grafana_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
        _run_cephadm.return_value = ('{}', '', 0)

        with with_host(cephadm_module, 'test'):
            # pre-seed the TLS cert/key the grafana service will pick up
            cephadm_module.set_store('test/grafana_crt', 'c')
            cephadm_module.set_store('test/grafana_key', 'k')
            with with_service(cephadm_module, MonitoringSpec('prometheus')) as _, \
                    with_service(cephadm_module, GrafanaSpec('grafana')) as _:
                files = {
                    'grafana.ini': dedent("""
                        # This file is generated by cephadm.
                        [users]
                          default_theme = light
                        [auth.anonymous]
                          enabled = true
                          org_name = 'Main Org.'
                          org_role = 'Viewer'
                        [server]
                          domain = 'bootstrap.storage.lab'
                          protocol = https
                          cert_file = /etc/grafana/certs/cert_file
                          cert_key = /etc/grafana/certs/cert_key
                          http_port = 3000
                          http_addr = 
                        [security]
                          disable_initial_admin_creation = true
                          cookie_secure = true
                          cookie_samesite = none
                          allow_embedding = true""").lstrip(),  # noqa: W291
                    'provisioning/datasources/ceph-dashboard.yml': dedent("""
                        # This file is generated by cephadm.
                        deleteDatasources:
                          - name: 'Dashboard1'
                            orgId: 1

                        datasources:
                          - name: 'Dashboard1'
                            type: 'prometheus'
                            access: 'proxy'
                            orgId: 1
                            url: 'http://[1::4]:9095'
                            basicAuth: false
                            isDefault: true
                            editable: false
                        """).lstrip(),
                    'certs/cert_file': dedent("""
                        # generated by cephadm
                        c""").lstrip(),
                    'certs/cert_key': dedent("""
                        # generated by cephadm
                        k""").lstrip(),
                }

                _run_cephadm.assert_called_with(
                    'test',
                    'grafana.test',
                    'deploy',
                    [
                        '--name', 'grafana.test',
                        '--meta-json',
                        '{"service_name": "grafana", "ports": [3000], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
                        '--config-json', '-', '--tcp-ports', '3000'],
                    stdin=json.dumps({"files": files}),
                    image='')

    @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
    def test_grafana_initial_admin_pw(self, cephadm_module: CephadmOrchestrator):
        # With initial_admin_password set, grafana.ini must carry
        # admin_user/admin_password instead of disabling admin creation.
        with with_host(cephadm_module, 'test'):
            with with_service(cephadm_module, GrafanaSpec(initial_admin_password='secure')):
                out = cephadm_module.cephadm_services['grafana'].generate_config(
                    CephadmDaemonDeploySpec('test', 'daemon', 'grafana'))
                assert out == (
                    {
                        'files':
                            {
                                'certs/cert_file': ANY,
                                'certs/cert_key': ANY,
                                'grafana.ini':
                                    '# This file is generated by cephadm.\n'
                                    '[users]\n'
                                    '  default_theme = light\n'
                                    '[auth.anonymous]\n'
                                    '  enabled = true\n'
                                    "  org_name = 'Main Org.'\n"
                                    "  org_role = 'Viewer'\n"
                                    '[server]\n'
                                    "  domain = 'bootstrap.storage.lab'\n"
                                    '  protocol = https\n'
                                    '  cert_file = /etc/grafana/certs/cert_file\n'
                                    '  cert_key = /etc/grafana/certs/cert_key\n'
                                    '  http_port = 3000\n'
                                    '  http_addr = \n'
                                    '[security]\n'
                                    '  admin_user = admin\n'
                                    '  admin_password = secure\n'
                                    '  cookie_secure = true\n'
                                    '  cookie_samesite = none\n'
                                    '  allow_embedding = true',
                                'provisioning/datasources/ceph-dashboard.yml':
                                    '# This file is generated by cephadm.\n'
                                    'deleteDatasources:\n'
                                    '\n'
                                    'datasources:\n'
                            }
                    },
                    [],
                )

    @patch("cephadm.serve.CephadmServe._run_cephadm")
    def test_monitoring_ports(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
        _run_cephadm.return_value = ('{}', '', 0)

        with with_host(cephadm_module, 'test'):

            # custom port in the spec must flow through to --tcp-ports
            yaml_str = """service_type: alertmanager
service_name: alertmanager
placement:
  count: 1
spec:
  port: 4200
"""
            yaml_file = yaml.safe_load(yaml_str)
            spec = ServiceSpec.from_json(yaml_file)

            with patch("cephadm.services.monitoring.AlertmanagerService.generate_config", return_value=({}, [])):
                with with_service(cephadm_module, spec):

                    CephadmServe(cephadm_module)._check_daemons()

                    _run_cephadm.assert_called_with(
                        'test', 'alertmanager.test', 'deploy', [
                            '--name', 'alertmanager.test',
                            '--meta-json', '{"service_name": "alertmanager", "ports": [4200, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
                            '--config-json', '-',
                            '--tcp-ports', '4200 9094',
                            '--reconfig'
                        ],
                        stdin='{}',
                        image='')


class TestRGWService:

    @pytest.mark.parametrize(
        "frontend, ssl, expected",
        [
            ('beast', False, 'beast endpoint=[fd00:fd00:fd00:3000::1]:80'),
            ('beast', True,
             'beast ssl_endpoint=[fd00:fd00:fd00:3000::1]:443 ssl_certificate=config://rgw/cert/rgw.foo'),
            ('civetweb', False, 'civetweb port=[fd00:fd00:fd00:3000::1]:80'),
            ('civetweb', True,
             'civetweb port=[fd00:fd00:fd00:3000::1]:443s ssl_certificate=config://rgw/cert/rgw.foo'),
        ]
    )
    @patch("cephadm.serve.CephadmServe._run_cephadm",
_run_cephadm('{}'))
    def test_rgw_update(self, frontend, ssl, expected, cephadm_module: CephadmOrchestrator):
        # The rgw_frontends config option must be built from the frontend
        # type, SSL flag and the host address in the spec's network.
        with with_host(cephadm_module, 'host1'):
            cephadm_module.cache.update_host_devices_networks(
                'host1',
                dls=cephadm_module.cache.devices['host1'],
                nets={
                    'fd00:fd00:fd00:3000::/64': {
                        'if0': ['fd00:fd00:fd00:3000::1']
                    }
                })
            s = RGWSpec(service_id="foo",
                        networks=['fd00:fd00:fd00:3000::/64'],
                        ssl=ssl,
                        rgw_frontend_type=frontend)
            with with_service(cephadm_module, s) as dds:
                _, f, _ = cephadm_module.check_mon_command({
                    'prefix': 'config get',
                    'who': f'client.{dds[0]}',
                    'key': 'rgw_frontends',
                })
                assert f == expected


class TestSNMPGateway:

    @patch("cephadm.serve.CephadmServe._run_cephadm")
    def test_snmp_v2c_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
        _run_cephadm.return_value = ('{}', '', 0)

        spec = SNMPGatewaySpec(
            snmp_version='V2c',
            snmp_destination='192.168.1.1:162',
            credentials={
                'snmp_community': 'public'
            })

        config = {
            "destination": spec.snmp_destination,
            "snmp_version": spec.snmp_version,
            "snmp_community": spec.credentials.get('snmp_community')
        }

        with with_host(cephadm_module, 'test'):
            with with_service(cephadm_module, spec):
                _run_cephadm.assert_called_with(
                    'test',
                    'snmp-gateway.test',
                    'deploy',
                    [
                        '--name', 'snmp-gateway.test',
                        '--meta-json',
                        '{"service_name": "snmp-gateway", "ports": [9464], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
                        '--config-json', '-',
                        '--tcp-ports', '9464'
                    ],
                    stdin=json.dumps(config),
                    image=''
                )

    @patch("cephadm.serve.CephadmServe._run_cephadm")
    def test_snmp_v2c_with_port(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
        # An explicit port in the spec overrides the default 9464.
        _run_cephadm.return_value = ('{}', '', 0)

        spec = SNMPGatewaySpec(
            snmp_version='V2c',
            snmp_destination='192.168.1.1:162',
            credentials={
                'snmp_community': 'public'
            },
            port=9465)

        config = {
            "destination": spec.snmp_destination,
            "snmp_version": spec.snmp_version,
            "snmp_community": spec.credentials.get('snmp_community')
        }

        with with_host(cephadm_module, 'test'):
            with with_service(cephadm_module, spec):
                _run_cephadm.assert_called_with(
                    'test',
                    'snmp-gateway.test',
                    'deploy',
                    [
                        '--name', 'snmp-gateway.test',
                        '--meta-json',
                        '{"service_name": "snmp-gateway", "ports": [9465], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
                        '--config-json', '-',
                        '--tcp-ports', '9465'
                    ],
                    stdin=json.dumps(config),
                    image=''
                )

    @patch("cephadm.serve.CephadmServe._run_cephadm")
    def test_snmp_v3nopriv_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
        # V3 without a privacy protocol: auth fields only, default SHA.
        _run_cephadm.return_value = ('{}', '', 0)

        spec = SNMPGatewaySpec(
            snmp_version='V3',
            snmp_destination='192.168.1.1:162',
            engine_id='8000C53F00000000',
            credentials={
                'snmp_v3_auth_username': 'myuser',
                'snmp_v3_auth_password': 'mypassword'
            })

        config = {
            'destination': spec.snmp_destination,
            'snmp_version': spec.snmp_version,
            'snmp_v3_auth_protocol': 'SHA',
            'snmp_v3_auth_username': 'myuser',
            'snmp_v3_auth_password': 'mypassword',
            'snmp_v3_engine_id': '8000C53F00000000'
        }

        with with_host(cephadm_module, 'test'):
            with with_service(cephadm_module, spec):
                _run_cephadm.assert_called_with(
                    'test',
                    'snmp-gateway.test',
                    'deploy',
                    [
                        '--name', 'snmp-gateway.test',
                        '--meta-json',
                        '{"service_name": "snmp-gateway", "ports": [9464], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
                        '--config-json', '-',
                        '--tcp-ports', '9464'
                    ],
                    stdin=json.dumps(config),
                    image=''
                )

    @patch("cephadm.serve.CephadmServe._run_cephadm")
    def test_snmp_v3priv_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
        # V3 with both auth and privacy protocols (continued below).
        _run_cephadm.return_value = ('{}', '', 0)

        spec = SNMPGatewaySpec(
            snmp_version='V3',
snmp_destination='192.168.1.1:162', + engine_id='8000C53F00000000', + auth_protocol='MD5', + privacy_protocol='AES', + credentials={ + 'snmp_v3_auth_username': 'myuser', + 'snmp_v3_auth_password': 'mypassword', + 'snmp_v3_priv_password': 'mysecret', + }) + + config = { + 'destination': spec.snmp_destination, + 'snmp_version': spec.snmp_version, + 'snmp_v3_auth_protocol': 'MD5', + 'snmp_v3_auth_username': spec.credentials.get('snmp_v3_auth_username'), + 'snmp_v3_auth_password': spec.credentials.get('snmp_v3_auth_password'), + 'snmp_v3_engine_id': '8000C53F00000000', + 'snmp_v3_priv_protocol': spec.privacy_protocol, + 'snmp_v3_priv_password': spec.credentials.get('snmp_v3_priv_password'), + } + + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, spec): + _run_cephadm.assert_called_with( + 'test', + 'snmp-gateway.test', + 'deploy', + [ + '--name', 'snmp-gateway.test', + '--meta-json', + '{"service_name": "snmp-gateway", "ports": [9464], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}', + '--config-json', '-', + '--tcp-ports', '9464' + ], + stdin=json.dumps(config), + image='' + ) + + +class TestIngressService: + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_ingress_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.return_value = ('{}', '', 0) + + with with_host(cephadm_module, 'test'): + cephadm_module.cache.update_host_devices_networks( + 'test', + cephadm_module.cache.devices['test'], + { + '1.2.3.0/24': { + 'if0': ['1.2.3.4/32'] + } + } + ) + + # the ingress backend + s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), + rgw_frontend_type='beast') + + ispec = IngressSpec(service_type='ingress', + service_id='test', + backend_service='rgw.foo', + frontend_port=8089, + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + virtual_interface_networks=['1.2.3.0/24'], + 
virtual_ip="1.2.3.4/32") + with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _: + # generate the keepalived conf based on the specified spec + keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + keepalived_expected_conf = { + 'files': + { + 'keepalived.conf': + '# This file is generated by cephadm.\n' + 'vrrp_script check_backend {\n ' + 'script "/usr/bin/curl http://localhost:8999/health"\n ' + 'weight -20\n ' + 'interval 2\n ' + 'rise 2\n ' + 'fall 2\n}\n\n' + 'vrrp_instance VI_0 {\n ' + 'state MASTER\n ' + 'priority 100\n ' + 'interface if0\n ' + 'virtual_router_id 50\n ' + 'advert_int 1\n ' + 'authentication {\n ' + 'auth_type PASS\n ' + 'auth_pass 12345\n ' + '}\n ' + 'unicast_src_ip 1::4\n ' + 'unicast_peer {\n ' + '}\n ' + 'virtual_ipaddress {\n ' + '1.2.3.4/32 dev if0\n ' + '}\n ' + 'track_script {\n ' + 'check_backend\n }\n' + '}\n' + } + } + + # check keepalived config + assert keepalived_generated_conf[0] == keepalived_expected_conf + + # generate the haproxy conf based on the specified spec + haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + haproxy_expected_conf = { + 'files': + { + 'haproxy.cfg': + '# This file is generated by cephadm.' 
+ '\nglobal\n log ' + '127.0.0.1 local2\n ' + 'chroot /var/lib/haproxy\n ' + 'pidfile /var/lib/haproxy/haproxy.pid\n ' + 'maxconn 8000\n ' + 'daemon\n ' + 'stats socket /var/lib/haproxy/stats\n' + '\ndefaults\n ' + 'mode http\n ' + 'log global\n ' + 'option httplog\n ' + 'option dontlognull\n ' + 'option http-server-close\n ' + 'option forwardfor except 127.0.0.0/8\n ' + 'option redispatch\n ' + 'retries 3\n ' + 'timeout queue 20s\n ' + 'timeout connect 5s\n ' + 'timeout http-request 1s\n ' + 'timeout http-keep-alive 5s\n ' + 'timeout client 1s\n ' + 'timeout server 1s\n ' + 'timeout check 5s\n ' + 'maxconn 8000\n' + '\nfrontend stats\n ' + 'mode http\n ' + 'bind 1.2.3.4:8999\n ' + 'bind localhost:8999\n ' + 'stats enable\n ' + 'stats uri /stats\n ' + 'stats refresh 10s\n ' + 'stats auth admin:12345\n ' + 'http-request use-service prometheus-exporter if { path /metrics }\n ' + 'monitor-uri /health\n' + '\nfrontend frontend\n ' + 'bind 1.2.3.4:8089\n ' + 'default_backend backend\n\n' + 'backend backend\n ' + 'option forwardfor\n ' + 'balance static-rr\n ' + 'option httpchk HEAD / HTTP/1.0\n ' + 'server ' + haproxy_generated_conf[1][0] + ' 1::4:80 check weight 100\n' + } + } + + assert haproxy_generated_conf[0] == haproxy_expected_conf + + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_ingress_config_multi_vips(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.return_value = ('{}', '', 0) + + with with_host(cephadm_module, 'test'): + cephadm_module.cache.update_host_devices_networks('test', [], { + '1.2.3.0/24': { + 'if0': ['1.2.3.4/32'] + } + }) + + # Check the ingress with multiple VIPs + s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), + rgw_frontend_type='beast') + + ispec = IngressSpec(service_type='ingress', + service_id='test', + backend_service='rgw.foo', + frontend_port=8089, + monitor_port=8999, + monitor_user='admin', + monitor_password='12345', + keepalived_password='12345', + 
virtual_interface_networks=['1.2.3.0/24'], + virtual_ips_list=["1.2.3.4/32"]) + with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _: + # generate the keepalived conf based on the specified spec + # Test with only 1 IP on the list, as it will fail with more VIPS but only one host. + keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + keepalived_expected_conf = { + 'files': + { + 'keepalived.conf': + '# This file is generated by cephadm.\n' + 'vrrp_script check_backend {\n ' + 'script "/usr/bin/curl http://localhost:8999/health"\n ' + 'weight -20\n ' + 'interval 2\n ' + 'rise 2\n ' + 'fall 2\n}\n\n' + 'vrrp_instance VI_0 {\n ' + 'state MASTER\n ' + 'priority 100\n ' + 'interface if0\n ' + 'virtual_router_id 50\n ' + 'advert_int 1\n ' + 'authentication {\n ' + 'auth_type PASS\n ' + 'auth_pass 12345\n ' + '}\n ' + 'unicast_src_ip 1::4\n ' + 'unicast_peer {\n ' + '}\n ' + 'virtual_ipaddress {\n ' + '1.2.3.4/32 dev if0\n ' + '}\n ' + 'track_script {\n ' + 'check_backend\n }\n' + '}\n' + } + } + + # check keepalived config + assert keepalived_generated_conf[0] == keepalived_expected_conf + + # generate the haproxy conf based on the specified spec + haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config( + CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name())) + + haproxy_expected_conf = { + 'files': + { + 'haproxy.cfg': + '# This file is generated by cephadm.' 
+ '\nglobal\n log ' + '127.0.0.1 local2\n ' + 'chroot /var/lib/haproxy\n ' + 'pidfile /var/lib/haproxy/haproxy.pid\n ' + 'maxconn 8000\n ' + 'daemon\n ' + 'stats socket /var/lib/haproxy/stats\n' + '\ndefaults\n ' + 'mode http\n ' + 'log global\n ' + 'option httplog\n ' + 'option dontlognull\n ' + 'option http-server-close\n ' + 'option forwardfor except 127.0.0.0/8\n ' + 'option redispatch\n ' + 'retries 3\n ' + 'timeout queue 20s\n ' + 'timeout connect 5s\n ' + 'timeout http-request 1s\n ' + 'timeout http-keep-alive 5s\n ' + 'timeout client 1s\n ' + 'timeout server 1s\n ' + 'timeout check 5s\n ' + 'maxconn 8000\n' + '\nfrontend stats\n ' + 'mode http\n ' + 'bind *:8999\n ' + 'bind localhost:8999\n ' + 'stats enable\n ' + 'stats uri /stats\n ' + 'stats refresh 10s\n ' + 'stats auth admin:12345\n ' + 'http-request use-service prometheus-exporter if { path /metrics }\n ' + 'monitor-uri /health\n' + '\nfrontend frontend\n ' + 'bind *:8089\n ' + 'default_backend backend\n\n' + 'backend backend\n ' + 'option forwardfor\n ' + 'balance static-rr\n ' + 'option httpchk HEAD / HTTP/1.0\n ' + 'server ' + + haproxy_generated_conf[1][0] + ' 1::4:80 check weight 100\n' + } + } + + assert haproxy_generated_conf[0] == haproxy_expected_conf + + +class TestCephFsMirror: + @patch("cephadm.serve.CephadmServe._run_cephadm") + def test_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.return_value = ('{}', '', 0) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec('cephfs-mirror')): + cephadm_module.assert_issued_mon_command({ + 'prefix': 'mgr module enable', + 'module': 'mirroring' + }) diff --git a/src/pybind/mgr/cephadm/tests/test_spec.py b/src/pybind/mgr/cephadm/tests/test_spec.py new file mode 100644 index 000000000..54aa0a7ab --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_spec.py @@ -0,0 +1,600 @@ +# Disable autopep8 for this file: + +# fmt: off + +import json + +import pytest + +from 
ceph.deployment.service_spec import ServiceSpec, NFSServiceSpec, RGWSpec, \ + IscsiServiceSpec, HostPlacementSpec, CustomContainerSpec +from orchestrator import DaemonDescription, OrchestratorError + + +@pytest.mark.parametrize( + "spec_json", + json.loads("""[ +{ + "placement": { + "count": 1 + }, + "service_type": "alertmanager" +}, +{ + "placement": { + "host_pattern": "*" + }, + "service_type": "crash" +}, +{ + "placement": { + "count": 1 + }, + "service_type": "grafana" +}, +{ + "placement": { + "count": 2 + }, + "service_type": "mgr" +}, +{ + "placement": { + "count": 5 + }, + "service_type": "mon" +}, +{ + "placement": { + "host_pattern": "*" + }, + "service_type": "node-exporter" +}, +{ + "placement": { + "count": 1 + }, + "service_type": "prometheus" +}, +{ + "placement": { + "hosts": [ + { + "hostname": "ceph-001", + "network": "", + "name": "" + } + ] + }, + "service_type": "rgw", + "service_id": "default-rgw-realm.eu-central-1.1", + "rgw_realm": "default-rgw-realm", + "rgw_zone": "eu-central-1" +}, +{ + "service_type": "osd", + "service_id": "osd_spec_default", + "placement": { + "host_pattern": "*" + }, + "data_devices": { + "model": "MC-55-44-XZ" + }, + "db_devices": { + "model": "SSD-123-foo" + }, + "wal_devices": { + "model": "NVME-QQQQ-987" + } +} +] +""") +) +def test_spec_octopus(spec_json): + # https://tracker.ceph.com/issues/44934 + # Those are real user data from early octopus. + # Please do not modify those JSON values. 
+ + spec = ServiceSpec.from_json(spec_json) + + # just some verification that we can sill read old octopus specs + def convert_to_old_style_json(j): + j_c = dict(j.copy()) + j_c.pop('service_name', None) + if 'spec' in j_c: + spec = j_c.pop('spec') + j_c.update(spec) + if 'placement' in j_c: + if 'hosts' in j_c['placement']: + j_c['placement']['hosts'] = [ + { + 'hostname': HostPlacementSpec.parse(h).hostname, + 'network': HostPlacementSpec.parse(h).network, + 'name': HostPlacementSpec.parse(h).name + } + for h in j_c['placement']['hosts'] + ] + j_c.pop('objectstore', None) + j_c.pop('filter_logic', None) + return j_c + + assert spec_json == convert_to_old_style_json(spec.to_json()) + + +@pytest.mark.parametrize( + "dd_json", + json.loads("""[ + { + "hostname": "ceph-001", + "container_id": "d94d7969094d", + "container_image_id": "0881eb8f169f5556a292b4e2c01d683172b12830a62a9225a98a8e206bb734f0", + "container_image_name": "docker.io/prom/alertmanager:latest", + "daemon_id": "ceph-001", + "daemon_type": "alertmanager", + "version": "0.20.0", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.725856", + "created": "2020-04-02T19:23:08.829543", + "started": "2020-04-03T07:29:16.932838", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "c4b036202241", + "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", + "container_image_name": "docker.io/ceph/ceph:v15", + "daemon_id": "ceph-001", + "daemon_type": "crash", + "version": "15.2.0", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.725903", + "created": "2020-04-02T19:23:11.390694", + "started": "2020-04-03T07:29:16.910897", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "5b7b94b48f31", + "container_image_id": "87a51ecf0b1c9a7b187b21c1b071425dafea0d765a96d5bc371c791169b3d7f4", + "container_image_name": "docker.io/ceph/ceph-grafana:latest", + "daemon_id": "ceph-001", + 
"daemon_type": "grafana", + "version": "6.6.2", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.725950", + "created": "2020-04-02T19:23:52.025088", + "started": "2020-04-03T07:29:16.847972", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "9ca007280456", + "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", + "container_image_name": "docker.io/ceph/ceph:v15", + "daemon_id": "ceph-001.gkjwqp", + "daemon_type": "mgr", + "version": "15.2.0", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.725807", + "created": "2020-04-02T19:22:18.648584", + "started": "2020-04-03T07:29:16.856153", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "3d1ba9a2b697", + "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", + "container_image_name": "docker.io/ceph/ceph:v15", + "daemon_id": "ceph-001", + "daemon_type": "mon", + "version": "15.2.0", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.725715", + "created": "2020-04-02T19:22:13.863300", + "started": "2020-04-03T07:29:17.206024", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "36d026c68ba1", + "container_image_id": "e5a616e4b9cf68dfcad7782b78e118be4310022e874d52da85c55923fb615f87", + "container_image_name": "docker.io/prom/node-exporter:latest", + "daemon_id": "ceph-001", + "daemon_type": "node-exporter", + "version": "0.18.1", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.725996", + "created": "2020-04-02T19:23:53.880197", + "started": "2020-04-03T07:29:16.880044", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "faf76193cbfe", + "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", + "container_image_name": "docker.io/ceph/ceph:v15", + "daemon_id": "0", + "daemon_type": "osd", + 
"version": "15.2.0", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.726088", + "created": "2020-04-02T20:35:02.991435", + "started": "2020-04-03T07:29:19.373956", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "f82505bae0f1", + "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", + "container_image_name": "docker.io/ceph/ceph:v15", + "daemon_id": "1", + "daemon_type": "osd", + "version": "15.2.0", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.726134", + "created": "2020-04-02T20:35:17.142272", + "started": "2020-04-03T07:29:19.374002", + "is_active": false + }, + { + "hostname": "ceph-001", + "container_id": "2708d84cd484", + "container_image_id": "358a0d2395fe711bb8258e8fb4b2d7865c0a9a6463969bcd1452ee8869ea6653", + "container_image_name": "docker.io/prom/prometheus:latest", + "daemon_id": "ceph-001", + "daemon_type": "prometheus", + "version": "2.17.1", + "status": 1, + "status_desc": "running", + "last_refresh": "2020-04-03T15:31:48.726042", + "created": "2020-04-02T19:24:10.281163", + "started": "2020-04-03T07:29:16.926292", + "is_active": false + }, + { + "hostname": "ceph-001", + "daemon_id": "default-rgw-realm.eu-central-1.1.ceph-001.ytywjo", + "daemon_type": "rgw", + "status": 1, + "status_desc": "starting", + "is_active": false + } +]""") +) +def test_dd_octopus(dd_json): + # https://tracker.ceph.com/issues/44934 + # Those are real user data from early octopus. + # Please do not modify those JSON values. + + # Convert datetime properties to old style. 
+ # 2020-04-03T07:29:16.926292Z -> 2020-04-03T07:29:16.926292 + def convert_to_old_style_json(j): + for k in ['last_refresh', 'created', 'started', 'last_deployed', + 'last_configured']: + if k in j: + j[k] = j[k].rstrip('Z') + del j['daemon_name'] + return j + + assert dd_json == convert_to_old_style_json( + DaemonDescription.from_json(dd_json).to_json()) + + +@pytest.mark.parametrize("spec,dd,valid", +[ # noqa: E128 + # https://tracker.ceph.com/issues/44934 + ( + RGWSpec( + service_id="foo", + rgw_realm="default-rgw-realm", + rgw_zone="eu-central-1", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="foo.ceph-001.ytywjo", + hostname="ceph-001", + ), + True + ), + ( + # no realm + RGWSpec( + service_id="foo.bar", + rgw_zone="eu-central-1", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="foo.bar.ceph-001.ytywjo", + hostname="ceph-001", + ), + True + ), + ( + # no realm or zone + RGWSpec( + service_id="bar", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="bar.host.domain.tld.ytywjo", + hostname="host.domain.tld", + ), + True + ), + ( + # explicit naming + RGWSpec( + service_id="realm.zone", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="realm.zone.a", + hostname="smithi028", + ), + True + ), + ( + # without host + RGWSpec( + service_type='rgw', + service_id="foo", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="foo.hostname.ytywjo", + hostname=None, + ), + False + ), + ( + # without host (2) + RGWSpec( + service_type='rgw', + service_id="default-rgw-realm.eu-central-1.1", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="default-rgw-realm.eu-central-1.1.hostname.ytywjo", + hostname=None, + ), + False + ), + ( + # service_id contains hostname + # (sort of) https://tracker.ceph.com/issues/45294 + RGWSpec( + service_id="default.rgw.realm.ceph.001", + ), + DaemonDescription( + daemon_type='rgw', + daemon_id="default.rgw.realm.ceph.001.ceph.001.ytywjo", + hostname="ceph.001", + ), + True + ), + + # 
https://tracker.ceph.com/issues/45293 + ( + ServiceSpec( + service_type='mds', + service_id="a", + ), + DaemonDescription( + daemon_type='mds', + daemon_id="a.host1.abc123", + hostname="host1", + ), + True + ), + ( + # '.' char in service_id + ServiceSpec( + service_type='mds', + service_id="a.b.c", + ), + DaemonDescription( + daemon_type='mds', + daemon_id="a.b.c.host1.abc123", + hostname="host1", + ), + True + ), + + # https://tracker.ceph.com/issues/45617 + ( + # daemon_id does not contain hostname + ServiceSpec( + service_type='mds', + service_id="a", + ), + DaemonDescription( + daemon_type='mds', + daemon_id="a", + hostname="host1", + ), + True + ), + ( + # daemon_id only contains hostname + ServiceSpec( + service_type='mds', + service_id="host1", + ), + DaemonDescription( + daemon_type='mds', + daemon_id="host1", + hostname="host1", + ), + True + ), + + # https://tracker.ceph.com/issues/45399 + ( + # daemon_id only contains hostname + ServiceSpec( + service_type='mds', + service_id="a", + ), + DaemonDescription( + daemon_type='mds', + daemon_id="a.host1.abc123", + hostname="host1.site", + ), + True + ), + ( + NFSServiceSpec( + service_id="a", + ), + DaemonDescription( + daemon_type='nfs', + daemon_id="a.host1", + hostname="host1.site", + ), + True + ), + + # https://tracker.ceph.com/issues/45293 + ( + NFSServiceSpec( + service_id="a", + ), + DaemonDescription( + daemon_type='nfs', + daemon_id="a.host1", + hostname="host1", + ), + True + ), + ( + # service_id contains a '.' char + NFSServiceSpec( + service_id="a.b.c", + ), + DaemonDescription( + daemon_type='nfs', + daemon_id="a.b.c.host1", + hostname="host1", + ), + True + ), + ( + # trailing chars after hostname + NFSServiceSpec( + service_id="a.b.c", + ), + DaemonDescription( + daemon_type='nfs', + daemon_id="a.b.c.host1.abc123", + hostname="host1", + ), + True + ), + ( + # chars after hostname without '.' 
+ NFSServiceSpec( + service_id="a", + ), + DaemonDescription( + daemon_type='nfs', + daemon_id="a.host1abc123", + hostname="host1", + ), + False + ), + ( + # chars before hostname without '.' + NFSServiceSpec( + service_id="a", + ), + DaemonDescription( + daemon_type='nfs', + daemon_id="ahost1.abc123", + hostname="host1", + ), + False + ), + + # https://tracker.ceph.com/issues/45293 + ( + IscsiServiceSpec( + service_type='iscsi', + service_id="a", + ), + DaemonDescription( + daemon_type='iscsi', + daemon_id="a.host1.abc123", + hostname="host1", + ), + True + ), + ( + # '.' char in service_id + IscsiServiceSpec( + service_type='iscsi', + service_id="a.b.c", + ), + DaemonDescription( + daemon_type='iscsi', + daemon_id="a.b.c.host1.abc123", + hostname="host1", + ), + True + ), + ( + # fixed daemon id for teuthology. + IscsiServiceSpec( + service_type='iscsi', + service_id='iscsi', + ), + DaemonDescription( + daemon_type='iscsi', + daemon_id="iscsi.a", + hostname="host1", + ), + True + ), + + ( + CustomContainerSpec( + service_type='container', + service_id='hello-world', + image='docker.io/library/hello-world:latest', + ), + DaemonDescription( + daemon_type='container', + daemon_id='hello-world.mgr0', + hostname='mgr0', + ), + True + ), + + ( + # daemon_id only contains hostname + ServiceSpec( + service_type='cephadm-exporter', + ), + DaemonDescription( + daemon_type='cephadm-exporter', + daemon_id="testhost", + hostname="testhost", + ), + True + ), +]) +def test_daemon_description_service_name(spec: ServiceSpec, + dd: DaemonDescription, + valid: bool): + if valid: + assert spec.service_name() == dd.service_name() + else: + with pytest.raises(OrchestratorError): + dd.service_name() diff --git a/src/pybind/mgr/cephadm/tests/test_template.py b/src/pybind/mgr/cephadm/tests/test_template.py new file mode 100644 index 000000000..f67304348 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_template.py @@ -0,0 +1,33 @@ +import pathlib + +import pytest + +from 
cephadm.template import TemplateMgr, UndefinedError, TemplateNotFoundError


def test_render(cephadm_module, fs):
    """Exercise TemplateMgr.render() against a fake template on a fake fs.

    `fs` is assumed to be the pyfakefs fixture providing create_file() --
    TODO confirm against conftest.
    """
    # Templates are resolved relative to cephadm/templates; plant a fake one.
    template_base = (pathlib.Path(__file__).parent / '../templates').resolve()
    fake_template = template_base / 'foo/bar'
    fs.create_file(fake_template, contents='{{ cephadm_managed }}{{ var }}')

    template_mgr = TemplateMgr(cephadm_module)
    value = 'test'

    # with base context: cephadm_managed comes from the manager's base context
    expected_text = '{}{}'.format(template_mgr.base_context['cephadm_managed'], value)
    assert template_mgr.render('foo/bar', {'var': value}) == expected_text

    # without base context: cephadm_managed is undefined and must raise
    with pytest.raises(UndefinedError):
        template_mgr.render('foo/bar', {'var': value}, managed_context=False)

    # override the base context: caller-supplied value wins
    context = {
        'cephadm_managed': 'abc',
        'var': value
    }
    assert template_mgr.render('foo/bar', context) == 'abc{}'.format(value)

    # template not found
    with pytest.raises(TemplateNotFoundError):
        template_mgr.render('foo/bar/2', {})
diff --git a/src/pybind/mgr/cephadm/tests/test_upgrade.py b/src/pybind/mgr/cephadm/tests/test_upgrade.py
new file mode 100644
index 000000000..9368f4dc3
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_upgrade.py
@@ -0,0 +1,322 @@
import json
from unittest import mock

import pytest

from ceph.deployment.service_spec import PlacementSpec, ServiceSpec
from cephadm import CephadmOrchestrator
from cephadm.upgrade import CephadmUpgrade
from cephadm.serve import CephadmServe
from orchestrator import OrchestratorError, DaemonDescription
from .fixtures import _run_cephadm, wait, with_host, with_service

from typing import List, Tuple, Optional


@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
def test_upgrade_start(cephadm_module: CephadmOrchestrator):
    # Two hosts + two mgrs: the minimum so upgrade_start() does not refuse
    # (a lone mgr cannot be upgraded; see test_not_enough_mgrs).
    with with_host(cephadm_module, 'test'):
        with with_host(cephadm_module, 'test2'):
            with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)), status_running=True):
                assert
wait(cephadm_module, cephadm_module.upgrade_start(
                    'image_id', None)) == 'Initiating upgrade to image_id'

                assert wait(cephadm_module, cephadm_module.upgrade_status()
                            ).target_image == 'image_id'

                assert wait(cephadm_module, cephadm_module.upgrade_pause()
                            ) == 'Paused upgrade to image_id'

                assert wait(cephadm_module, cephadm_module.upgrade_resume()
                            ) == 'Resumed upgrade to image_id'

                assert wait(cephadm_module, cephadm_module.upgrade_stop()
                            ) == 'Stopped upgrade to image_id'


@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
@pytest.mark.parametrize("use_repo_digest",
                         [
                             False,
                             True
                         ])
def test_upgrade_run(use_repo_digest, cephadm_module: CephadmOrchestrator):
    # End-to-end upgrade walk-through: start the upgrade, let _do_upgrade()
    # inspect/pull the target image, refresh daemon state so the mgr daemons
    # appear redeployed from the new digest, then verify the cluster-wide
    # container_image config ends up as either the digest or the plain image
    # name depending on use_repo_digest.
    with with_host(cephadm_module, 'host1'):
        with with_host(cephadm_module, 'host2'):
            cephadm_module.set_container_image('global', 'from_image')
            cephadm_module.use_repo_digest = use_repo_digest
            with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(host_pattern='*', count=2)),
                              CephadmOrchestrator.apply_mgr, '', status_running=True),\
                    mock.patch("cephadm.module.CephadmOrchestrator.lookup_release_name",
                               return_value='foo'),\
                    mock.patch("cephadm.module.CephadmOrchestrator.version",
                               new_callable=mock.PropertyMock) as version_mock,\
                    mock.patch("cephadm.module.CephadmOrchestrator.get",
                               return_value={
                                   # capture fields in both mon and osd maps
                                   "require_osd_release": "pacific",
                                   "min_mon_release": 16,
                               }):
                version_mock.return_value = 'ceph version 18.2.1 (somehash)'
                assert wait(cephadm_module, cephadm_module.upgrade_start(
                    'to_image', None)) == 'Initiating upgrade to to_image'

                assert wait(cephadm_module, cephadm_module.upgrade_status()
                            ).target_image == 'to_image'

                # Pretend every running mgr is still on an old version so the
                # upgrade logic considers them upgrade candidates.
                def _versions_mock(cmd):
                    return json.dumps({
                        'mgr': {
                            'ceph version 1.2.3 (asdf) blah': 1
                        }
                    })

                cephadm_module._mon_command_mock_versions = _versions_mock

                # First _do_upgrade() pass: _run_cephadm now answers image
                # inspection with the target id/digest/version.
                with mock.patch("cephadm.serve.CephadmServe._run_cephadm",
                                _run_cephadm(json.dumps({
                                    'image_id': 'image_id',
                                    'repo_digests': ['to_image@repo_digest'],
                                    'ceph_version': 'ceph version 18.2.3 (hash)',
                                }))):

                    cephadm_module.upgrade._do_upgrade()

                assert cephadm_module.upgrade_status is not None

                # Refresh daemon inventory: host1's daemon now reports as
                # running from the target digest.
                with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(
                    json.dumps([
                        dict(
                            name=list(cephadm_module.cache.daemons['host1'].keys())[0],
                            style='cephadm',
                            fsid='fsid',
                            container_id='container_id',
                            container_image_id='image_id',
                            container_image_digests=['to_image@repo_digest'],
                            deployed_by=['to_image@repo_digest'],
                            version='version',
                            state='running',
                        )
                    ])
                )):
                    CephadmServe(cephadm_module)._refresh_hosts_and_daemons()

                # Second _do_upgrade() pass after the refresh.
                with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({
                    'image_id': 'image_id',
                    'repo_digests': ['to_image@repo_digest'],
                    'ceph_version': 'ceph version 18.2.3 (hash)',
                }))):
                    cephadm_module.upgrade._do_upgrade()

                # With use_repo_digest the global container_image must be
                # pinned to the digest, otherwise to the plain image name.
                _, image, _ = cephadm_module.check_mon_command({
                    'prefix': 'config get',
                    'who': 'global',
                    'key': 'container_image',
                })
                if use_repo_digest:
                    assert image == 'to_image@repo_digest'
                else:
                    assert image == 'to_image'


def test_upgrade_state_null(cephadm_module: CephadmOrchestrator):
    # This test validates https://tracker.ceph.com/issues/47580
    # A literal 'null' persisted in the mgr store must deserialize to a
    # None upgrade_state instead of crashing CephadmUpgrade's constructor.
    cephadm_module.set_store('upgrade_state', 'null')
    CephadmUpgrade(cephadm_module)
    assert CephadmUpgrade(cephadm_module).upgrade_state is None


@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
def test_not_enough_mgrs(cephadm_module: CephadmOrchestrator):
    # A single mgr cannot be upgraded (no standby to fail over to), so
    # upgrade_start() must refuse with an OrchestratorError.
    with with_host(cephadm_module, 'host1'):
        with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=1)), CephadmOrchestrator.apply_mgr, ''):
            with pytest.raises(OrchestratorError):
                wait(cephadm_module, cephadm_module.upgrade_start('image_id', None))


@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
@mock.patch("cephadm.CephadmOrchestrator.check_mon_command")
def test_enough_mons_for_ok_to_stop(check_mon_command, cephadm_module: CephadmOrchestrator):
    # only 2 monitors, not enough for ok-to-stop to ever pass
    check_mon_command.return_value = (
        0, '{"monmap": {"mons": [{"name": "mon.1"}, {"name": "mon.2"}]}}', '')
    assert not cephadm_module.upgrade._enough_mons_for_ok_to_stop()

    # 3 monitors, ok-to-stop should work fine
    check_mon_command.return_value = (
        0, '{"monmap": {"mons": [{"name": "mon.1"}, {"name": "mon.2"}, {"name": "mon.3"}]}}', '')
    assert cephadm_module.upgrade._enough_mons_for_ok_to_stop()


@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
@mock.patch("cephadm.module.HostCache.get_daemons_by_service")
@mock.patch("cephadm.CephadmOrchestrator.get")
def test_enough_mds_for_ok_to_stop(get, get_daemons_by_service, cephadm_module: CephadmOrchestrator):
    # An MDS may only be stopped when more daemons are running for its
    # filesystem than the fs's max_mds.

    # 1 daemon, max_mds=1 -> not ok to stop
    get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'test', 'max_mds': 1}}]}]
    get_daemons_by_service.side_effect = [[DaemonDescription()]]
    assert not cephadm_module.upgrade._enough_mds_for_ok_to_stop(
        DaemonDescription(daemon_type='mds', daemon_id='test.host1.gfknd', service_name='mds.test'))

    # 2 daemons, max_mds=2 -> still not ok to stop
    get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'myfs.test', 'max_mds': 2}}]}]
    get_daemons_by_service.side_effect = [[DaemonDescription(), DaemonDescription()]]
    assert not cephadm_module.upgrade._enough_mds_for_ok_to_stop(
        DaemonDescription(daemon_type='mds', daemon_id='myfs.test.host1.gfknd', service_name='mds.myfs.test'))

    # 2 daemons, max_mds=1 -> ok to stop one
    get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'myfs.test', 'max_mds': 1}}]}]
    get_daemons_by_service.side_effect = [[DaemonDescription(), DaemonDescription()]]
    assert cephadm_module.upgrade._enough_mds_for_ok_to_stop(
        DaemonDescription(daemon_type='mds', daemon_id='myfs.test.host1.gfknd', service_name='mds.myfs.test'))


@pytest.mark.parametrize(
    "upgraded, not_upgraded, daemon_types, hosts, services, should_block",
    # [ ([(type, host, id), ... ], [...], [daemon types], [hosts], [services], True/False), ... ]
    [
        (  # valid, upgrade mgr daemons
            [],
            [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
            ['mgr'],
            None,
            None,
            False
        ),
        (  # invalid, can't upgrade mons until mgr is upgraded
            [],
            [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
            ['mon'],
            None,
            None,
            True
        ),
        (  # invalid, can't upgrade mon service until all mgr daemons are upgraded
            [],
            [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
            None,
            None,
            ['mon'],
            True
        ),
        (  # valid, upgrade mgr service
            [],
            [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
            None,
            None,
            ['mgr'],
            False
        ),
        (  # valid, mgr is already upgraded so can upgrade mons
            [('mgr', 'a', 'a.x')],
            [('mon', 'a', 'a')],
            ['mon'],
            None,
            None,
            False
        ),
        (  # invalid, can't upgrade all daemons on b b/c un-upgraded mgr on a
            [],
            [('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
            None,
            ['a'],
            None,
            True
        ),
        (  # valid, only daemon on b is a mgr
            [],
            [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
            None,
            ['b'],
            None,
            False
        ),
        (  # invalid, can't upgrade mon on a while mgr on b is un-upgraded
            [],
            [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
            None,
            ['a'],
            None,
            True
        ),
        (  # valid, only upgrading the mgr on a
            [],
            [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
            ['mgr'],
            ['a'],
            None,
            False
        ),
        (  # valid, mgr daemon not on b are upgraded
            [('mgr', 'a', 'a.x')],
            [('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
            None,
            ['b'],
            None,
            False
        ),
        (  # valid, all the necessary hosts are covered, mgr on c is already upgraded
            [('mgr', 'c', 'c.z')],
            [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a'), ('osd', 'c', '0')],
            None,
            ['a', 'b'],
            None,
            False
        ),
        (  # invalid, can't upgrade mon on a while mgr on b is un-upgraded
            [],
            [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
            ['mgr', 'mon'],
            ['a'],
            None,
            True
        ),
        (  # valid, only mon not on "b" is upgraded already. Case hit while making teuthology test
            [('mon', 'a', 'a')],
            [('mon', 'b', 'x'), ('mon', 'b', 'y'), ('osd', 'a', '1'), ('osd', 'b', '2')],
            ['mon', 'osd'],
            ['b'],
            None,
            False
        ),
    ]
)
@mock.patch("cephadm.module.HostCache.get_daemons")
@mock.patch("cephadm.serve.CephadmServe._get_container_image_info")
@mock.patch('cephadm.module.SpecStore.__getitem__')
def test_staggered_upgrade_validation(
        get_spec,
        get_image_info,
        get_daemons,
        upgraded: List[Tuple[str, str, str]],
        not_upgraded: List[Tuple[str, str, str, str]],
        daemon_types: Optional[str],
        hosts: Optional[str],
        services: Optional[str],
        should_block: bool,
        cephadm_module: CephadmOrchestrator,
):
    # Build DaemonDescriptions from (type, host, id) tuples, tagging each with
    # the new or old image digest depending on its upgrade state.
    def to_dds(ts: List[Tuple[str, str]], upgraded: bool) -> List[DaemonDescription]:
        dds = []
        digest = 'new_image@repo_digest' if upgraded else 'old_image@repo_digest'
        for t in ts:
            dds.append(DaemonDescription(daemon_type=t[0],
                                         hostname=t[1],
                                         daemon_id=t[2],
                                         container_image_digests=[digest],
                                         deployed_by=[digest],))
        return dds
    get_daemons.return_value = to_dds(upgraded, True) + to_dds(not_upgraded, False)
    get_image_info.return_value = ('new_id', 'ceph version 99.99.99 (hash)', ['new_image@repo_digest'])

    # Minimal stand-in for a SpecDescription: only the .spec attribute is read.
    class FakeSpecDesc():
        def __init__(self, spec):
            self.spec = spec

    def _get_spec(s):
        return FakeSpecDesc(ServiceSpec(s))

    get_spec.side_effect = _get_spec
    if should_block:
        with pytest.raises(OrchestratorError):
            cephadm_module.upgrade._validate_upgrade_filters(
                'new_image_name', daemon_types, hosts, services)
    else:
        cephadm_module.upgrade._validate_upgrade_filters(
            'new_image_name', daemon_types, hosts, services)
diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py
new file mode 100644
index 000000000..db39fe76a
--- /dev/null
+++ b/src/pybind/mgr/cephadm/upgrade.py
@@ -0,0 +1,1167 @@
import json
import logging
+import time +import uuid +from typing import TYPE_CHECKING, Optional, Dict, List, Tuple, Any + +import orchestrator +from cephadm.registry import Registry +from cephadm.serve import CephadmServe +from cephadm.services.cephadmservice import CephadmDaemonDeploySpec +from cephadm.utils import ceph_release_to_major, name_to_config_section, CEPH_UPGRADE_ORDER, \ + MONITORING_STACK_TYPES, CEPH_TYPES, GATEWAY_TYPES +from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus, daemon_type_to_service + +if TYPE_CHECKING: + from .module import CephadmOrchestrator + + +logger = logging.getLogger(__name__) + +# from ceph_fs.h +CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = (1 << 5) + + +def normalize_image_digest(digest: str, default_registry: str) -> str: + """ + Normal case: + >>> normalize_image_digest('ceph/ceph', 'docker.io') + 'docker.io/ceph/ceph' + + No change: + >>> normalize_image_digest('quay.ceph.io/ceph/ceph', 'docker.io') + 'quay.ceph.io/ceph/ceph' + + >>> normalize_image_digest('docker.io/ubuntu', 'docker.io') + 'docker.io/ubuntu' + + >>> normalize_image_digest('localhost/ceph', 'docker.io') + 'localhost/ceph' + """ + known_shortnames = [ + 'ceph/ceph', + 'ceph/daemon', + 'ceph/daemon-base', + ] + for image in known_shortnames: + if digest.startswith(image): + return f'{default_registry}/{digest}' + return digest + + +class UpgradeState: + def __init__(self, + target_name: str, + progress_id: str, + target_id: Optional[str] = None, + target_digests: Optional[List[str]] = None, + target_version: Optional[str] = None, + error: Optional[str] = None, + paused: Optional[bool] = None, + fs_original_max_mds: Optional[Dict[str, int]] = None, + fs_original_allow_standby_replay: Optional[Dict[str, bool]] = None, + daemon_types: Optional[List[str]] = None, + hosts: Optional[List[str]] = None, + services: Optional[List[str]] = None, + total_count: Optional[int] = None, + remaining_count: Optional[int] = None, + ): + self._target_name: str = target_name # Use 
CephadmUpgrade.target_image instead. + self.progress_id: str = progress_id + self.target_id: Optional[str] = target_id + self.target_digests: Optional[List[str]] = target_digests + self.target_version: Optional[str] = target_version + self.error: Optional[str] = error + self.paused: bool = paused or False + self.fs_original_max_mds: Optional[Dict[str, int]] = fs_original_max_mds + self.fs_original_allow_standby_replay: Optional[Dict[str, + bool]] = fs_original_allow_standby_replay + self.daemon_types = daemon_types + self.hosts = hosts + self.services = services + self.total_count = total_count + self.remaining_count = remaining_count + + def to_json(self) -> dict: + return { + 'target_name': self._target_name, + 'progress_id': self.progress_id, + 'target_id': self.target_id, + 'target_digests': self.target_digests, + 'target_version': self.target_version, + 'fs_original_max_mds': self.fs_original_max_mds, + 'fs_original_allow_standby_replay': self.fs_original_allow_standby_replay, + 'error': self.error, + 'paused': self.paused, + 'daemon_types': self.daemon_types, + 'hosts': self.hosts, + 'services': self.services, + 'total_count': self.total_count, + 'remaining_count': self.remaining_count, + } + + @classmethod + def from_json(cls, data: dict) -> Optional['UpgradeState']: + valid_params = UpgradeState.__init__.__code__.co_varnames + if data: + c = {k: v for k, v in data.items() if k in valid_params} + if 'repo_digest' in c: + c['target_digests'] = [c.pop('repo_digest')] + return cls(**c) + else: + return None + + +class CephadmUpgrade: + UPGRADE_ERRORS = [ + 'UPGRADE_NO_STANDBY_MGR', + 'UPGRADE_FAILED_PULL', + 'UPGRADE_REDEPLOY_DAEMON', + 'UPGRADE_BAD_TARGET_VERSION', + 'UPGRADE_EXCEPTION' + ] + + def __init__(self, mgr: "CephadmOrchestrator"): + self.mgr = mgr + + t = self.mgr.get_store('upgrade_state') + if t: + self.upgrade_state: Optional[UpgradeState] = UpgradeState.from_json(json.loads(t)) + else: + self.upgrade_state = None + + @property + def 
target_image(self) -> str: + assert self.upgrade_state + if not self.mgr.use_repo_digest: + return self.upgrade_state._target_name + if not self.upgrade_state.target_digests: + return self.upgrade_state._target_name + + # FIXME: we assume the first digest is the best one to use + return self.upgrade_state.target_digests[0] + + def upgrade_status(self) -> orchestrator.UpgradeStatusSpec: + r = orchestrator.UpgradeStatusSpec() + if self.upgrade_state: + r.target_image = self.target_image + r.in_progress = True + r.progress, r.services_complete = self._get_upgrade_info() + r.is_paused = self.upgrade_state.paused + + if self.upgrade_state.daemon_types is not None: + which_str = f'Upgrading daemons of type(s) {",".join(self.upgrade_state.daemon_types)}' + if self.upgrade_state.hosts is not None: + which_str += f' on host(s) {",".join(self.upgrade_state.hosts)}' + elif self.upgrade_state.services is not None: + which_str = f'Upgrading daemons in service(s) {",".join(self.upgrade_state.services)}' + if self.upgrade_state.hosts is not None: + which_str += f' on host(s) {",".join(self.upgrade_state.hosts)}' + elif self.upgrade_state.hosts is not None: + which_str = f'Upgrading all daemons on host(s) {",".join(self.upgrade_state.hosts)}' + else: + which_str = 'Upgrading all daemon types on all hosts' + if self.upgrade_state.total_count is not None and self.upgrade_state.remaining_count is not None: + which_str += f'. Upgrade limited to {self.upgrade_state.total_count} daemons ({self.upgrade_state.remaining_count} remaining).' 
+ r.which = which_str + + # accessing self.upgrade_info_str will throw an exception if it + # has not been set in _do_upgrade yet + try: + r.message = self.upgrade_info_str + except AttributeError: + pass + if self.upgrade_state.error: + r.message = 'Error: ' + self.upgrade_state.error + elif self.upgrade_state.paused: + r.message = 'Upgrade paused' + return r + + def _get_upgrade_info(self) -> Tuple[str, List[str]]: + if not self.upgrade_state or not self.upgrade_state.target_digests: + return '', [] + + daemons = self._get_filtered_daemons() + + if any(not d.container_image_digests for d in daemons if d.daemon_type == 'mgr'): + return '', [] + + completed_daemons = [(d.daemon_type, any(d in self.upgrade_state.target_digests for d in ( + d.container_image_digests or []))) for d in daemons if d.daemon_type] + + done = len([True for completion in completed_daemons if completion[1]]) + + completed_types = list(set([completion[0] for completion in completed_daemons if all( + c[1] for c in completed_daemons if c[0] == completion[0])])) + + return '%s/%s daemons upgraded' % (done, len(daemons)), completed_types + + def _get_filtered_daemons(self) -> List[DaemonDescription]: + # Return the set of daemons set to be upgraded with out current + # filtering parameters (or all daemons in upgrade order if no filtering + # parameter are set). 
+ assert self.upgrade_state is not None + if self.upgrade_state.daemon_types is not None: + daemons = [d for d in self.mgr.cache.get_daemons( + ) if d.daemon_type in self.upgrade_state.daemon_types] + elif self.upgrade_state.services is not None: + daemons = [] + for service in self.upgrade_state.services: + daemons += self.mgr.cache.get_daemons_by_service(service) + else: + daemons = [d for d in self.mgr.cache.get_daemons( + ) if d.daemon_type in CEPH_UPGRADE_ORDER] + if self.upgrade_state.hosts is not None: + daemons = [d for d in daemons if d.hostname in self.upgrade_state.hosts] + return daemons + + def _get_current_version(self) -> Tuple[int, int, str]: + current_version = self.mgr.version.split('ceph version ')[1] + (current_major, current_minor, _) = current_version.split('-')[0].split('.', 2) + return (int(current_major), int(current_minor), current_version) + + def _check_target_version(self, version: str) -> Optional[str]: + try: + (major, minor, _) = version.split('.', 2) + assert int(minor) >= 0 + # patch might be a number or {number}-g{sha1} + except ValueError: + return 'version must be in the form X.Y.Z (e.g., 15.2.3)' + if int(major) < 15 or (int(major) == 15 and int(minor) < 2): + return 'cephadm only supports octopus (15.2.0) or later' + + # to far a jump? 
+ current_version = self.mgr.version.split('ceph version ')[1] + (current_major, current_minor, _) = current_version.split('-')[0].split('.', 2) + if int(current_major) < int(major) - 2: + return f'ceph can only upgrade 1 or 2 major versions at a time; {current_version} -> {version} is too big a jump' + if int(current_major) > int(major): + return f'ceph cannot downgrade major versions (from {current_version} to {version})' + if int(current_major) == int(major): + if int(current_minor) > int(minor): + return f'ceph cannot downgrade to a {"rc" if minor == "1" else "dev"} release' + + # check mon min + monmap = self.mgr.get("mon_map") + mon_min = monmap.get("min_mon_release", 0) + if mon_min < int(major) - 2: + return f'min_mon_release ({mon_min}) < target {major} - 2; first complete an upgrade to an earlier release' + + # check osd min + osdmap = self.mgr.get("osd_map") + osd_min_name = osdmap.get("require_osd_release", "argonaut") + osd_min = ceph_release_to_major(osd_min_name) + if osd_min < int(major) - 2: + return f'require_osd_release ({osd_min_name} or {osd_min}) < target {major} - 2; first complete an upgrade to an earlier release' + + return None + + def upgrade_ls(self, image: Optional[str], tags: bool) -> Dict: + if not image: + image = self.mgr.container_image_base + reg_name, bare_image = image.split('/', 1) + reg = Registry(reg_name) + versions = [] + r: Dict[Any, Any] = { + "image": image, + "registry": reg_name, + "bare_image": bare_image, + } + + try: + ls = reg.get_tags(bare_image) + except ValueError as e: + raise OrchestratorError(f'{e}') + if not tags: + for t in ls: + if t[0] != 'v': + continue + v = t[1:].split('.') + if len(v) != 3: + continue + if '-' in v[2]: + continue + versions.append('.'.join(v)) + r["versions"] = sorted( + versions, + key=lambda k: list(map(int, k.split('.'))), + reverse=True + ) + else: + r["tags"] = sorted(ls) + return r + + def upgrade_start(self, image: str, version: str, daemon_types: Optional[List[str]] = None, + 
hosts: Optional[List[str]] = None, services: Optional[List[str]] = None, limit: Optional[int] = None) -> str: + if self.mgr.mode != 'root': + raise OrchestratorError('upgrade is not supported in %s mode' % ( + self.mgr.mode)) + if version: + version_error = self._check_target_version(version) + if version_error: + raise OrchestratorError(version_error) + target_name = self.mgr.container_image_base + ':v' + version + elif image: + target_name = normalize_image_digest(image, self.mgr.default_registry) + else: + raise OrchestratorError('must specify either image or version') + + if daemon_types is not None or services is not None or hosts is not None: + self._validate_upgrade_filters(target_name, daemon_types, hosts, services) + + if self.upgrade_state: + if self.upgrade_state._target_name != target_name: + raise OrchestratorError( + 'Upgrade to %s (not %s) already in progress' % + (self.upgrade_state._target_name, target_name)) + if self.upgrade_state.paused: + self.upgrade_state.paused = False + self._save_upgrade_state() + return 'Resumed upgrade to %s' % self.target_image + return 'Upgrade to %s in progress' % self.target_image + + running_mgr_count = len([daemon for daemon in self.mgr.cache.get_daemons_by_type( + 'mgr') if daemon.status == DaemonDescriptionStatus.running]) + + if running_mgr_count < 2: + raise OrchestratorError('Need at least 2 running mgr daemons for upgrade') + + self.mgr.log.info('Upgrade: Started with target %s' % target_name) + self.upgrade_state = UpgradeState( + target_name=target_name, + progress_id=str(uuid.uuid4()), + daemon_types=daemon_types, + hosts=hosts, + services=services, + total_count=limit, + remaining_count=limit, + ) + self._update_upgrade_progress(0.0) + self._save_upgrade_state() + self._clear_upgrade_health_checks() + self.mgr.event.set() + return 'Initiating upgrade to %s' % (target_name) + + def _validate_upgrade_filters(self, target_name: str, daemon_types: Optional[List[str]] = None, hosts: Optional[List[str]] = None, 
services: Optional[List[str]] = None) -> None: + def _latest_type(dtypes: List[str]) -> str: + # [::-1] gives the list in reverse + for daemon_type in CEPH_UPGRADE_ORDER[::-1]: + if daemon_type in dtypes: + return daemon_type + return '' + + def _get_earlier_daemons(dtypes: List[str], candidates: List[DaemonDescription]) -> List[DaemonDescription]: + # this function takes a list of daemon types and first finds the daemon + # type from that list that is latest in our upgrade order. Then, from + # that latest type, it filters the list of candidate daemons received + # for daemons with types earlier in the upgrade order than the latest + # type found earlier. That filtered list of daemons is returned. The + # purpose of this function is to help in finding daemons that must have + # already been upgraded for the given filtering parameters (--daemon-types, + # --services, --hosts) to be valid. + latest = _latest_type(dtypes) + if not latest: + return [] + earlier_types = '|'.join(CEPH_UPGRADE_ORDER).split(latest)[0].split('|')[:-1] + earlier_types = [t for t in earlier_types if t not in dtypes] + return [d for d in candidates if d.daemon_type in earlier_types] + + if self.upgrade_state: + raise OrchestratorError('Cannot set values for --daemon-types, --services or --hosts when upgrade already in progress.') + try: + target_id, target_version, target_digests = CephadmServe(self.mgr)._get_container_image_info(target_name) + except OrchestratorError as e: + raise OrchestratorError(f'Failed to pull {target_name}: {str(e)}') + # what we need to do here is build a list of daemons that must already be upgraded + # in order for the user's selection of daemons to upgrade to be valid. for example, + # if they say --daemon-types 'osd,mds' but mons have not been upgraded, we block. + daemons = [d for d in self.mgr.cache.get_daemons() if d.daemon_type not in MONITORING_STACK_TYPES] + err_msg_base = 'Cannot start upgrade. 
' + # "dtypes" will later be filled in with the types of daemons that will be upgraded with the given parameters + dtypes = [] + if daemon_types is not None: + dtypes = daemon_types + if hosts is not None: + dtypes = [_latest_type(dtypes)] + other_host_daemons = [ + d for d in daemons if d.hostname is not None and d.hostname not in hosts] + daemons = _get_earlier_daemons(dtypes, other_host_daemons) + else: + daemons = _get_earlier_daemons(dtypes, daemons) + err_msg_base += 'Daemons with types earlier in upgrade order than given types need upgrading.\n' + elif services is not None: + # for our purposes here we can effectively convert our list of services into the + # set of daemon types the services contain. This works because we don't allow --services + # and --daemon-types at the same time and we only allow services of the same type + sspecs = [self.mgr.spec_store[s].spec for s in services if self.mgr.spec_store[s].spec is not None] + stypes = list(set([s.service_type for s in sspecs])) + if len(stypes) != 1: + raise OrchestratorError('Doing upgrade by service only support services of one type at ' + f'a time. Found service types: {stypes}') + for stype in stypes: + dtypes += orchestrator.service_to_daemon_types(stype) + dtypes = list(set(dtypes)) + if hosts is not None: + other_host_daemons = [ + d for d in daemons if d.hostname is not None and d.hostname not in hosts] + daemons = _get_earlier_daemons(dtypes, other_host_daemons) + else: + daemons = _get_earlier_daemons(dtypes, daemons) + err_msg_base += 'Daemons with types earlier in upgrade order than daemons from given services need upgrading.\n' + elif hosts is not None: + # hosts must be handled a bit differently. For this, we really need to find all the daemon types + # that reside on hosts in the list of hosts we will upgrade. 
Then take the type from + # that list that is latest in the upgrade order and check if any daemons on hosts not in the + # provided list of hosts have a daemon with a type earlier in the upgrade order that is not upgraded. + dtypes = list(set([d.daemon_type for d in daemons if d.daemon_type is not None and d.hostname in hosts])) + other_hosts_daemons = [d for d in daemons if d.hostname is not None and d.hostname not in hosts] + daemons = _get_earlier_daemons([_latest_type(dtypes)], other_hosts_daemons) + err_msg_base += 'Daemons with types earlier in upgrade order than daemons on given host need upgrading.\n' + need_upgrade_self, n1, n2, _ = self._detect_need_upgrade(daemons, target_digests) + if need_upgrade_self and ('mgr' not in dtypes or (daemon_types is None and services is None)): + # also report active mgr as needing to be upgraded. It is not included in the resulting list + # by default as it is treated special and handled via the need_upgrade_self bool + n1.insert(0, (self.mgr.mgr_service.get_active_daemon(self.mgr.cache.get_daemons_by_type('mgr')), True)) + if n1 or n2: + raise OrchestratorError(f'{err_msg_base}Please first upgrade ' + f'{", ".join(list(set([d[0].name() for d in n1] + [d[0].name() for d in n2])))}\n' + f'NOTE: Enforced upgrade order is: {" -> ".join(CEPH_TYPES + GATEWAY_TYPES)}') + + def upgrade_pause(self) -> str: + if not self.upgrade_state: + raise OrchestratorError('No upgrade in progress') + if self.upgrade_state.paused: + return 'Upgrade to %s already paused' % self.target_image + self.upgrade_state.paused = True + self.mgr.log.info('Upgrade: Paused upgrade to %s' % self.target_image) + self._save_upgrade_state() + return 'Paused upgrade to %s' % self.target_image + + def upgrade_resume(self) -> str: + if not self.upgrade_state: + raise OrchestratorError('No upgrade in progress') + if not self.upgrade_state.paused: + return 'Upgrade to %s not paused' % self.target_image + self.upgrade_state.paused = False + self.upgrade_state.error 
= '' + self.mgr.log.info('Upgrade: Resumed upgrade to %s' % self.target_image) + self._save_upgrade_state() + self.mgr.event.set() + return 'Resumed upgrade to %s' % self.target_image + + def upgrade_stop(self) -> str: + if not self.upgrade_state: + return 'No upgrade in progress' + if self.upgrade_state.progress_id: + self.mgr.remote('progress', 'complete', + self.upgrade_state.progress_id) + target_image = self.target_image + self.mgr.log.info('Upgrade: Stopped') + self.upgrade_state = None + self._save_upgrade_state() + self._clear_upgrade_health_checks() + self.mgr.event.set() + return 'Stopped upgrade to %s' % target_image + + def continue_upgrade(self) -> bool: + """ + Returns false, if nothing was done. + :return: + """ + if self.upgrade_state and not self.upgrade_state.paused: + try: + self._do_upgrade() + except Exception as e: + self._fail_upgrade('UPGRADE_EXCEPTION', { + 'severity': 'error', + 'summary': 'Upgrade: failed due to an unexpected exception', + 'count': 1, + 'detail': [f'Unexpected exception occurred during upgrade process: {str(e)}'], + }) + return False + return True + return False + + def _wait_for_ok_to_stop( + self, s: DaemonDescription, + known: Optional[List[str]] = None, # NOTE: output argument! + ) -> bool: + # only wait a little bit; the service might go away for something + assert s.daemon_type is not None + assert s.daemon_id is not None + tries = 4 + while tries > 0: + if not self.upgrade_state or self.upgrade_state.paused: + return False + + # setting force flag to retain old functionality. 
+ # note that known is an output argument for ok_to_stop() + r = self.mgr.cephadm_services[daemon_type_to_service(s.daemon_type)].ok_to_stop([ + s.daemon_id], known=known, force=True) + + if not r.retval: + logger.info(f'Upgrade: {r.stdout}') + return True + logger.info(f'Upgrade: {r.stderr}') + + time.sleep(15) + tries -= 1 + return False + + def _clear_upgrade_health_checks(self) -> None: + for k in self.UPGRADE_ERRORS: + if k in self.mgr.health_checks: + del self.mgr.health_checks[k] + self.mgr.set_health_checks(self.mgr.health_checks) + + def _fail_upgrade(self, alert_id: str, alert: dict) -> None: + assert alert_id in self.UPGRADE_ERRORS + if not self.upgrade_state: + # this could happen if the user canceled the upgrade while we + # were doing something + return + + logger.error('Upgrade: Paused due to %s: %s' % (alert_id, + alert['summary'])) + self.upgrade_state.error = alert_id + ': ' + alert['summary'] + self.upgrade_state.paused = True + self._save_upgrade_state() + self.mgr.health_checks[alert_id] = alert + self.mgr.set_health_checks(self.mgr.health_checks) + + def _update_upgrade_progress(self, progress: float) -> None: + if not self.upgrade_state: + assert False, 'No upgrade in progress' + + if not self.upgrade_state.progress_id: + self.upgrade_state.progress_id = str(uuid.uuid4()) + self._save_upgrade_state() + self.mgr.remote('progress', 'update', self.upgrade_state.progress_id, + ev_msg='Upgrade to %s' % ( + self.upgrade_state.target_version or self.target_image + ), + ev_progress=progress, + add_to_ceph_s=True) + + def _save_upgrade_state(self) -> None: + if not self.upgrade_state: + self.mgr.set_store('upgrade_state', None) + return + self.mgr.set_store('upgrade_state', json.dumps(self.upgrade_state.to_json())) + + def get_distinct_container_image_settings(self) -> Dict[str, str]: + # get all distinct container_image settings + image_settings = {} + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config dump', + 'format': 'json', + }) + 
config = json.loads(out) + for opt in config: + if opt['name'] == 'container_image': + image_settings[opt['section']] = opt['value'] + return image_settings + + def _prepare_for_mds_upgrade( + self, + target_major: str, + need_upgrade: List[DaemonDescription] + ) -> bool: + # scale down all filesystems to 1 MDS + assert self.upgrade_state + if not self.upgrade_state.fs_original_max_mds: + self.upgrade_state.fs_original_max_mds = {} + if not self.upgrade_state.fs_original_allow_standby_replay: + self.upgrade_state.fs_original_allow_standby_replay = {} + fsmap = self.mgr.get("fs_map") + continue_upgrade = True + for fs in fsmap.get('filesystems', []): + fscid = fs["id"] + mdsmap = fs["mdsmap"] + fs_name = mdsmap["fs_name"] + + # disable allow_standby_replay? + if mdsmap['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY: + self.mgr.log.info('Upgrade: Disabling standby-replay for filesystem %s' % ( + fs_name + )) + if fscid not in self.upgrade_state.fs_original_allow_standby_replay: + self.upgrade_state.fs_original_allow_standby_replay[fscid] = True + self._save_upgrade_state() + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'fs set', + 'fs_name': fs_name, + 'var': 'allow_standby_replay', + 'val': '0', + }) + continue_upgrade = False + continue + + # scale down this filesystem? 
+ if mdsmap["max_mds"] > 1: + self.mgr.log.info('Upgrade: Scaling down filesystem %s' % ( + fs_name + )) + if fscid not in self.upgrade_state.fs_original_max_mds: + self.upgrade_state.fs_original_max_mds[fscid] = mdsmap['max_mds'] + self._save_upgrade_state() + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'fs set', + 'fs_name': fs_name, + 'var': 'max_mds', + 'val': '1', + }) + continue_upgrade = False + continue + + if not (mdsmap['in'] == [0] and len(mdsmap['up']) <= 1): + self.mgr.log.info('Upgrade: Waiting for fs %s to scale down to reach 1 MDS' % (fs_name)) + time.sleep(10) + continue_upgrade = False + continue + + if len(mdsmap['up']) == 0: + self.mgr.log.warning("Upgrade: No mds is up; continuing upgrade procedure to poke things in the right direction") + # This can happen because the current version MDS have + # incompatible compatsets; the mons will not do any promotions. + # We must upgrade to continue. + elif len(mdsmap['up']) > 0: + mdss = list(mdsmap['info'].values()) + assert len(mdss) == 1 + lone_mds = mdss[0] + if lone_mds['state'] != 'up:active': + self.mgr.log.info('Upgrade: Waiting for mds.%s to be up:active (currently %s)' % ( + lone_mds['name'], + lone_mds['state'], + )) + time.sleep(10) + continue_upgrade = False + continue + else: + assert False + + return continue_upgrade + + def _enough_mons_for_ok_to_stop(self) -> bool: + # type () -> bool + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'quorum_status', + }) + try: + j = json.loads(out) + except Exception: + raise OrchestratorError('failed to parse quorum status') + + mons = [m['name'] for m in j['monmap']['mons']] + return len(mons) > 2 + + def _enough_mds_for_ok_to_stop(self, mds_daemon: DaemonDescription) -> bool: + # type (DaemonDescription) -> bool + + # find fs this mds daemon belongs to + fsmap = self.mgr.get("fs_map") + for fs in fsmap.get('filesystems', []): + mdsmap = fs["mdsmap"] + fs_name = mdsmap["fs_name"] + + assert mds_daemon.daemon_id + if fs_name != 
mds_daemon.service_name().split('.', 1)[1]: + # wrong fs for this mds daemon + continue + + # get number of mds daemons for this fs + mds_count = len( + [daemon for daemon in self.mgr.cache.get_daemons_by_service(mds_daemon.service_name())]) + + # standby mds daemons for this fs? + if mdsmap["max_mds"] < mds_count: + return True + return False + + return True # if mds has no fs it should pass ok-to-stop + + def _detect_need_upgrade(self, daemons: List[DaemonDescription], target_digests: Optional[List[str]] = None) -> Tuple[bool, List[Tuple[DaemonDescription, bool]], List[Tuple[DaemonDescription, bool]], int]: + # this function takes a list of daemons and container digests. The purpose + # is to go through each daemon and check if the current container digests + # for that daemon match the target digests. The purpose being that we determine + # if a daemon is upgraded to a certain container image or not based on what + # container digests it has. By checking the current digests against the + # targets we can determine which daemons still need to be upgraded + need_upgrade_self = False + need_upgrade: List[Tuple[DaemonDescription, bool]] = [] + need_upgrade_deployer: List[Tuple[DaemonDescription, bool]] = [] + done = 0 + if target_digests is None: + target_digests = [] + for d in daemons: + assert d.daemon_type is not None + assert d.daemon_id is not None + assert d.hostname is not None + correct_digest = False + if (any(d in target_digests for d in (d.container_image_digests or [])) + or d.daemon_type in MONITORING_STACK_TYPES): + logger.debug('daemon %s.%s container digest correct' % ( + d.daemon_type, d.daemon_id)) + correct_digest = True + if any(d in target_digests for d in (d.deployed_by or [])): + logger.debug('daemon %s.%s deployed by correct version' % ( + d.daemon_type, d.daemon_id)) + done += 1 + continue + + if self.mgr.daemon_is_self(d.daemon_type, d.daemon_id): + logger.info('Upgrade: Need to upgrade myself (mgr.%s)' % + self.mgr.get_mgr_id()) + 
# --- tail of _detect_need_upgrade(): the `def` line is above this excerpt. ---
# The loop below classifies each daemon: ones that only need a redeploy (right
# image digest, wrong deployer) go to need_upgrade_deployer; ones on the wrong
# image go to need_upgrade.  Returns the classification plus a `done` count.
                need_upgrade_self = True
                continue

            if correct_digest:
                # Daemon already runs the target image digest but was not
                # deployed by the upgraded mgr: redeploy only, no image change.
                logger.debug('daemon %s.%s not deployed by correct version' % (
                    d.daemon_type, d.daemon_id))
                need_upgrade_deployer.append((d, True))
            else:
                logger.debug('daemon %s.%s not correct (%s, %s, %s)' % (
                    d.daemon_type, d.daemon_id,
                    d.container_image_name, d.container_image_digests, d.version))
                need_upgrade.append((d, False))

        return (need_upgrade_self, need_upgrade, need_upgrade_deployer, done)

    def _to_upgrade(self, need_upgrade: List[Tuple[DaemonDescription, bool]], target_image: str) -> Tuple[bool, List[Tuple[DaemonDescription, bool]]]:
        """Pick the subset of candidate daemons that are safe to restart now.

        Returns ``(keep_going, to_upgrade)``.  ``keep_going`` is False when a
        daemon is not yet ok-to-stop; the caller should bail out of this pass
        and retry later.  ``known_ok_to_stop`` is filled in as an output
        argument by _wait_for_ok_to_stop so peers of an already-cleared daemon
        can be restarted in the same batch.
        """
        to_upgrade: List[Tuple[DaemonDescription, bool]] = []
        known_ok_to_stop: List[str] = []
        for d_entry in need_upgrade:
            d = d_entry[0]
            assert d.daemon_type is not None
            assert d.daemon_id is not None
            assert d.hostname is not None

            if not d.container_image_id:
                if d.container_image_name == target_image:
                    # No image id recorded yet, but the name already matches:
                    # nothing to do for this daemon.
                    logger.debug(
                        'daemon %s has unknown container_image_id but has correct image name' % (d.name()))
                    continue

            if known_ok_to_stop:
                # A previous ok-to-stop check already cleared a set of
                # daemons; only their peers may join this batch.
                if d.name() in known_ok_to_stop:
                    logger.info(f'Upgrade: {d.name()} is also safe to restart')
                    to_upgrade.append(d_entry)
                continue

            if d.daemon_type == 'osd':
                # NOTE: known_ok_to_stop is an output argument for
                # _wait_for_ok_to_stop
                if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
                    return False, to_upgrade

            if d.daemon_type == 'mon' and self._enough_mons_for_ok_to_stop():
                if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
                    return False, to_upgrade

            if d.daemon_type == 'mds' and self._enough_mds_for_ok_to_stop(d):
                if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
                    return False, to_upgrade

            to_upgrade.append(d_entry)

            # if we don't have a list of others to consider, stop now
            if d.daemon_type in ['osd', 'mds', 'mon'] and not known_ok_to_stop:
                break
        return True, to_upgrade

    def _upgrade_daemons(self, to_upgrade: List[Tuple[DaemonDescription, bool]],
                         target_image: str, target_digests: Optional[List[str]] = None) -> None:
        """Redeploy each daemon in *to_upgrade* on *target_image*.

        Each entry is ``(daemon, need_deploy_only)``; deploy-only entries do
        not count against the staggered-upgrade ``remaining_count`` limit.
        Pulls the image on the daemon's host first if the host does not yet
        have one of *target_digests*.
        """
        assert self.upgrade_state is not None
        num = 1
        if target_digests is None:
            target_digests = []
        for d_entry in to_upgrade:
            if self.upgrade_state.remaining_count is not None and self.upgrade_state.remaining_count <= 0 and not d_entry[1]:
                self.mgr.log.info(f'Hit upgrade limit of {self.upgrade_state.total_count}. Stopping upgrade')
                return
            d = d_entry[0]
            assert d.daemon_type is not None
            assert d.daemon_id is not None
            assert d.hostname is not None

            # make sure host has latest container image
            out, errs, code = CephadmServe(self.mgr)._run_cephadm(
                d.hostname, '', 'inspect-image', [],
                image=target_image, no_fsid=True, error_ok=True)
            # NOTE(review): the generator variable `d` below shadows the
            # daemon `d` inside the genexp only (scoped); confusing but benign.
            if code or not any(d in target_digests for d in json.loads(''.join(out)).get('repo_digests', [])):
                logger.info('Upgrade: Pulling %s on %s' % (target_image,
                                                           d.hostname))
                self.upgrade_info_str = 'Pulling %s image on host %s' % (
                    target_image, d.hostname)
                out, errs, code = CephadmServe(self.mgr)._run_cephadm(
                    d.hostname, '', 'pull', [],
                    image=target_image, no_fsid=True, error_ok=True)
                if code:
                    self._fail_upgrade('UPGRADE_FAILED_PULL', {
                        'severity': 'warning',
                        'summary': 'Upgrade: failed to pull target image',
                        'count': 1,
                        'detail': [
                            'failed to pull %s on host %s' % (target_image,
                                                              d.hostname)],
                    })
                    return
                r = json.loads(''.join(out))
                if not any(d in target_digests for d in r.get('repo_digests', [])):
                    # The tag moved to a new digest mid-upgrade: adopt the new
                    # digests, persist, and restart this upgrade pass.
                    logger.info('Upgrade: image %s pull on %s got new digests %s (not %s), restarting' % (
                        target_image, d.hostname, r['repo_digests'], target_digests))
                    self.upgrade_info_str = 'Image %s pull on %s got new digests %s (not %s), restarting' % (
                        target_image, d.hostname, r['repo_digests'], target_digests)
                    self.upgrade_state.target_digests = r['repo_digests']
                    self._save_upgrade_state()
                    return

            self.upgrade_info_str = 'Currently upgrading %s daemons' % (d.daemon_type)

            if len(to_upgrade) > 1:
                logger.info('Upgrade: Updating %s.%s (%d/%d)' % (d.daemon_type, d.daemon_id, num, min(len(to_upgrade), self.upgrade_state.remaining_count if self.upgrade_state.remaining_count is not None else 9999999)))
            else:
                logger.info('Upgrade: Updating %s.%s' %
                            (d.daemon_type, d.daemon_id))
            action = 'Upgrading' if not d_entry[1] else 'Redeploying'
            try:
                daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(d)
                self.mgr._daemon_action(
                    daemon_spec,
                    'redeploy',
                    image=target_image if not d_entry[1] else None
                )
            except Exception as e:
                self._fail_upgrade('UPGRADE_REDEPLOY_DAEMON', {
                    'severity': 'warning',
                    'summary': f'{action} daemon {d.name()} on host {d.hostname} failed.',
                    'count': 1,
                    'detail': [
                        f'Upgrade daemon: {d.name()}: {e}'
                    ],
                })
                return
            num += 1
            if self.upgrade_state.remaining_count is not None and not d_entry[1]:
                self.upgrade_state.remaining_count -= 1
                self._save_upgrade_state()

    def _handle_need_upgrade_self(self, need_upgrade_self: bool, upgrading_mgrs: bool) -> None:
        """Fail over the active mgr so the standby (already upgraded) takes
        over; otherwise clear a stale UPGRADE_NO_STANDBY_MGR health check."""
        if need_upgrade_self:
            try:
                self.mgr.mgr_service.fail_over()
            except OrchestratorError as e:
                self._fail_upgrade('UPGRADE_NO_STANDBY_MGR', {
                    'severity': 'warning',
                    'summary': f'Upgrade: {e}',
                    'count': 1,
                    'detail': [
                        'The upgrade process needs to upgrade the mgr, '
                        'but it needs at least one standby to proceed.',
                    ],
                })
                return

            return  # unreachable code, as fail_over never returns
        elif upgrading_mgrs:
            if 'UPGRADE_NO_STANDBY_MGR' in self.mgr.health_checks:
                del self.mgr.health_checks['UPGRADE_NO_STANDBY_MGR']
                self.mgr.set_health_checks(self.mgr.health_checks)

    def _set_container_images(self, daemon_type: str, target_image: str, image_settings: Dict[str, str]) -> None:
        """Persist *target_image* as the per-daemon-type container_image
        config and remove any more-specific (per-daemon) overrides."""
        # push down configs
        daemon_type_section = name_to_config_section(daemon_type)
        if image_settings.get(daemon_type_section) != target_image:
            logger.info('Upgrade: Setting container_image for all %s' %
                        daemon_type)
            self.mgr.set_container_image(daemon_type_section, target_image)
        to_clean = []
        for section in image_settings.keys():
            if section.startswith(name_to_config_section(daemon_type) + '.'):
                to_clean.append(section)
        if to_clean:
            logger.debug('Upgrade: Cleaning up container_image for %s' %
                         to_clean)
            for section in to_clean:
                ret, image, err = self.mgr.check_mon_command({
                    'prefix': 'config rm',
                    'name': 'container_image',
                    'who': section,
                })

    def _complete_osd_upgrade(self, target_major: str, target_major_name: str) -> None:
        """After all OSDs are upgraded, raise require_osd_release to the
        target major release if it is currently lower."""
        osdmap = self.mgr.get("osd_map")
        osd_min_name = osdmap.get("require_osd_release", "argonaut")
        osd_min = ceph_release_to_major(osd_min_name)
        if osd_min < int(target_major):
            logger.info(
                f'Upgrade: Setting require_osd_release to {target_major} {target_major_name}')
            ret, _, err = self.mgr.check_mon_command({
                'prefix': 'osd require-osd-release',
                'release': target_major_name,
            })

    def _complete_mds_upgrade(self) -> None:
        """Restore per-filesystem max_mds and allow_standby_replay settings
        that were scaled down before the MDS upgrade, then clear the saved
        originals from the persisted upgrade state."""
        assert self.upgrade_state is not None
        if self.upgrade_state.fs_original_max_mds:
            for fs in self.mgr.get("fs_map")['filesystems']:
                fscid = fs["id"]
                fs_name = fs['mdsmap']['fs_name']
                new_max = self.upgrade_state.fs_original_max_mds.get(fscid, 1)
                if new_max > 1:
                    self.mgr.log.info('Upgrade: Scaling up filesystem %s max_mds to %d' % (
                        fs_name, new_max
                    ))
                    ret, _, err = self.mgr.check_mon_command({
                        'prefix': 'fs set',
                        'fs_name': fs_name,
                        'var': 'max_mds',
                        'val': str(new_max),
                    })

            self.upgrade_state.fs_original_max_mds = {}
            self._save_upgrade_state()
        if self.upgrade_state.fs_original_allow_standby_replay:
            for fs in self.mgr.get("fs_map")['filesystems']:
                fscid = fs["id"]
                fs_name = fs['mdsmap']['fs_name']
                asr = self.upgrade_state.fs_original_allow_standby_replay.get(fscid, False)
                if asr:
                    self.mgr.log.info('Upgrade: Enabling allow_standby_replay on filesystem %s' % (
                        fs_name
                    ))
                    ret, _, err = self.mgr.check_mon_command({
                        'prefix': 'fs set',
                        'fs_name': fs_name,
                        'var': 'allow_standby_replay',
                        'val': '1'
                    })

            self.upgrade_state.fs_original_allow_standby_replay = {}
            self._save_upgrade_state()

    def _mark_upgrade_complete(self) -> None:
        """Finish the progress event, drop the persisted upgrade state, and
        log completion.  Safe to call more than once."""
        if not self.upgrade_state:
            logger.debug('_mark_upgrade_complete upgrade already marked complete, exiting')
            return
        logger.info('Upgrade: Complete!')
        if self.upgrade_state.progress_id:
            self.mgr.remote('progress', 'complete',
                            self.upgrade_state.progress_id)
        self.upgrade_state = None
        self._save_upgrade_state()

    def _do_upgrade(self):
        # type: () -> None
        """One pass of the upgrade state machine.

        Walks daemon types in CEPH_UPGRADE_ORDER, upgrading each type fully
        before moving on.  Returns early whenever work was scheduled or a
        wait is needed; the serve loop calls this again to make progress.
        """
        if not self.upgrade_state:
            logger.debug('_do_upgrade no state, exiting')
            return

        target_image = self.target_image
        target_id = self.upgrade_state.target_id
        target_digests = self.upgrade_state.target_digests
        target_version = self.upgrade_state.target_version

        first = False
        if not target_id or not target_version or not target_digests:
            # need to learn the container hash
            logger.info('Upgrade: First pull of %s' % target_image)
            self.upgrade_info_str = 'Doing first pull of %s image' % (target_image)
            try:
                target_id, target_version, target_digests = CephadmServe(self.mgr)._get_container_image_info(
                    target_image)
            except OrchestratorError as e:
                self._fail_upgrade('UPGRADE_FAILED_PULL', {
                    'severity': 'warning',
                    'summary': 'Upgrade: failed to pull target image',
                    'count': 1,
                    'detail': [str(e)],
                })
                return
            if not target_version:
                self._fail_upgrade('UPGRADE_FAILED_PULL', {
                    'severity': 'warning',
                    'summary': 'Upgrade: failed to pull target image',
                    'count': 1,
                    'detail': ['unable to extract ceph version from container'],
                })
                return
            self.upgrade_state.target_id = target_id
            # extract the version portion of 'ceph version {version} ({sha1})'
            self.upgrade_state.target_version = target_version.split(' ')[2]
            self.upgrade_state.target_digests = target_digests
            self._save_upgrade_state()
            target_image = self.target_image
            first = True

        if target_digests is None:
            target_digests = []
        if target_version.startswith('ceph version '):
            # tolerate/fix upgrade state from older version
            self.upgrade_state.target_version = target_version.split(' ')[2]
            target_version = self.upgrade_state.target_version
        (target_major, _) = target_version.split('.', 1)
        target_major_name = self.mgr.lookup_release_name(int(target_major))

        if first:
            logger.info('Upgrade: Target is version %s (%s)' % (
                target_version, target_major_name))
            logger.info('Upgrade: Target container is %s, digests %s' % (
                target_image, target_digests))

        version_error = self._check_target_version(target_version)
        if version_error:
            self._fail_upgrade('UPGRADE_BAD_TARGET_VERSION', {
                'severity': 'error',
                'summary': f'Upgrade: cannot upgrade/downgrade to {target_version}',
                'count': 1,
                'detail': [version_error],
            })
            return

        image_settings = self.get_distinct_container_image_settings()

        # Older monitors (pre-v16.2.5) asserted that FSMap::compat ==
        # MDSMap::compat for all fs. This is no longer the case beginning in
        # v16.2.5. We must disable the sanity checks during upgrade.
        # N.B.: we don't bother confirming the operator has not already
        # disabled this or saving the config value.
        self.mgr.check_mon_command({
            'prefix': 'config set',
            'name': 'mon_mds_skip_sanity',
            'value': '1',
            'who': 'mon',
        })

        if self.upgrade_state.daemon_types is not None:
            logger.debug(f'Filtering daemons to upgrade by daemon types: {self.upgrade_state.daemon_types}')
            daemons = [d for d in self.mgr.cache.get_daemons() if d.daemon_type in self.upgrade_state.daemon_types]
        elif self.upgrade_state.services is not None:
            # NOTE(review): this debug line interpolates daemon_types but this
            # is the *services* branch — looks like a copy/paste slip in the
            # log message only; the filtering below correctly uses services.
            logger.debug(f'Filtering daemons to upgrade by services: {self.upgrade_state.daemon_types}')
            daemons = []
            for service in self.upgrade_state.services:
                daemons += self.mgr.cache.get_daemons_by_service(service)
        else:
            daemons = [d for d in self.mgr.cache.get_daemons() if d.daemon_type in CEPH_UPGRADE_ORDER]
        if self.upgrade_state.hosts is not None:
            logger.debug(f'Filtering daemons to upgrade by hosts: {self.upgrade_state.hosts}')
            daemons = [d for d in daemons if d.hostname in self.upgrade_state.hosts]
        upgraded_daemon_count: int = 0
        for daemon_type in CEPH_UPGRADE_ORDER:
            if self.upgrade_state.remaining_count is not None and self.upgrade_state.remaining_count <= 0:
                # we hit our limit and should end the upgrade
                # except for cases where we only need to redeploy, but not actually upgrade
                # the image (which we don't count towards our limit). This case only occurs with mgr
                # and monitoring stack daemons. Additionally, this case is only valid if
                # the active mgr is already upgraded.
                if any(d in target_digests for d in self.mgr.get_active_mgr_digests()):
                    if daemon_type not in MONITORING_STACK_TYPES and daemon_type != 'mgr':
                        continue
                else:
                    self._mark_upgrade_complete()
                    return
            logger.debug('Upgrade: Checking %s daemons' % daemon_type)
            daemons_of_type = [d for d in daemons if d.daemon_type == daemon_type]

            need_upgrade_self, need_upgrade, need_upgrade_deployer, done = self._detect_need_upgrade(daemons_of_type, target_digests)
            upgraded_daemon_count += done
            self._update_upgrade_progress(upgraded_daemon_count / len(daemons))

            # make sure mgr and monitoring stack daemons are properly redeployed in staggered upgrade scenarios
            if daemon_type == 'mgr' or daemon_type in MONITORING_STACK_TYPES:
                if any(d in target_digests for d in self.mgr.get_active_mgr_digests()):
                    need_upgrade_names = [d[0].name() for d in need_upgrade] + [d[0].name() for d in need_upgrade_deployer]
                    dds = [d for d in self.mgr.cache.get_daemons_by_type(daemon_type) if d.name() not in need_upgrade_names]
                    need_upgrade_active, n1, n2, __ = self._detect_need_upgrade(dds, target_digests)
                    if not n1:
                        if not need_upgrade_self and need_upgrade_active:
                            need_upgrade_self = True
                        need_upgrade_deployer += n2
                else:
                    # no point in trying to redeploy with new version if active mgr is not on the new version
                    need_upgrade_deployer = []

            if not need_upgrade_self:
                # only after the mgr itself is upgraded can we expect daemons to have
                # deployed_by == target_digests
                need_upgrade += need_upgrade_deployer

            # prepare filesystems for daemon upgrades?
            if (
                daemon_type == 'mds'
                and need_upgrade
                and not self._prepare_for_mds_upgrade(target_major, [d_entry[0] for d_entry in need_upgrade])
            ):
                return

            if need_upgrade:
                self.upgrade_info_str = 'Currently upgrading %s daemons' % (daemon_type)

            _continue, to_upgrade = self._to_upgrade(need_upgrade, target_image)
            if not _continue:
                return
            self._upgrade_daemons(to_upgrade, target_image, target_digests)
            if to_upgrade:
                return

            self._handle_need_upgrade_self(need_upgrade_self, daemon_type == 'mgr')

            # following bits of _do_upgrade are for completing upgrade for given
            # types. If we haven't actually finished upgrading all the daemons
            # of this type, we should exit the loop here
            _, n1, n2, _ = self._detect_need_upgrade(self.mgr.cache.get_daemons_by_type(daemon_type), target_digests)
            if n1 or n2:
                continue

            # complete mon upgrade?
            if daemon_type == 'mon':
                if not self.mgr.get("have_local_config_map"):
                    logger.info('Upgrade: Restarting mgr now that mons are running pacific')
                    need_upgrade_self = True

            self._handle_need_upgrade_self(need_upgrade_self, daemon_type == 'mgr')

            # make sure 'ceph versions' agrees
            ret, out_ver, err = self.mgr.check_mon_command({
                'prefix': 'versions',
            })
            j = json.loads(out_ver)
            for version, count in j.get(daemon_type, {}).items():
                short_version = version.split(' ')[2]
                if short_version != target_version:
                    logger.warning(
                        'Upgrade: %d %s daemon(s) are %s != target %s' %
                        (count, daemon_type, short_version, target_version))

            self._set_container_images(daemon_type, target_image, image_settings)

            logger.debug('Upgrade: All %s daemons are up to date.' % daemon_type)

            # complete osd upgrade?
            if daemon_type == 'osd':
                self._complete_osd_upgrade(target_major, target_major_name)

            # complete mds upgrade?
            if daemon_type == 'mds':
                self._complete_mds_upgrade()

            logger.debug('Upgrade: Upgraded %s daemon(s).' % daemon_type)

        # clean up
        logger.info('Upgrade: Finalizing container_image settings')
        self.mgr.set_container_image('global', target_image)

        for daemon_type in CEPH_UPGRADE_ORDER:
            ret, image, err = self.mgr.check_mon_command({
                'prefix': 'config rm',
                'name': 'container_image',
                'who': name_to_config_section(daemon_type),
            })

        self.mgr.check_mon_command({
            'prefix': 'config rm',
            'name': 'mon_mds_skip_sanity',
            'who': 'mon',
        })

        self._mark_upgrade_complete()
        return

# ---------------------------------------------------------------------------
# (The original diff continues with a second new file:
#  src/pybind/mgr/cephadm/utils.py.  Its head follows.)
import logging
import json
import socket
from enum import Enum
from functools import wraps
from typing import Optional, Callable, TypeVar, List, NewType, TYPE_CHECKING, Any, NamedTuple
from orchestrator import OrchestratorError

if TYPE_CHECKING:
    from cephadm import CephadmOrchestrator

T = TypeVar('T')
logger = logging.getLogger(__name__)

# Alias marking strings that are valid ceph config entity names (e.g. 'mon',
# 'client.rgw.foo').
ConfEntity = NewType('ConfEntity', str)


class CephadmNoImage(Enum):
    # Sentinel: "run cephadm without an --image argument".
    token = 1


# ceph daemon types that use the ceph container image.
# NOTE: order important here as these are used for upgrade order
CEPH_TYPES = ['mgr', 'mon', 'crash', 'osd', 'mds', 'rgw', 'rbd-mirror', 'cephfs-mirror']
GATEWAY_TYPES = ['iscsi', 'nfs']
MONITORING_STACK_TYPES = ['node-exporter', 'prometheus', 'alertmanager', 'grafana']
RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES = ['nfs']

CEPH_UPGRADE_ORDER = CEPH_TYPES + GATEWAY_TYPES + MONITORING_STACK_TYPES

# these daemon types use the ceph container image
CEPH_IMAGE_TYPES = CEPH_TYPES + ['iscsi', 'nfs']

# Used for _run_cephadm used for check-host etc that don't require an --image parameter
cephadmNoImage = CephadmNoImage.token


class ContainerInspectInfo(NamedTuple):
    """Result of inspecting a container image via cephadm."""
    image_id: str
    # 'ceph version ...' string extracted from the image, if any
    ceph_version: Optional[str]
    # repo@sha256:... digests for the image, if any
    repo_digests: Optional[List[str]]


def name_to_config_section(name: str) -> ConfEntity:
    """
    Map from daemon names to ceph entity names (as seen in config)
    """
    daemon_type = name.split('.', 1)[0]
    if daemon_type in ['rgw', 'rbd-mirror', 'nfs', 'crash', 'iscsi']:
        return ConfEntity('client.' + name)
    elif daemon_type in ['mon', 'osd', 'mds', 'mgr', 'client']:
        return ConfEntity(name)
    else:
        # unknown types fall back to the global mon section
        return ConfEntity('mon')


def forall_hosts(f: Callable[..., T]) -> Callable[..., List[T]]:
    """Decorate *f* so that calling it with a list of argument tuples maps
    ``f`` over the tuples on the orchestrator's worker pool.

    Supports both ``f(vals)`` (free function) and ``self.f(vals)`` (bound
    method) call shapes; any other arity is a programming error.
    """
    @wraps(f)
    def forall_hosts_wrapper(*args: Any) -> List[T]:
        from cephadm.module import CephadmOrchestrator

        # Some weird logic to make calling functions with multiple arguments work.
        if len(args) == 1:
            vals = args[0]
            self = None
        elif len(args) == 2:
            self, vals = args
        else:
            # BUG FIX: this used to be `assert 'either f([...]) or self.f([...])'`,
            # an assert on a non-empty string literal which can never fail, so a
            # bad call shape fell through to an unbound `vals` (NameError).
            # Fail loudly and descriptively instead.
            raise TypeError('forall_hosts: expected either f([...]) or self.f([...])')

        def do_work(arg: Any) -> T:
            # Normalize a bare argument into a 1-tuple so f(*arg) works.
            if not isinstance(arg, tuple):
                arg = (arg, )
            try:
                if self:
                    return f(self, *arg)
                return f(*arg)
            except Exception:
                # log with full traceback, then let the pool propagate it
                logger.exception(f'executing {f.__name__}({args}) failed.')
                raise

        assert CephadmOrchestrator.instance is not None
        return CephadmOrchestrator.instance._worker_pool.map(do_work, vals)

    return forall_hosts_wrapper


def get_cluster_health(mgr: 'CephadmOrchestrator') -> str:
    """Return the cluster health status string (e.g. 'HEALTH_OK').

    Raises OrchestratorError if the mon's health output cannot be parsed.
    """
    # check cluster health
    ret, out, err = mgr.check_mon_command({
        'prefix': 'health',
        'format': 'json',
    })
    try:
        j = json.loads(out)
    except ValueError:
        msg = 'Failed to parse health status: Cannot decode JSON'
        logger.exception('%s: \'%s\'' % (msg, out))
        raise OrchestratorError('failed to parse health status')

    return j['status']


def is_repo_digest(image_name: str) -> bool:
    """
    repo digest are something like "ceph/ceph@sha256:blablabla"
    """
    return '@' in image_name


def resolve_ip(hostname: str) -> str:
    """Resolve *hostname* to an IP address, preferring IPv4.

    Raises OrchestratorError when resolution fails.
    """
    try:
        r = socket.getaddrinfo(hostname, None, flags=socket.AI_CANONNAME,
                               type=socket.SOCK_STREAM)
        # pick first v4 IP, if present
        for a in r:
            if a[0] == socket.AF_INET:
                return a[4][0]
        return r[0][4][0]
    except socket.gaierror as e:
        raise OrchestratorError(f"Cannot resolve ip for host {hostname}: {e}")


def ceph_release_to_major(release: str) -> int:
    """Map a release name to its major version number ('argonaut' -> 1,
    'pacific' -> 16): releases are named alphabetically."""
    return ord(release[0]) - ord('a') + 1


def file_mode_to_str(mode: int) -> str:
    """Render the low 9 permission bits of *mode* as 'rwxr-x---' style."""
    r = ''
    for shift in range(0, 9, 3):
        r = (
            f'{"r" if (mode >> shift) & 4 else "-"}'
            f'{"w" if (mode >> shift) & 2 else "-"}'
            f'{"x" if (mode >> shift) & 1 else "-"}'
        ) + r
    return r
# (The original diff continues with a third new file:
#  src/pybind/mgr/cephadm/vagrant.config.example.json.)
b/src/pybind/mgr/cephadm/vagrant.config.example.json @@ -0,0 +1,13 @@ +/** + * To use a permanent config, copy this file to "vagrant.config.json", + * edit it and remove this comment because comments are not allowed + * in a valid JSON file. + */ + +{ + "mgrs": 1, + "mons": 1, + "osds": 1, + "disks": 2 +} + |