Diffstat
-rw-r--r--  src/pybind/mgr/cephadm/.gitignore | 2
-rw-r--r--  src/pybind/mgr/cephadm/HACKING.rst | 272
-rw-r--r--  src/pybind/mgr/cephadm/Vagrantfile | 66
-rw-r--r--  src/pybind/mgr/cephadm/__init__.py | 10
-rw-r--r--  src/pybind/mgr/cephadm/autotune.py | 54
-rw-r--r--  src/pybind/mgr/cephadm/ceph.repo | 23
-rw-r--r--  src/pybind/mgr/cephadm/configchecks.py | 705
-rw-r--r--  src/pybind/mgr/cephadm/inventory.py | 1019
-rw-r--r--  src/pybind/mgr/cephadm/migrations.py | 333
-rw-r--r--  src/pybind/mgr/cephadm/module.py | 2974
-rw-r--r--  src/pybind/mgr/cephadm/offline_watcher.py | 70
-rw-r--r--  src/pybind/mgr/cephadm/registry.py | 65
-rw-r--r--  src/pybind/mgr/cephadm/remotes.py | 50
-rw-r--r--  src/pybind/mgr/cephadm/schedule.py | 447
-rw-r--r--  src/pybind/mgr/cephadm/serve.py | 1487
-rw-r--r--  src/pybind/mgr/cephadm/services/__init__.py | 0
-rw-r--r--  src/pybind/mgr/cephadm/services/cephadmservice.py | 1043
-rw-r--r--  src/pybind/mgr/cephadm/services/container.py | 29
-rw-r--r--  src/pybind/mgr/cephadm/services/exporter.py | 147
-rw-r--r--  src/pybind/mgr/cephadm/services/ingress.py | 296
-rw-r--r--  src/pybind/mgr/cephadm/services/iscsi.py | 210
-rw-r--r--  src/pybind/mgr/cephadm/services/monitoring.py | 502
-rw-r--r--  src/pybind/mgr/cephadm/services/nfs.py | 290
-rw-r--r--  src/pybind/mgr/cephadm/services/osd.py | 956
-rw-r--r--  src/pybind/mgr/cephadm/template.py | 109
-rw-r--r--  src/pybind/mgr/cephadm/templates/blink_device_light_cmd.j2 | 1
-rw-r--r--  src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 | 47
-rw-r--r--  src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2 | 18
-rw-r--r--  src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 | 24
-rw-r--r--  src/pybind/mgr/cephadm/templates/services/ingress/haproxy.cfg.j2 | 83
-rw-r--r--  src/pybind/mgr/cephadm/templates/services/ingress/keepalived.conf.j2 | 34
-rw-r--r--  src/pybind/mgr/cephadm/templates/services/iscsi/iscsi-gateway.cfg.j2 | 13
-rw-r--r--  src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2 | 35
-rw-r--r--  src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 | 41
-rw-r--r--  src/pybind/mgr/cephadm/tests/__init__.py | 0
-rw-r--r--  src/pybind/mgr/cephadm/tests/conftest.py | 27
-rw-r--r--  src/pybind/mgr/cephadm/tests/fixtures.py | 151
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_autotune.py | 69
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_cephadm.py | 1805
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_completion.py | 40
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_configchecks.py | 668
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_facts.py | 10
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_migration.py | 229
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_osd_removal.py | 298
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_scheduling.py | 1591
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_services.py | 1031
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_spec.py | 600
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_template.py | 33
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_upgrade.py | 322
-rw-r--r--  src/pybind/mgr/cephadm/upgrade.py | 1167
-rw-r--r--  src/pybind/mgr/cephadm/utils.py | 136
-rw-r--r--  src/pybind/mgr/cephadm/vagrant.config.example.json | 13
52 files changed, 19645 insertions, 0 deletions
diff --git a/src/pybind/mgr/cephadm/.gitignore b/src/pybind/mgr/cephadm/.gitignore
new file mode 100644
index 000000000..a273f8603
--- /dev/null
+++ b/src/pybind/mgr/cephadm/.gitignore
@@ -0,0 +1,2 @@
+.vagrant
+ssh-config
diff --git a/src/pybind/mgr/cephadm/HACKING.rst b/src/pybind/mgr/cephadm/HACKING.rst
new file mode 100644
index 000000000..fa6ea9e1b
--- /dev/null
+++ b/src/pybind/mgr/cephadm/HACKING.rst
@@ -0,0 +1,272 @@
+Development
+===========
+
+
+There are multiple ways to set up a development environment for the SSH orchestrator.
+In the following I'll use the `vstart` method.
+
+1) Make sure remoto is installed (0.35 or newer)
+
+2) Use vstart to spin up a cluster
+
+
+::
+
+ # ../src/vstart.sh -n --cephadm
+
+*Note that when you specify `--cephadm` you need passwordless ssh access to localhost*
+
+It will add your ~/.ssh/id_rsa and ~/.ssh/id_rsa.pub to `mgr/ssh/ssh_identity_{key, pub}`
+and add your $HOSTNAME to the list of known hosts.
+
+This will also enable the cephadm mgr module and enable it as the orchestrator backend.
+
+*Optional:*
+
+While the above is sufficient for most operations, you may want to add a second host to the mix.
+There is a `Vagrantfile` for creating a minimal cluster in `src/pybind/mgr/cephadm/`.
+
+If you wish to extend the one-node-localhost cluster to, for example, test more sophisticated OSD deployments, you can follow the next steps:
+
+All of the following steps are run from within the `src/pybind/mgr/cephadm` directory.
+
+
+1) Spawn VMs
+
+::
+
+ # vagrant up
+
+This will spawn three machines by default:
+mon0, mgr0 and osd0, with 2 additional disks attached to osd0.
+
+You can change that by passing the `MONS` (default: 1), `MGRS` (default: 1), `OSDS` (default: 1) and
+`DISKS` (default: 2) environment variables to overwrite the defaults. To avoid having to set the
+environment variables every time, you can instead put the values in a JSON config file; see
+`./vagrant.config.example.json` for details.
+
+The VMs will also come with the necessary packages preinstalled, as well as your ~/.ssh/id_rsa.pub key
+injected (for the users root and vagrant; the cephadm orchestrator currently connects as root).
+
+
+2) Update the ssh-config
+
+The cephadm orchestrator needs to understand how to connect to the new node. Most likely the VM
+isn't reachable with the default settings used:
+
+::
+
+    Host *
+    User root
+    StrictHostKeyChecking no
+
+You want to adjust this by retrieving an adapted ssh_config from Vagrant.
+
+::
+
+ # vagrant ssh-config > ssh-config
+
+
+Now set the newly created config for Ceph.
+
+::
+
+ # ceph cephadm set-ssh-config -i <path_to_ssh_conf>
+
+
+3) Add the new host
+
+Add the newly created host(s) to the inventory.
+
+::
+
+
+ # ceph orch host add <host>
+
+
+4) Verify the inventory
+
+You should see the hostname in the list.
+
+::
+
+ # ceph orch host ls
+
+
+5) Verify the devices
+
+To verify that all disks are present and in good shape, check that all devices have been
+discovered and can be found:
+
+::
+
+ # ceph orch device ls
+
+
+6) Make a snapshot of all your VMs!
+
+To avoid having to go through the whole setup again, snapshot your VMs so that you can revert
+them once they get dirty.
+
+In `this repository <https://github.com/Devp00l/vagrant-helper-scripts>`_ you can find two
+scripts that will help you snapshot and revert your VMs, without having to snapshot and revert
+each VM individually.
+
+
+Understanding ``AsyncCompletion``
+=================================
+
+How can I store temporary variables?
+------------------------------------
+
+Let's imagine you want to write code similar to
+
+.. code:: python
+
+ hosts = self.get_hosts()
+ inventory = self.get_inventory(hosts)
+ return self._create_osd(hosts, drive_group, inventory)
+
+That won't work, as ``get_hosts`` and ``get_inventory`` return objects
+of type ``AsyncCompletion``.
+
+Now let's imagine a Python 3 world, where we can use ``async`` and
+``await``. Then we could actually write this like so:
+
+.. code:: python
+
+ hosts = await self.get_hosts()
+ inventory = await self.get_inventory(hosts)
+ return self._create_osd(hosts, drive_group, inventory)
+
+Let's use a simple example to make this clear:
+
+.. code:: python
+
+ val = await func_1()
+ return func_2(val)
+
+As we're not yet in Python 3, we need to write ``await`` manually by
+calling ``orchestrator.Completion.then()``:
+
+.. code:: python
+
+ func_1().then(lambda val: func_2(val))
+
+ # or
+ func_1().then(func_2)
+
+Now let's desugar the original example:
+
+.. code:: python
+
+ hosts = await self.get_hosts()
+ inventory = await self.get_inventory(hosts)
+ return self._create_osd(hosts, drive_group, inventory)
+
+Now let's replace one ``async`` at a time:
+
+.. code:: python
+
+    hosts = await self.get_hosts()
+    return self.get_inventory(hosts).then(lambda inventory:
+        self._create_osd(hosts, drive_group, inventory))
+
+Then finally:
+
+.. code:: python
+
+    self.get_hosts().then(lambda hosts:
+        self.get_inventory(hosts).then(lambda inventory:
+            self._create_osd(hosts,
+                             drive_group, inventory)))
+
+This also works without lambdas:
+
+.. code:: python
+
+    def call_inventory(hosts):
+        def call_create(inventory):
+            return self._create_osd(hosts, drive_group, inventory)
+
+        return self.get_inventory(hosts).then(call_create)
+
+    self.get_hosts().then(call_inventory)
+
+We should add support for ``await`` as soon as we're on Python 3.
+
+I want to call my function for every host!
+------------------------------------------
+
+Imagine you have a function that looks like so:
+
+.. code:: python
+
+ @async_completion
+ def deploy_stuff(name, node):
+ ...
+
+And you want to call ``deploy_stuff`` like so:
+
+.. code:: python
+
+ return [deploy_stuff(name, node) for node in nodes]
+
+This won't work as expected. The number of ``AsyncCompletion`` objects
+created should be ``O(1)``. But there is a solution:
+``@async_map_completion``
+
+.. code:: python
+
+ @async_map_completion
+ def deploy_stuff(name, node):
+ ...
+
+ return deploy_stuff([(name, node) for node in nodes])
+
+This way, we're only creating one ``AsyncCompletion`` object. Note that
+you should not create a new ``AsyncCompletion`` within ``deploy_stuff``, as
+we would then no longer have ``O(1)`` completions:
+
+.. code:: python
+
+ @async_completion
+ def other_async_function():
+ ...
+
+ @async_map_completion
+ def deploy_stuff(name, node):
+ return other_async_function() # wrong!
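+
+If a second asynchronous step per node is really needed, a pattern that keeps
+the number of completions bounded is to keep ``deploy_stuff`` itself
+synchronous and chain the extra step onto the mapped completion with
+``then()``. This is only a rough sketch of the idea (``deploy_everywhere`` is
+a made-up helper, and the exact decorator semantics may differ):
+
+.. code:: python
+
+    @async_map_completion
+    def deploy_stuff(name, node):
+        ...  # synchronous per-node work only
+
+    def deploy_everywhere(nodes):
+        return deploy_stuff([(name, node) for node in nodes]).then(
+            other_async_function)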
+
+Why do we need this?
+--------------------
+
+I've tried to look into making Completions composable by being able to
+call one completion from another completion, i.e. making them re-usable
+using Promises. E.g.:
+
+.. code:: python
+
+ >>> return self.get_hosts().then(self._create_osd)
+
+where ``get_hosts`` returns a Completion of a list of hosts and
+``_create_osd`` takes a list of hosts.
+
+The concept behind this is to store the computation steps explicitly and
+then evaluate the chain explicitly:
+
+.. code:: python
+
+ p = Completion(on_complete=lambda x: x*2).then(on_complete=lambda x: str(x))
+ p.finalize(2)
+ assert p.result == "4"
+
+or graphically:
+
+::
+
+ +---------------+ +-----------------+
+ | | then | |
+ | lambda x: x*2 | +--> | lambda x: str(x)|
+ | | | |
+ +---------------+ +-----------------+
diff --git a/src/pybind/mgr/cephadm/Vagrantfile b/src/pybind/mgr/cephadm/Vagrantfile
new file mode 100644
index 000000000..3a08380c3
--- /dev/null
+++ b/src/pybind/mgr/cephadm/Vagrantfile
@@ -0,0 +1,66 @@
+# vi: set ft=ruby :
+#
+# In order to reduce the need of recreating all vagrant boxes every time they
+# get dirty, snapshot them and revert the snapshot instead.
+# Two helpful scripts to do this easily can be found here:
+# https://github.com/Devp00l/vagrant-helper-scripts
+
+require 'json'
+configFileName = 'vagrant.config.json'
+CONFIG = File.file?(configFileName) && JSON.parse(File.read(File.join(File.dirname(__FILE__), configFileName)))
+
+def getConfig(name, default)
+ down = name.downcase
+ up = name.upcase
+ CONFIG && CONFIG[down] ? CONFIG[down] : (ENV[up] ? ENV[up].to_i : default)
+end
+
+OSDS = getConfig('OSDS', 1)
+MGRS = getConfig('MGRS', 1)
+MONS = getConfig('MONS', 1)
+DISKS = getConfig('DISKS', 2)
+
+# Activate only for test purposes, as it changes the output of each vagrant command, like the one used to get the ssh_config.
+# puts "Your setup:","OSDs: #{OSDS}","MGRs: #{MGRS}","MONs: #{MONS}","Disks per OSD: #{DISKS}"
+
+Vagrant.configure("2") do |config|
+ config.vm.synced_folder ".", "/vagrant", disabled: true
+ config.vm.network "private_network", type: "dhcp"
+ config.vm.box = "centos/8"
+
+ (0..MONS - 1).each do |i|
+ config.vm.define "mon#{i}" do |mon|
+ mon.vm.hostname = "mon#{i}"
+ end
+ end
+ (0..MGRS - 1).each do |i|
+ config.vm.define "mgr#{i}" do |mgr|
+ mgr.vm.hostname = "mgr#{i}"
+ end
+ end
+ (0..OSDS - 1).each do |i|
+ config.vm.define "osd#{i}" do |osd|
+ osd.vm.hostname = "osd#{i}"
+ osd.vm.provider :libvirt do |libvirt|
+ (0..DISKS - 1).each do |d|
+ # In ruby value.chr makes ASCII char from value
+ libvirt.storage :file, :size => '20G', :device => "vd#{(98+d).chr}#{i}"
+ end
+ end
+ end
+ end
+
+ config.vm.provision "file", source: "~/.ssh/id_rsa.pub", destination: "~/.ssh/id_rsa.pub"
+ config.vm.provision "shell", inline: <<-SHELL
+ cat /home/vagrant/.ssh/id_rsa.pub >> /home/vagrant/.ssh/authorized_keys
+ sudo cp -r /home/vagrant/.ssh /root/.ssh
+ SHELL
+
+ config.vm.provision "shell", inline: <<-SHELL
+ sudo yum install -y yum-utils
+ sudo yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
+ sudo rpm --import 'https://download.ceph.com/keys/release.asc'
+ curl -L https://shaman.ceph.com/api/repos/ceph/master/latest/centos/8/repo/ | sudo tee /etc/yum.repos.d/shaman.repo
+ sudo yum install -y python36 podman ceph
+ SHELL
+end
diff --git a/src/pybind/mgr/cephadm/__init__.py b/src/pybind/mgr/cephadm/__init__.py
new file mode 100644
index 000000000..597d883f7
--- /dev/null
+++ b/src/pybind/mgr/cephadm/__init__.py
@@ -0,0 +1,10 @@
+from .module import CephadmOrchestrator
+
+__all__ = [
+ "CephadmOrchestrator",
+]
+
+import os
+if 'UNITTEST' in os.environ:
+ import tests
+ __all__.append(tests.__name__)
diff --git a/src/pybind/mgr/cephadm/autotune.py b/src/pybind/mgr/cephadm/autotune.py
new file mode 100644
index 000000000..51c931cba
--- /dev/null
+++ b/src/pybind/mgr/cephadm/autotune.py
@@ -0,0 +1,54 @@
+import logging
+from typing import List, Optional, Callable, Any, Tuple
+
+from orchestrator._interface import DaemonDescription
+
+logger = logging.getLogger(__name__)
+
+
+class MemoryAutotuner(object):
+
+ min_size_by_type = {
+ 'mds': 4096 * 1048576,
+ 'mgr': 4096 * 1048576,
+ 'mon': 1024 * 1048576,
+ 'crash': 128 * 1048576,
+ 'keepalived': 128 * 1048576,
+ 'haproxy': 128 * 1048576,
+ }
+ default_size = 1024 * 1048576
+
+ def __init__(
+ self,
+ daemons: List[DaemonDescription],
+ config_get: Callable[[str, str], Any],
+ total_mem: int,
+ ):
+ self.daemons = daemons
+ self.config_get = config_get
+ self.total_mem = total_mem
+
+ def tune(self) -> Tuple[Optional[int], List[str]]:
+ tuned_osds: List[str] = []
+ total = self.total_mem
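+ # Subtract a memory budget for every non-OSD daemon and for every OSD
+ # that is not autotuned; whatever remains is split evenly across the
+ # autotuned OSDs.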
+ for d in self.daemons:
+ if d.daemon_type == 'mds':
+ total -= self.config_get(d.name(), 'mds_cache_memory_limit')
+ continue
+ if d.daemon_type != 'osd':
+ assert d.daemon_type
+ total -= max(
+ self.min_size_by_type.get(d.daemon_type, self.default_size),
+ d.memory_usage or 0
+ )
+ continue
+ if not self.config_get(d.name(), 'osd_memory_target_autotune'):
+ total -= self.config_get(d.name(), 'osd_memory_target')
+ continue
+ tuned_osds.append(d.name())
+ if total < 0:
+ return None, []
+ if not tuned_osds:
+ return None, []
+ per = total // len(tuned_osds)
+ return int(per), tuned_osds
diff --git a/src/pybind/mgr/cephadm/ceph.repo b/src/pybind/mgr/cephadm/ceph.repo
new file mode 100644
index 000000000..6f710e7ce
--- /dev/null
+++ b/src/pybind/mgr/cephadm/ceph.repo
@@ -0,0 +1,23 @@
+[ceph]
+name=Ceph packages for $basearch
+baseurl=https://download.ceph.com/rpm-mimic/el7/$basearch
+enabled=1
+priority=2
+gpgcheck=1
+gpgkey=https://download.ceph.com/keys/release.asc
+
+[ceph-noarch]
+name=Ceph noarch packages
+baseurl=https://download.ceph.com/rpm-mimic/el7/noarch
+enabled=1
+priority=2
+gpgcheck=1
+gpgkey=https://download.ceph.com/keys/release.asc
+
+[ceph-source]
+name=Ceph source packages
+baseurl=https://download.ceph.com/rpm-mimic/el7/SRPMS
+enabled=0
+priority=2
+gpgcheck=1
+gpgkey=https://download.ceph.com/keys/release.asc
diff --git a/src/pybind/mgr/cephadm/configchecks.py b/src/pybind/mgr/cephadm/configchecks.py
new file mode 100644
index 000000000..dc7a09827
--- /dev/null
+++ b/src/pybind/mgr/cephadm/configchecks.py
@@ -0,0 +1,705 @@
+import json
+import ipaddress
+import logging
+
+from mgr_module import ServiceInfoT
+
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast, Tuple, Callable
+
+if TYPE_CHECKING:
+ from cephadm.module import CephadmOrchestrator
+
+logger = logging.getLogger(__name__)
+
+
+class HostFacts:
+
+ def __init__(self) -> None:
+ self.arch: Optional[str] = None
+ self.bios_date: Optional[str] = None
+ self.bios_version: Optional[str] = None
+ self.cpu_cores: Optional[int] = None
+ self.cpu_count: Optional[int] = None
+ self.cpu_load: Optional[Dict[str, float]] = None
+ self.cpu_model: Optional[str] = None
+ self.cpu_threads: Optional[int] = None
+ self.flash_capacity: Optional[str] = None
+ self.flash_capacity_bytes: Optional[int] = None
+ self.flash_count: Optional[int] = None
+ self.flash_list: Optional[List[Dict[str, Any]]] = None
+ self.hdd_capacity: Optional[str] = None
+ self.hdd_capacity_bytes: Optional[int] = None
+ self.hdd_count: Optional[int] = None
+ self.hdd_list: Optional[List[Dict[str, Any]]] = None
+ self.hostname: Optional[str] = None
+ self.interfaces: Optional[Dict[str, Dict[str, Any]]] = None
+ self.kernel: Optional[str] = None
+ self.kernel_parameters: Optional[Dict[str, Any]] = None
+ self.kernel_security: Optional[Dict[str, str]] = None
+ self.memory_available_kb: Optional[int] = None
+ self.memory_free_kb: Optional[int] = None
+ self.memory_total_kb: Optional[int] = None
+ self.model: Optional[str] = None
+ self.nic_count: Optional[int] = None
+ self.operating_system: Optional[str] = None
+ self.subscribed: Optional[str] = None
+ self.system_uptime: Optional[float] = None
+ self.timestamp: Optional[float] = None
+ self.vendor: Optional[str] = None
+ self._valid = False
+
+ def load_facts(self, json_data: Dict[str, Any]) -> None:
+
+ if isinstance(json_data, dict):
+ keys = json_data.keys()
+ if all([k in keys for k in self.__dict__ if not k.startswith('_')]):
+ self._valid = True
+ for k in json_data.keys():
+ if hasattr(self, k):
+ setattr(self, k, json_data[k])
+ else:
+ self._valid = False
+ else:
+ self._valid = False
+
+ def subnet_to_nic(self, subnet: str) -> Optional[str]:
+ ip_version = ipaddress.ip_network(subnet).version
+ logger.debug(f"subnet {subnet} is IP version {ip_version}")
+ interfaces = cast(Dict[str, Dict[str, Any]], self.interfaces)
+ nic = None
+ for iface in interfaces.keys():
+ addr = ''
+ if ip_version == 4:
+ addr = interfaces[iface].get('ipv4_address', '')
+ else:
+ addr = interfaces[iface].get('ipv6_address', '')
+ if addr:
+ a = addr.split('/')[0]
+ if ipaddress.ip_address(a) in ipaddress.ip_network(subnet):
+ nic = iface
+ break
+ return nic
+
+
+class SubnetLookup:
+ def __init__(self, subnet: str, hostname: str, mtu: str, speed: str):
+ self.subnet = subnet
+ self.mtu_map = {
+ mtu: [hostname]
+ }
+ self.speed_map = {
+ speed: [hostname]
+ }
+
+ @property
+ def host_list(self) -> List[str]:
+ hosts = []
+ for mtu in self.mtu_map:
+ hosts.extend(self.mtu_map.get(mtu, []))
+ return hosts
+
+ def update(self, hostname: str, mtu: str, speed: str) -> None:
+ if mtu in self.mtu_map and hostname not in self.mtu_map[mtu]:
+ self.mtu_map[mtu].append(hostname)
+ else:
+ self.mtu_map[mtu] = [hostname]
+
+ if speed in self.speed_map and hostname not in self.speed_map[speed]:
+ self.speed_map[speed].append(hostname)
+ else:
+ self.speed_map[speed] = [hostname]
+
+ def __repr__(self) -> str:
+ return json.dumps({
+ "subnet": self.subnet,
+ "mtu_map": self.mtu_map,
+ "speed_map": self.speed_map
+ })
+
+
+class CephadmCheckDefinition:
+ def __init__(self, mgr: "CephadmOrchestrator", healthcheck_name: str, description: str, name: str, func: Callable) -> None:
+ self.mgr = mgr
+ self.log = logger
+ self.healthcheck_name = healthcheck_name
+ self.description = description
+ self.name = name
+ self.func = func
+
+ @property
+ def status(self) -> str:
+ check_states: Dict[str, str] = {}
+ # Issuing a get each time, since the value could be set at the CLI
+ raw_states = self.mgr.get_store('config_checks')
+ if not raw_states:
+ self.log.error(
+ "config_checks setting is not defined - unable to determine healthcheck state")
+ return "Unknown"
+
+ try:
+ check_states = json.loads(raw_states)
+ except json.JSONDecodeError:
+ self.log.error("Unable to serialize the config_checks settings to JSON")
+ return "Unavailable"
+
+ return check_states.get(self.name, 'Missing')
+
+ def to_json(self) -> Dict[str, Any]:
+ return {
+ "healthcheck_name": self.healthcheck_name,
+ "description": self.description,
+ "name": self.name,
+ "status": self.status,
+ "valid": True if self.func else False
+ }
+
+
+class CephadmConfigChecks:
+ def __init__(self, mgr: "CephadmOrchestrator"):
+ self.mgr: "CephadmOrchestrator" = mgr
+ self.health_checks: List[CephadmCheckDefinition] = [
+ CephadmCheckDefinition(mgr, "CEPHADM_CHECK_KERNEL_LSM",
+ "checks SELINUX/Apparmor profiles are consistent across cluster hosts",
+ "kernel_security",
+ self._check_kernel_lsm),
+ CephadmCheckDefinition(mgr, "CEPHADM_CHECK_SUBSCRIPTION",
+ "checks subscription states are consistent for all cluster hosts",
+ "os_subscription",
+ self._check_subscription),
+ CephadmCheckDefinition(mgr, "CEPHADM_CHECK_PUBLIC_MEMBERSHIP",
+ "check that all hosts have a NIC on the Ceph public_netork",
+ "public_network",
+ self._check_public_network),
+ CephadmCheckDefinition(mgr, "CEPHADM_CHECK_MTU",
+ "check that OSD hosts share a common MTU setting",
+ "osd_mtu_size",
+ self._check_osd_mtu),
+ CephadmCheckDefinition(mgr, "CEPHADM_CHECK_LINKSPEED",
+ "check that OSD hosts share a common linkspeed",
+ "osd_linkspeed",
+ self._check_osd_linkspeed),
+ CephadmCheckDefinition(mgr, "CEPHADM_CHECK_NETWORK_MISSING",
+ "checks that the cluster/public networks defined exist on the Ceph hosts",
+ "network_missing",
+ self._check_network_missing),
+ CephadmCheckDefinition(mgr, "CEPHADM_CHECK_CEPH_RELEASE",
+ "check for Ceph version consistency - ceph daemons should be on the same release (unless upgrade is active)",
+ "ceph_release",
+ self._check_release_parity),
+ CephadmCheckDefinition(mgr, "CEPHADM_CHECK_KERNEL_VERSION",
+ "checks that the MAJ.MIN of the kernel on Ceph hosts is consistent",
+ "kernel_version",
+ self._check_kernel_version),
+ ]
+ self.log = logger
+ self.host_facts: Dict[str, HostFacts] = {}
+ self.subnet_lookup: Dict[str, SubnetLookup] = {} # subnet CIDR -> SubnetLookup Object
+ self.lsm_to_host: Dict[str, List[str]] = {}
+ self.subscribed: Dict[str, List[str]] = {
+ "yes": [],
+ "no": [],
+ "unknown": [],
+ }
+ self.host_to_role: Dict[str, List[str]] = {}
+ self.kernel_to_hosts: Dict[str, List[str]] = {}
+
+ self.public_network_list: List[str] = []
+ self.cluster_network_list: List[str] = []
+ self.health_check_raised = False
+ self.active_checks: List[str] = [] # checks enabled and executed
+ self.skipped_checks: List[str] = [] # checks enabled, but skipped due to a pre-req failure
+
+ raw_checks = self.mgr.get_store('config_checks')
+ if not raw_checks:
+ # doesn't exist, so seed the checks
+ self.seed_config_checks()
+ else:
+ # setting is there, so ensure there is an entry for each of the checks that
+ # this module supports (account for upgrades/changes)
+ try:
+ config_checks = json.loads(raw_checks)
+ except json.JSONDecodeError:
+ self.log.error("Unable to serialize config_checks config. Reset to defaults")
+ self.seed_config_checks()
+ else:
+ # Ensure the config_checks setting is consistent with this module
+ from_config = set(config_checks.keys())
+ from_module = set([c.name for c in self.health_checks])
+ old_checks = from_config.difference(from_module)
+ new_checks = from_module.difference(from_config)
+
+ if old_checks:
+ self.log.debug(f"old checks being removed from config_checks: {old_checks}")
+ for i in old_checks:
+ del config_checks[i]
+ if new_checks:
+ self.log.debug(f"new checks being added to config_checks: {new_checks}")
+ for i in new_checks:
+ config_checks[i] = 'enabled'
+
+ if old_checks or new_checks:
+ self.log.info(
+ f"config_checks updated: {len(old_checks)} removed, {len(new_checks)} added")
+ self.mgr.set_store('config_checks', json.dumps(config_checks))
+ else:
+ self.log.debug("config_checks match module definition")
+
+ def lookup_check(self, key_value: str, key_name: str = 'name') -> Optional[CephadmCheckDefinition]:
+
+ for c in self.health_checks:
+ if getattr(c, key_name) == key_value:
+ return c
+ return None
+
+ @property
+ def defined_checks(self) -> int:
+ return len(self.health_checks)
+
+ @property
+ def active_checks_count(self) -> int:
+ return len(self.active_checks)
+
+ def seed_config_checks(self) -> None:
+ defaults = {check.name: 'enabled' for check in self.health_checks}
+ self.mgr.set_store('config_checks', json.dumps(defaults))
+
+ @property
+ def skipped_checks_count(self) -> int:
+ return len(self.skipped_checks)
+
+ def to_json(self) -> List[Dict[str, str]]:
+ return [check.to_json() for check in self.health_checks]
+
+ def load_network_config(self) -> None:
+ ret, out, _err = self.mgr.check_mon_command({
+ 'prefix': 'config dump',
+ 'format': 'json'
+ })
+ assert ret == 0
+ js = json.loads(out)
+ for item in js:
+ if item['name'] == "cluster_network":
+ self.cluster_network_list = item['value'].strip().split(',')
+ if item['name'] == "public_network":
+ self.public_network_list = item['value'].strip().split(',')
+
+ self.log.debug(f"public networks {self.public_network_list}")
+ self.log.debug(f"cluster networks {self.cluster_network_list}")
+
+ def _update_subnet(self, subnet: str, hostname: str, nic: Dict[str, Any]) -> None:
+ mtu = nic.get('mtu', None)
+ speed = nic.get('speed', None)
+ if not mtu or not speed:
+ return
+
+ this_subnet = self.subnet_lookup.get(subnet, None)
+ if this_subnet:
+ this_subnet.update(hostname, mtu, speed)
+ else:
+ self.subnet_lookup[subnet] = SubnetLookup(subnet, hostname, mtu, speed)
+
+ def _update_subnet_lookups(self, hostname: str, devname: str, nic: Dict[str, Any]) -> None:
+ if nic['ipv4_address']:
+ try:
+ iface4 = ipaddress.IPv4Interface(nic['ipv4_address'])
+ subnet = str(iface4.network)
+ except ipaddress.AddressValueError as e:
+ self.log.exception(f"Invalid network on {hostname}, interface {devname} : {str(e)}")
+ else:
+ self._update_subnet(subnet, hostname, nic)
+
+ if nic['ipv6_address']:
+ try:
+ iface6 = ipaddress.IPv6Interface(nic['ipv6_address'])
+ subnet = str(iface6.network)
+ except ipaddress.AddressValueError as e:
+ self.log.exception(f"Invalid network on {hostname}, interface {devname} : {str(e)}")
+ else:
+ self._update_subnet(subnet, hostname, nic)
+
+ def hosts_with_role(self, role: str) -> List[str]:
+ host_list = []
+ for hostname, roles in self.host_to_role.items():
+ if role in roles:
+ host_list.append(hostname)
+ return host_list
+
+ def reset(self) -> None:
+ self.subnet_lookup.clear()
+ self.lsm_to_host.clear()
+ self.subscribed['yes'] = []
+ self.subscribed['no'] = []
+ self.subscribed['unknown'] = []
+ self.host_to_role.clear()
+ self.kernel_to_hosts.clear()
+
+ def _get_majority(self, data: Dict[str, List[str]]) -> Tuple[str, int]:
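+ # Return the key with the largest value list (e.g. the most common kernel
+ # version or LSM profile across hosts), together with the size of that list.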
+ assert isinstance(data, dict)
+
+ majority_key = ''
+ majority_count = 0
+ for key in data:
+ if len(data[key]) > majority_count:
+ majority_count = len(data[key])
+ majority_key = key
+ return majority_key, majority_count
+
+ def get_ceph_metadata(self) -> Dict[str, Optional[Dict[str, str]]]:
+ """Build a map of service -> service metadata"""
+ service_map: Dict[str, Optional[Dict[str, str]]] = {}
+
+ for server in self.mgr.list_servers():
+ for service in cast(List[ServiceInfoT], server.get('services', [])):
+ if service:
+ service_map.update(
+ {
+ f"{service['type']}.{service['id']}":
+ self.mgr.get_metadata(service['type'], service['id'])
+ }
+ )
+ return service_map
+
+ def _check_kernel_lsm(self) -> None:
+ if len(self.lsm_to_host.keys()) > 1:
+
+ majority_hosts_ptr, majority_hosts_count = self._get_majority(self.lsm_to_host)
+ lsm_copy = self.lsm_to_host.copy()
+ del lsm_copy[majority_hosts_ptr]
+ details = []
+ for lsm_key in lsm_copy.keys():
+ for host in lsm_copy[lsm_key]:
+ details.append(
+ f"{host} has inconsistent KSM settings compared to the "
+ f"majority of hosts({majority_hosts_count}) in the cluster")
+ host_sfx = 's' if len(details) > 1 else ''
+ self.mgr.health_checks['CEPHADM_CHECK_KERNEL_LSM'] = {
+ 'severity': 'warning',
+ 'summary': f"Kernel Security Module (SELinux/AppArmor) is inconsistent for "
+ f"{len(details)} host{host_sfx}",
+ 'count': len(details),
+ 'detail': details,
+ }
+ self.health_check_raised = True
+ else:
+ self.mgr.health_checks.pop('CEPHADM_CHECK_KERNEL_LSM', None)
+
+ def _check_subscription(self) -> None:
+ if len(self.subscribed['yes']) > 0 and len(self.subscribed['no']) > 0:
+ # inconsistent subscription states - CEPHADM_CHECK_SUBSCRIPTION
+ details = []
+ for host in self.subscribed['no']:
+ details.append(f"{host} does not have an active subscription")
+ self.mgr.health_checks['CEPHADM_CHECK_SUBSCRIPTION'] = {
+ 'severity': 'warning',
+ 'summary': f"Support subscriptions inactive on {len(details)} host(s)"
+ f"({len(self.subscribed['yes'])} subscriptions active)",
+ 'count': len(details),
+ 'detail': details,
+ }
+ self.health_check_raised = True
+ else:
+ self.mgr.health_checks.pop('CEPHADM_CHECK_SUBSCRIPTION', None)
+
+ def _check_public_network(self) -> None:
+ hosts_remaining: List[str] = list(self.mgr.cache.facts.keys())
+ hosts_removed: List[str] = []
+ self.log.debug(f"checking public network membership for: {hosts_remaining}")
+
+ for p_net in self.public_network_list:
+ self.log.debug(f"checking network {p_net}")
+ subnet_data = self.subnet_lookup.get(p_net, None)
+ self.log.debug(f"subnet data - {subnet_data}")
+
+ if subnet_data:
+ hosts_in_subnet = subnet_data.host_list
+ for host in hosts_in_subnet:
+ if host in hosts_remaining:
+ hosts_remaining.remove(host)
+ hosts_removed.append(host)
+ else:
+ if host not in hosts_removed:
+ self.log.debug(f"host={host}, subnet={p_net}")
+ self.log.exception(
+ "Host listed for a subnet but not present in the host facts?")
+
+ # Ideally all hosts will have been removed since they have an IP on at least
+ # one of the public networks
+ if hosts_remaining:
+ if len(hosts_remaining) != len(self.mgr.cache.facts):
+ # public network is visible on some hosts
+ details = [
+ f"{host} does not have an interface on any public network" for host in hosts_remaining]
+
+ self.mgr.health_checks['CEPHADM_CHECK_PUBLIC_MEMBERSHIP'] = {
+ 'severity': 'warning',
+ 'summary': f"Public network(s) is not directly accessible from {len(hosts_remaining)} "
+ "cluster hosts",
+ 'count': len(details),
+ 'detail': details,
+ }
+ self.health_check_raised = True
+ else:
+ self.mgr.health_checks.pop('CEPHADM_CHECK_PUBLIC_MEMBERSHIP', None)
+
+ def _check_osd_mtu(self) -> None:
+ osd_hosts = set(self.hosts_with_role('osd'))
+ osd_network_list = self.cluster_network_list or self.public_network_list
+ mtu_errors: List[str] = []
+
+ for osd_net in osd_network_list:
+ subnet_data = self.subnet_lookup.get(osd_net, None)
+
+ if subnet_data:
+
+ self.log.debug(f"processing mtu map : {json.dumps(subnet_data.mtu_map)}")
+ mtu_count = {}
+ max_hosts = 0
+ mtu_ptr = ''
+ diffs = {}
+ for mtu, host_list in subnet_data.mtu_map.items():
+ mtu_hosts = set(host_list)
+ mtu_count[mtu] = len(mtu_hosts)
+ errors = osd_hosts.difference(mtu_hosts)
+ if errors:
+ diffs[mtu] = errors
+ if len(mtu_hosts) > max_hosts:
+ max_hosts = len(mtu_hosts)
+ mtu_ptr = mtu
+
+ if diffs:
+ self.log.debug("MTU problems detected")
+ self.log.debug(f"most hosts using {mtu_ptr}")
+ mtu_copy = subnet_data.mtu_map.copy()
+ del mtu_copy[mtu_ptr]
+ for bad_mtu in mtu_copy:
+ for h in mtu_copy[bad_mtu]:
+ host = HostFacts()
+ host.load_facts(self.mgr.cache.facts[h])
+ mtu_errors.append(
+ f"host {h}({host.subnet_to_nic(osd_net)}) is using MTU "
+ f"{bad_mtu} on {osd_net}, NICs on other hosts use {mtu_ptr}")
+
+ if mtu_errors:
+ self.mgr.health_checks['CEPHADM_CHECK_MTU'] = {
+ 'severity': 'warning',
+ 'summary': f"MTU setting inconsistent on osd network NICs on {len(mtu_errors)} host(s)",
+ 'count': len(mtu_errors),
+ 'detail': mtu_errors,
+ }
+ self.health_check_raised = True
+ else:
+ self.mgr.health_checks.pop('CEPHADM_CHECK_MTU', None)
+
+ def _check_osd_linkspeed(self) -> None:
+ osd_hosts = set(self.hosts_with_role('osd'))
+ osd_network_list = self.cluster_network_list or self.public_network_list
+
+ linkspeed_errors = []
+
+ for osd_net in osd_network_list:
+ subnet_data = self.subnet_lookup.get(osd_net, None)
+
+ if subnet_data:
+
+ self.log.debug(f"processing subnet : {subnet_data}")
+
+ speed_count = {}
+ max_hosts = 0
+ speed_ptr = ''
+ diffs = {}
+ for speed, host_list in subnet_data.speed_map.items():
+ speed_hosts = set(host_list)
+ speed_count[speed] = len(speed_hosts)
+ errors = osd_hosts.difference(speed_hosts)
+ if errors:
+ diffs[speed] = errors
+ if len(speed_hosts) > max_hosts:
+ max_hosts = len(speed_hosts)
+ speed_ptr = speed
+
+ if diffs:
+ self.log.debug("linkspeed issue(s) detected")
+ self.log.debug(f"most hosts using {speed_ptr}")
+ speed_copy = subnet_data.speed_map.copy()
+ del speed_copy[speed_ptr]
+ for bad_speed in speed_copy:
+ if bad_speed > speed_ptr:
+ # skip speed is better than most...it can stay!
+ continue
+ for h in speed_copy[bad_speed]:
+ host = HostFacts()
+ host.load_facts(self.mgr.cache.facts[h])
+ linkspeed_errors.append(
+ f"host {h}({host.subnet_to_nic(osd_net)}) has linkspeed of "
+ f"{bad_speed} on {osd_net}, NICs on other hosts use {speed_ptr}")
+
+ if linkspeed_errors:
+ self.mgr.health_checks['CEPHADM_CHECK_LINKSPEED'] = {
+ 'severity': 'warning',
+ 'summary': "Link speed is inconsistent on osd network NICs for "
+ f"{len(linkspeed_errors)} host(s)",
+ 'count': len(linkspeed_errors),
+ 'detail': linkspeed_errors,
+ }
+ self.health_check_raised = True
+ else:
+ self.mgr.health_checks.pop('CEPHADM_CHECK_LINKSPEED', None)
+
+ def _check_network_missing(self) -> None:
+ all_networks = self.public_network_list.copy()
+ all_networks.extend(self.cluster_network_list)
+
+ missing_networks = []
+ for subnet in all_networks:
+ subnet_data = self.subnet_lookup.get(subnet, None)
+
+ if not subnet_data:
+ missing_networks.append(f"{subnet} not found on any host in the cluster")
+ self.log.warning(
+ f"Network {subnet} has been defined, but is not present on any host")
+
+ if missing_networks:
+ net_sfx = 's' if len(missing_networks) > 1 else ''
+ self.mgr.health_checks['CEPHADM_CHECK_NETWORK_MISSING'] = {
+ 'severity': 'warning',
+ 'summary': f"Public/cluster network{net_sfx} defined, but can not be found on "
+ "any host",
+ 'count': len(missing_networks),
+ 'detail': missing_networks,
+ }
+ self.health_check_raised = True
+ else:
+ self.mgr.health_checks.pop('CEPHADM_CHECK_NETWORK_MISSING', None)
+
+ def _check_release_parity(self) -> None:
+ upgrade_status = self.mgr.upgrade.upgrade_status()
+ if upgrade_status.in_progress:
+ # skip version consistency checks during an upgrade cycle
+ self.skipped_checks.append('ceph_release')
+ return
+
+ services = self.get_ceph_metadata()
+ self.log.debug(json.dumps(services))
+ version_to_svcs: Dict[str, List[str]] = {}
+
+ for svc in services:
+ if services[svc]:
+ metadata = cast(Dict[str, str], services[svc])
+ v = metadata.get('ceph_release', '')
+ if v in version_to_svcs:
+ version_to_svcs[v].append(svc)
+ else:
+ version_to_svcs[v] = [svc]
+
+ if len(version_to_svcs) > 1:
+ majority_ptr, _majority_count = self._get_majority(version_to_svcs)
+ ver_copy = version_to_svcs.copy()
+ del ver_copy[majority_ptr]
+ details = []
+ for v in ver_copy:
+ for svc in ver_copy[v]:
+ details.append(
+ f"{svc} is running {v} (majority of cluster is using {majority_ptr})")
+
+ self.mgr.health_checks['CEPHADM_CHECK_CEPH_RELEASE'] = {
+ 'severity': 'warning',
+ 'summary': 'Ceph cluster running mixed ceph releases',
+ 'count': len(details),
+ 'detail': details,
+ }
+ self.health_check_raised = True
+ self.log.warning(
+ f"running with {len(version_to_svcs)} different ceph releases within this cluster")
+ else:
+ self.mgr.health_checks.pop('CEPHADM_CHECK_CEPH_RELEASE', None)
+
+ def _check_kernel_version(self) -> None:
+ if len(self.kernel_to_hosts.keys()) > 1:
+ majority_hosts_ptr, majority_hosts_count = self._get_majority(self.kernel_to_hosts)
+ kver_copy = self.kernel_to_hosts.copy()
+ del kver_copy[majority_hosts_ptr]
+ details = []
+ for k in kver_copy:
+ for h in kver_copy[k]:
+ details.append(
+ f"host {h} running kernel {k}, majority of hosts({majority_hosts_count}) "
+ f"running {majority_hosts_ptr}")
+
+ self.log.warning("mixed kernel versions detected")
+ self.mgr.health_checks['CEPHADM_CHECK_KERNEL_VERSION'] = {
+ 'severity': 'warning',
+ 'summary': f"{len(details)} host(s) running different kernel versions",
+ 'count': len(details),
+ 'detail': details,
+ }
+ self.health_check_raised = True
+ else:
+ self.mgr.health_checks.pop('CEPHADM_CHECK_KERNEL_VERSION', None)
+
+ def _process_hosts(self) -> None:
+ self.log.debug(f"processing data from {len(self.mgr.cache.facts)} hosts")
+ for hostname in self.mgr.cache.facts:
+ host = HostFacts()
+ host.load_facts(self.mgr.cache.facts[hostname])
+ if not host._valid:
+ self.log.warning(f"skipping {hostname} - incompatible host facts")
+ continue
+
+ kernel_lsm = cast(Dict[str, str], host.kernel_security)
+ lsm_desc = kernel_lsm.get('description', '')
+ if lsm_desc:
+ if lsm_desc in self.lsm_to_host:
+ self.lsm_to_host[lsm_desc].append(hostname)
+ else:
+ self.lsm_to_host[lsm_desc] = [hostname]
+
+ subscription_state = host.subscribed.lower() if host.subscribed else None
+ if subscription_state:
+ self.subscribed[subscription_state].append(hostname)
+
+ interfaces = cast(Dict[str, Dict[str, Any]], host.interfaces)
+ for name in interfaces.keys():
+ if name in ['lo']:
+ continue
+ self._update_subnet_lookups(hostname, name, interfaces[name])
+
+ if host.kernel:
+ kernel_maj_min = '.'.join(host.kernel.split('.')[0:2])
+ if kernel_maj_min in self.kernel_to_hosts:
+ self.kernel_to_hosts[kernel_maj_min].append(hostname)
+ else:
+ self.kernel_to_hosts[kernel_maj_min] = [hostname]
+ else:
+ self.log.warning(f"Host gather facts for {hostname} is missing kernel information")
+
+ # NOTE: if daemondescription had systemd enabled state, we could check for systemd 'tampering'
+ self.host_to_role[hostname] = list(self.mgr.cache.get_daemon_types(hostname))
+
+ def run_checks(self) -> None:
+ checks_enabled = self.mgr.get_module_option('config_checks_enabled')
+ if checks_enabled is not True:
+ return
+
+ self.reset()
+
+ check_config: Dict[str, str] = {}
+ checks_raw: Optional[str] = self.mgr.get_store('config_checks')
+ if checks_raw:
+ try:
+ check_config.update(json.loads(checks_raw))
+ except json.JSONDecodeError:
+ self.log.exception(
+ "mgr/cephadm/config_checks is not JSON serializable - all checks will run")
+
+ # build lookup "maps" by walking the host facts, once
+ self._process_hosts()
+
+ self.health_check_raised = False
+ self.active_checks = []
+ self.skipped_checks = []
+
+ # process all healthchecks that are not explicitly disabled
+ for health_check in self.health_checks:
+ if check_config.get(health_check.name, '') != 'disabled':
+ self.active_checks.append(health_check.name)
+ health_check.func()
+
+ self.mgr.set_health_checks(self.mgr.health_checks)
diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py
new file mode 100644
index 000000000..92e10ea39
--- /dev/null
+++ b/src/pybind/mgr/cephadm/inventory.py
@@ -0,0 +1,1019 @@
+import datetime
+from copy import copy
+import ipaddress
+import json
+import logging
+import socket
+from typing import TYPE_CHECKING, Dict, List, Iterator, Optional, Any, Tuple, Set, Mapping, cast, \
+ NamedTuple, Type
+
+import orchestrator
+from ceph.deployment import inventory
+from ceph.deployment.service_spec import ServiceSpec, PlacementSpec
+from ceph.utils import str_to_datetime, datetime_to_str, datetime_now
+from orchestrator import OrchestratorError, HostSpec, OrchestratorEvent, service_to_daemon_types
+
+from .utils import resolve_ip
+from .migrations import queue_migrate_nfs_spec
+
+if TYPE_CHECKING:
+ from .module import CephadmOrchestrator
+
+
+logger = logging.getLogger(__name__)
+
+HOST_CACHE_PREFIX = "host."
+SPEC_STORE_PREFIX = "spec."
+
+
+class Inventory:
+ """
+ The inventory stores a HostSpec for all hosts persistently.
+ """
+
+ def __init__(self, mgr: 'CephadmOrchestrator'):
+ self.mgr = mgr
+ adjusted_addrs = False
+
+ def is_valid_ip(ip: str) -> bool:
+ try:
+ ipaddress.ip_address(ip)
+ return True
+ except ValueError:
+ return False
+
+ # load inventory
+ i = self.mgr.get_store('inventory')
+ if i:
+ self._inventory: Dict[str, dict] = json.loads(i)
+ # handle old clusters missing 'hostname' key from hostspec
+ for k, v in self._inventory.items():
+ if 'hostname' not in v:
+ v['hostname'] = k
+
+ # convert legacy non-IP addr?
+ if is_valid_ip(str(v.get('addr'))):
+ continue
+ if len(self._inventory) > 1:
+ if k == socket.gethostname():
+ # Never try to resolve our own host! This is
+ # fraught and can lead to either a loopback
+ # address (due to podman's futzing with
+ # /etc/hosts) or a private IP based on the CNI
+ # configuration. Instead, wait until the mgr
+ # fails over to another host and let them resolve
+ # this host.
+ continue
+ ip = resolve_ip(cast(str, v.get('addr')))
+ else:
+ # we only have 1 node in the cluster, so we can't
+ # rely on another host doing the lookup. use the
+ # IP the mgr binds to.
+ ip = self.mgr.get_mgr_ip()
+ if is_valid_ip(ip) and not ip.startswith('127.0.'):
+ self.mgr.log.info(
+ f"inventory: adjusted host {v['hostname']} addr '{v['addr']}' -> '{ip}'"
+ )
+ v['addr'] = ip
+ adjusted_addrs = True
+ if adjusted_addrs:
+ self.save()
+ else:
+ self._inventory = dict()
+ logger.debug('Loaded inventory %s' % self._inventory)
+
+ def keys(self) -> List[str]:
+ return list(self._inventory.keys())
+
+ def __contains__(self, host: str) -> bool:
+ return host in self._inventory
+
+ def assert_host(self, host: str) -> None:
+ if host not in self._inventory:
+ raise OrchestratorError('host %s does not exist' % host)
+
+ def add_host(self, spec: HostSpec) -> None:
+ if spec.hostname in self._inventory:
+ # addr
+ if self.get_addr(spec.hostname) != spec.addr:
+ self.set_addr(spec.hostname, spec.addr)
+ # labels
+ for label in spec.labels:
+ self.add_label(spec.hostname, label)
+ else:
+ self._inventory[spec.hostname] = spec.to_json()
+ self.save()
+
+ def rm_host(self, host: str) -> None:
+ self.assert_host(host)
+ del self._inventory[host]
+ self.save()
+
+ def set_addr(self, host: str, addr: str) -> None:
+ self.assert_host(host)
+ self._inventory[host]['addr'] = addr
+ self.save()
+
+ def add_label(self, host: str, label: str) -> None:
+ self.assert_host(host)
+
+ if 'labels' not in self._inventory[host]:
+ self._inventory[host]['labels'] = list()
+ if label not in self._inventory[host]['labels']:
+ self._inventory[host]['labels'].append(label)
+ self.save()
+
+ def rm_label(self, host: str, label: str) -> None:
+ self.assert_host(host)
+
+ if 'labels' not in self._inventory[host]:
+ self._inventory[host]['labels'] = list()
+ if label in self._inventory[host]['labels']:
+ self._inventory[host]['labels'].remove(label)
+ self.save()
+
+ def has_label(self, host: str, label: str) -> bool:
+ return (
+ host in self._inventory
+ and label in self._inventory[host].get('labels', [])
+ )
+
+ def get_addr(self, host: str) -> str:
+ self.assert_host(host)
+ return self._inventory[host].get('addr', host)
+
+ def spec_from_dict(self, info: dict) -> HostSpec:
+ hostname = info['hostname']
+ return HostSpec(
+ hostname,
+ addr=info.get('addr', hostname),
+ labels=info.get('labels', []),
+ status='Offline' if hostname in self.mgr.offline_hosts else info.get('status', ''),
+ )
+
+ def all_specs(self) -> List[HostSpec]:
+ return list(map(self.spec_from_dict, self._inventory.values()))
+
+ def get_host_with_state(self, state: str = "") -> List[str]:
+ """return a list of host names in a specific state"""
+ return [h for h in self._inventory if self._inventory[h].get("status", "").lower() == state]
+
+ def save(self) -> None:
+ self.mgr.set_store('inventory', json.dumps(self._inventory))
+
+
+class SpecDescription(NamedTuple):
+ spec: ServiceSpec
+ rank_map: Optional[Dict[int, Dict[int, Optional[str]]]]
+ created: datetime.datetime
+ deleted: Optional[datetime.datetime]
+
+
+class SpecStore():
+ def __init__(self, mgr):
+ # type: (CephadmOrchestrator) -> None
+ self.mgr = mgr
+ self._specs = {} # type: Dict[str, ServiceSpec]
+ # service_name -> rank -> gen -> daemon_id
+ self._rank_maps = {} # type: Dict[str, Dict[int, Dict[int, Optional[str]]]]
+ self.spec_created = {} # type: Dict[str, datetime.datetime]
+ self.spec_deleted = {} # type: Dict[str, datetime.datetime]
+ self.spec_preview = {} # type: Dict[str, ServiceSpec]
+
+ @property
+ def all_specs(self) -> Mapping[str, ServiceSpec]:
+ """
+ returns active and deleted specs. Returns read-only dict.
+ """
+ return self._specs
+
+ def __contains__(self, name: str) -> bool:
+ return name in self._specs
+
+ def __getitem__(self, name: str) -> SpecDescription:
+ if name not in self._specs:
+ raise OrchestratorError(f'Service {name} not found.')
+ return SpecDescription(self._specs[name],
+ self._rank_maps.get(name),
+ self.spec_created[name],
+ self.spec_deleted.get(name, None))
+
+ @property
+ def active_specs(self) -> Mapping[str, ServiceSpec]:
+ return {k: v for k, v in self._specs.items() if k not in self.spec_deleted}
+
+ def load(self):
+ # type: () -> None
+ for k, v in self.mgr.get_store_prefix(SPEC_STORE_PREFIX).items():
+ service_name = k[len(SPEC_STORE_PREFIX):]
+ try:
+ j = cast(Dict[str, dict], json.loads(v))
+ if (
+ (self.mgr.migration_current or 0) < 3
+ and j['spec'].get('service_type') == 'nfs'
+ ):
+ self.mgr.log.debug(f'found legacy nfs spec {j}')
+ queue_migrate_nfs_spec(self.mgr, j)
+ spec = ServiceSpec.from_json(j['spec'])
+ created = str_to_datetime(cast(str, j['created']))
+ self._specs[service_name] = spec
+ self.spec_created[service_name] = created
+
+ if 'deleted' in j:
+ deleted = str_to_datetime(cast(str, j['deleted']))
+ self.spec_deleted[service_name] = deleted
+
+ if 'rank_map' in j and isinstance(j['rank_map'], dict):
+ self._rank_maps[service_name] = {}
+ for rank_str, m in j['rank_map'].items():
+ try:
+ rank = int(rank_str)
+ except ValueError:
+ logger.exception(f"failed to parse rank in {j['rank_map']}")
+ continue
+ if isinstance(m, dict):
+ self._rank_maps[service_name][rank] = {}
+ for gen_str, name in m.items():
+ try:
+ gen = int(gen_str)
+ except ValueError:
+ logger.exception(f"failed to parse gen in {j['rank_map']}")
+ continue
+ if isinstance(name, str) or m is None:
+ self._rank_maps[service_name][rank][gen] = name
+
+ self.mgr.log.debug('SpecStore: loaded spec for %s' % (
+ service_name))
+ except Exception as e:
+ self.mgr.log.warning('unable to load spec for %s: %s' % (
+ service_name, e))
+ pass
+
+ def save(
+ self,
+ spec: ServiceSpec,
+ update_create: bool = True,
+ ) -> None:
+ name = spec.service_name()
+ if spec.preview_only:
+ self.spec_preview[name] = spec
+ return None
+ self._specs[name] = spec
+
+ if update_create:
+ self.spec_created[name] = datetime_now()
+ self._save(name)
+
+ def save_rank_map(self,
+ name: str,
+ rank_map: Dict[int, Dict[int, Optional[str]]]) -> None:
+ self._rank_maps[name] = rank_map
+ self._save(name)
+
+ def _save(self, name: str) -> None:
+ data: Dict[str, Any] = {
+ 'spec': self._specs[name].to_json(),
+ 'created': datetime_to_str(self.spec_created[name]),
+ }
+ if name in self._rank_maps:
+ data['rank_map'] = self._rank_maps[name]
+ if name in self.spec_deleted:
+ data['deleted'] = datetime_to_str(self.spec_deleted[name])
+
+ self.mgr.set_store(
+ SPEC_STORE_PREFIX + name,
+ json.dumps(data, sort_keys=True),
+ )
+ self.mgr.events.for_service(self._specs[name],
+ OrchestratorEvent.INFO,
+ 'service was created')
+
+ def rm(self, service_name: str) -> bool:
+ if service_name not in self._specs:
+ return False
+
+ if self._specs[service_name].preview_only:
+ self.finally_rm(service_name)
+ return True
+
+ self.spec_deleted[service_name] = datetime_now()
+ self.save(self._specs[service_name], update_create=False)
+ return True
+
+ def finally_rm(self, service_name):
+ # type: (str) -> bool
+ found = service_name in self._specs
+ if found:
+ del self._specs[service_name]
+ if service_name in self._rank_maps:
+ del self._rank_maps[service_name]
+ del self.spec_created[service_name]
+ if service_name in self.spec_deleted:
+ del self.spec_deleted[service_name]
+ self.mgr.set_store(SPEC_STORE_PREFIX + service_name, None)
+ return found
+
+ def get_created(self, spec: ServiceSpec) -> Optional[datetime.datetime]:
+ return self.spec_created.get(spec.service_name())
+
+
+class ClientKeyringSpec(object):
+ """
+ A client keyring file that we should maintain
+ """
+
+ def __init__(
+ self,
+ entity: str,
+ placement: PlacementSpec,
+ mode: Optional[int] = None,
+ uid: Optional[int] = None,
+ gid: Optional[int] = None,
+ ) -> None:
+ self.entity = entity
+ self.placement = placement
+ self.mode = mode or 0o600
+ self.uid = uid or 0
+ self.gid = gid or 0
+
+ def validate(self) -> None:
+ pass
+
+ def to_json(self) -> Dict[str, Any]:
+ return {
+ 'entity': self.entity,
+ 'placement': self.placement.to_json(),
+ 'mode': self.mode,
+ 'uid': self.uid,
+ 'gid': self.gid,
+ }
+
+ @property
+ def path(self) -> str:
+ return f'/etc/ceph/ceph.{self.entity}.keyring'
+
+ @classmethod
+ def from_json(cls: Type, data: dict) -> 'ClientKeyringSpec':
+ c = data.copy()
+ if 'placement' in c:
+ c['placement'] = PlacementSpec.from_json(c['placement'])
+ _cls = cls(**c)
+ _cls.validate()
+ return _cls
+
+
+class ClientKeyringStore():
+ """
+ Track client keyring files that we are supposed to maintain
+ """
+
+ def __init__(self, mgr):
+ # type: (CephadmOrchestrator) -> None
+ self.mgr: CephadmOrchestrator = mgr
+ self.keys: Dict[str, ClientKeyringSpec] = {}
+
+ def load(self) -> None:
+ c = self.mgr.get_store('client_keyrings') or b'{}'
+ j = json.loads(c)
+ for e, d in j.items():
+ self.keys[e] = ClientKeyringSpec.from_json(d)
+
+ def save(self) -> None:
+ data = {
+ k: v.to_json() for k, v in self.keys.items()
+ }
+ self.mgr.set_store('client_keyrings', json.dumps(data))
+
+ def update(self, ks: ClientKeyringSpec) -> None:
+ self.keys[ks.entity] = ks
+ self.save()
+
+ def rm(self, entity: str) -> None:
+ if entity in self.keys:
+ del self.keys[entity]
+ self.save()
+
+
+class HostCache():
+ """
+ HostCache stores different things:
+
+ 1. `daemons`: Deployed daemons O(daemons)
+
+ They're part of the configuration nowadays and need to be
+ persistent. The name "daemon cache" is unfortunately a bit misleading:
+ for example, we really need to know where daemons are deployed on
+ hosts that are offline.
+
+ 2. `devices`: ceph-volume inventory cache O(hosts)
+
+ As soon as this is populated, it becomes more or less read-only.
+
+ 3. `networks`: network interfaces for each host. O(hosts)
+
+ This is needed in order to deploy MONs. It is mostly read-only.
+
+ 4. `last_client_files` O(hosts)
+
+ Stores the last digest and owner/mode for files we've pushed to /etc/ceph
+ (ceph.conf or client keyrings).
+
+ 5. `scheduled_daemon_actions`: O(daemons)
+
+ Used to run daemon actions after deploying a daemon. We need to
+ store it persistently, in order to stay consistent across
+ MGR failovers.
+ """
+
+ def __init__(self, mgr):
+ # type: (CephadmOrchestrator) -> None
+ self.mgr: CephadmOrchestrator = mgr
+ self.daemons = {} # type: Dict[str, Dict[str, orchestrator.DaemonDescription]]
+ self.last_daemon_update = {} # type: Dict[str, datetime.datetime]
+ self.devices = {} # type: Dict[str, List[inventory.Device]]
+ self.facts = {} # type: Dict[str, Dict[str, Any]]
+ self.last_facts_update = {} # type: Dict[str, datetime.datetime]
+ self.last_autotune = {} # type: Dict[str, datetime.datetime]
+ self.osdspec_previews = {} # type: Dict[str, List[Dict[str, Any]]]
+ self.osdspec_last_applied = {} # type: Dict[str, Dict[str, datetime.datetime]]
+ self.networks = {} # type: Dict[str, Dict[str, Dict[str, List[str]]]]
+ self.last_device_update = {} # type: Dict[str, datetime.datetime]
+ self.last_device_change = {} # type: Dict[str, datetime.datetime]
+ self.daemon_refresh_queue = [] # type: List[str]
+ self.device_refresh_queue = [] # type: List[str]
+ self.osdspec_previews_refresh_queue = [] # type: List[str]
+
+ # host -> daemon name -> dict
+ self.daemon_config_deps = {} # type: Dict[str, Dict[str, Dict[str,Any]]]
+ self.last_host_check = {} # type: Dict[str, datetime.datetime]
+ self.loading_osdspec_preview = set() # type: Set[str]
+ self.last_client_files: Dict[str, Dict[str, Tuple[str, int, int, int]]] = {}
+ self.registry_login_queue: Set[str] = set()
+
+ self.scheduled_daemon_actions: Dict[str, Dict[str, str]] = {}
+
+ def load(self):
+ # type: () -> None
+ for k, v in self.mgr.get_store_prefix(HOST_CACHE_PREFIX).items():
+ host = k[len(HOST_CACHE_PREFIX):]
+ if host not in self.mgr.inventory:
+ self.mgr.log.warning('removing stray HostCache host record %s' % (
+ host))
+ self.mgr.set_store(k, None)
+ try:
+ j = json.loads(v)
+ if 'last_device_update' in j:
+ self.last_device_update[host] = str_to_datetime(j['last_device_update'])
+ else:
+ self.device_refresh_queue.append(host)
+ if 'last_device_change' in j:
+ self.last_device_change[host] = str_to_datetime(j['last_device_change'])
+ # for services, we ignore the persisted last_*_update
+ # and always trigger a new scrape on mgr restart.
+ self.daemon_refresh_queue.append(host)
+ self.daemons[host] = {}
+ self.osdspec_previews[host] = []
+ self.osdspec_last_applied[host] = {}
+ self.devices[host] = []
+ self.networks[host] = {}
+ self.daemon_config_deps[host] = {}
+ for name, d in j.get('daemons', {}).items():
+ self.daemons[host][name] = \
+ orchestrator.DaemonDescription.from_json(d)
+ for d in j.get('devices', []):
+ self.devices[host].append(inventory.Device.from_json(d))
+ self.networks[host] = j.get('networks_and_interfaces', {})
+ self.osdspec_previews[host] = j.get('osdspec_previews', {})
+ self.last_client_files[host] = j.get('last_client_files', {})
+ for name, ts in j.get('osdspec_last_applied', {}).items():
+ self.osdspec_last_applied[host][name] = str_to_datetime(ts)
+
+ for name, d in j.get('daemon_config_deps', {}).items():
+ self.daemon_config_deps[host][name] = {
+ 'deps': d.get('deps', []),
+ 'last_config': str_to_datetime(d['last_config']),
+ }
+ if 'last_host_check' in j:
+ self.last_host_check[host] = str_to_datetime(j['last_host_check'])
+ self.registry_login_queue.add(host)
+ self.scheduled_daemon_actions[host] = j.get('scheduled_daemon_actions', {})
+
+ self.mgr.log.debug(
+ 'HostCache.load: host %s has %d daemons, '
+ '%d devices, %d networks' % (
+ host, len(self.daemons[host]), len(self.devices[host]),
+ len(self.networks[host])))
+ except Exception as e:
+ self.mgr.log.warning('unable to load cached state for %s: %s' % (
+ host, e))
+ pass
+
+ def update_host_daemons(self, host, dm):
+ # type: (str, Dict[str, orchestrator.DaemonDescription]) -> None
+ self.daemons[host] = dm
+ self.last_daemon_update[host] = datetime_now()
+
+ def update_host_facts(self, host, facts):
+ # type: (str, Dict[str, Dict[str, Any]]) -> None
+ self.facts[host] = facts
+ self.last_facts_update[host] = datetime_now()
+
+ def update_autotune(self, host: str) -> None:
+ self.last_autotune[host] = datetime_now()
+
+ def invalidate_autotune(self, host: str) -> None:
+ if host in self.last_autotune:
+ del self.last_autotune[host]
+
+ def devices_changed(self, host: str, b: List[inventory.Device]) -> bool:
+ a = self.devices[host]
+ if len(a) != len(b):
+ return True
+ aj = {d.path: d.to_json() for d in a}
+ bj = {d.path: d.to_json() for d in b}
+ if aj != bj:
+ self.mgr.log.info("Detected new or changed devices on %s" % host)
+ return True
+ return False
+
+ def update_host_devices_networks(
+ self,
+ host: str,
+ dls: List[inventory.Device],
+ nets: Dict[str, Dict[str, List[str]]]
+ ) -> None:
+ if (
+ host not in self.devices
+ or host not in self.last_device_change
+ or self.devices_changed(host, dls)
+ ):
+ self.last_device_change[host] = datetime_now()
+ self.last_device_update[host] = datetime_now()
+ self.devices[host] = dls
+ self.networks[host] = nets
+
+ def update_daemon_config_deps(self, host: str, name: str, deps: List[str], stamp: datetime.datetime) -> None:
+ self.daemon_config_deps[host][name] = {
+ 'deps': deps,
+ 'last_config': stamp,
+ }
+
+ def update_last_host_check(self, host):
+ # type: (str) -> None
+ self.last_host_check[host] = datetime_now()
+
+ def update_osdspec_last_applied(self, host, service_name, ts):
+ # type: (str, str, datetime.datetime) -> None
+ self.osdspec_last_applied[host][service_name] = ts
+
+ def update_client_file(self,
+ host: str,
+ path: str,
+ digest: str,
+ mode: int,
+ uid: int,
+ gid: int) -> None:
+ if host not in self.last_client_files:
+ self.last_client_files[host] = {}
+ self.last_client_files[host][path] = (digest, mode, uid, gid)
+
+ def removed_client_file(self, host: str, path: str) -> None:
+ if (
+ host in self.last_client_files
+ and path in self.last_client_files[host]
+ ):
+ del self.last_client_files[host][path]
+
+ def prime_empty_host(self, host):
+ # type: (str) -> None
+ """
+ Install an empty entry for a host
+ """
+ self.daemons[host] = {}
+ self.devices[host] = []
+ self.networks[host] = {}
+ self.osdspec_previews[host] = []
+ self.osdspec_last_applied[host] = {}
+ self.daemon_config_deps[host] = {}
+ self.daemon_refresh_queue.append(host)
+ self.device_refresh_queue.append(host)
+ self.osdspec_previews_refresh_queue.append(host)
+ self.registry_login_queue.add(host)
+ self.last_client_files[host] = {}
+
+ def refresh_all_host_info(self, host):
+ # type: (str) -> None
+
+ self.last_host_check.pop(host, None)
+ self.daemon_refresh_queue.append(host)
+ self.registry_login_queue.add(host)
+ self.device_refresh_queue.append(host)
+ self.last_facts_update.pop(host, None)
+ self.osdspec_previews_refresh_queue.append(host)
+ self.last_autotune.pop(host, None)
+
+ def invalidate_host_daemons(self, host):
+ # type: (str) -> None
+ self.daemon_refresh_queue.append(host)
+ if host in self.last_daemon_update:
+ del self.last_daemon_update[host]
+ self.mgr.event.set()
+
+ def invalidate_host_devices(self, host):
+ # type: (str) -> None
+ self.device_refresh_queue.append(host)
+ if host in self.last_device_update:
+ del self.last_device_update[host]
+ self.mgr.event.set()
+
+ def distribute_new_registry_login_info(self) -> None:
+ self.registry_login_queue = set(self.mgr.inventory.keys())
+
+ def save_host(self, host: str) -> None:
+ j: Dict[str, Any] = {
+ 'daemons': {},
+ 'devices': [],
+ 'osdspec_previews': [],
+ 'osdspec_last_applied': {},
+ 'daemon_config_deps': {},
+ }
+ if host in self.last_daemon_update:
+ j['last_daemon_update'] = datetime_to_str(self.last_daemon_update[host])
+ if host in self.last_device_update:
+ j['last_device_update'] = datetime_to_str(self.last_device_update[host])
+ if host in self.last_device_change:
+ j['last_device_change'] = datetime_to_str(self.last_device_change[host])
+ if host in self.daemons:
+ for name, dd in self.daemons[host].items():
+ j['daemons'][name] = dd.to_json()
+ if host in self.devices:
+ for d in self.devices[host]:
+ j['devices'].append(d.to_json())
+ if host in self.networks:
+ j['networks_and_interfaces'] = self.networks[host]
+ if host in self.daemon_config_deps:
+ for name, depi in self.daemon_config_deps[host].items():
+ j['daemon_config_deps'][name] = {
+ 'deps': depi.get('deps', []),
+ 'last_config': datetime_to_str(depi['last_config']),
+ }
+ if host in self.osdspec_previews and self.osdspec_previews[host]:
+ j['osdspec_previews'] = self.osdspec_previews[host]
+ if host in self.osdspec_last_applied:
+ for name, ts in self.osdspec_last_applied[host].items():
+ j['osdspec_last_applied'][name] = datetime_to_str(ts)
+
+ if host in self.last_host_check:
+ j['last_host_check'] = datetime_to_str(self.last_host_check[host])
+
+ if host in self.last_client_files:
+ j['last_client_files'] = self.last_client_files[host]
+ if host in self.scheduled_daemon_actions:
+ j['scheduled_daemon_actions'] = self.scheduled_daemon_actions[host]
+
+ self.mgr.set_store(HOST_CACHE_PREFIX + host, json.dumps(j))
+
+ def rm_host(self, host):
+ # type: (str) -> None
+ if host in self.daemons:
+ del self.daemons[host]
+ if host in self.devices:
+ del self.devices[host]
+ if host in self.facts:
+ del self.facts[host]
+ if host in self.last_facts_update:
+ del self.last_facts_update[host]
+ if host in self.last_autotune:
+ del self.last_autotune[host]
+ if host in self.osdspec_previews:
+ del self.osdspec_previews[host]
+ if host in self.osdspec_last_applied:
+ del self.osdspec_last_applied[host]
+ if host in self.loading_osdspec_preview:
+ self.loading_osdspec_preview.remove(host)
+ if host in self.networks:
+ del self.networks[host]
+ if host in self.last_daemon_update:
+ del self.last_daemon_update[host]
+ if host in self.last_device_update:
+ del self.last_device_update[host]
+ if host in self.last_device_change:
+ del self.last_device_change[host]
+ if host in self.daemon_config_deps:
+ del self.daemon_config_deps[host]
+ if host in self.scheduled_daemon_actions:
+ del self.scheduled_daemon_actions[host]
+ if host in self.last_client_files:
+ del self.last_client_files[host]
+ self.mgr.set_store(HOST_CACHE_PREFIX + host, None)
+
+ def get_hosts(self):
+ # type: () -> List[str]
+ return list(self.daemons)
+
+ def get_facts(self, host: str) -> Dict[str, Any]:
+ return self.facts.get(host, {})
+
+ def _get_daemons(self) -> Iterator[orchestrator.DaemonDescription]:
+ for dm in self.daemons.copy().values():
+ yield from dm.values()
+
+ def get_daemons(self):
+ # type: () -> List[orchestrator.DaemonDescription]
+ return list(self._get_daemons())
+
+ def get_daemons_by_host(self, host: str) -> List[orchestrator.DaemonDescription]:
+ return list(self.daemons.get(host, {}).values())
+
+ def get_daemon(self, daemon_name: str, host: Optional[str] = None) -> orchestrator.DaemonDescription:
+ assert not daemon_name.startswith('ha-rgw.')
+ dds = self.get_daemons_by_host(host) if host else self._get_daemons()
+ for dd in dds:
+ if dd.name() == daemon_name:
+ return dd
+
+ raise orchestrator.OrchestratorError(f'Unable to find {daemon_name} daemon(s)')
+
+ def has_daemon(self, daemon_name: str, host: Optional[str] = None) -> bool:
+ try:
+ self.get_daemon(daemon_name, host)
+ except orchestrator.OrchestratorError:
+ return False
+ return True
+
+ def get_daemons_with_volatile_status(self) -> Iterator[Tuple[str, Dict[str, orchestrator.DaemonDescription]]]:
+ def alter(host: str, dd_orig: orchestrator.DaemonDescription) -> orchestrator.DaemonDescription:
+ dd = copy(dd_orig)
+ if host in self.mgr.offline_hosts:
+ dd.status = orchestrator.DaemonDescriptionStatus.error
+ dd.status_desc = 'host is offline'
+ elif self.mgr.inventory._inventory[host].get("status", "").lower() == "maintenance":
+ # We do not refresh daemons on hosts in maintenance mode, so stored daemon statuses
+ # could be wrong. We must assume maintenance is working and daemons are stopped
+ dd.status = orchestrator.DaemonDescriptionStatus.stopped
+ dd.events = self.mgr.events.get_for_daemon(dd.name())
+ return dd
+
+ for host, dm in self.daemons.copy().items():
+ yield host, {name: alter(host, d) for name, d in dm.items()}
+
+ def get_daemons_by_service(self, service_name):
+ # type: (str) -> List[orchestrator.DaemonDescription]
+ assert not service_name.startswith('keepalived.')
+ assert not service_name.startswith('haproxy.')
+
+ return list(dd for dd in self._get_daemons() if dd.service_name() == service_name)
+
+ def get_daemons_by_type(self, service_type: str, host: str = '') -> List[orchestrator.DaemonDescription]:
+ assert service_type not in ['keepalived', 'haproxy']
+
+ daemons = self.daemons[host].values() if host else self._get_daemons()
+
+ return [d for d in daemons if d.daemon_type in service_to_daemon_types(service_type)]
+
+ def get_daemon_types(self, hostname: str) -> Set[str]:
+ """Provide a list of the types of daemons on the host"""
+ return cast(Set[str], {d.daemon_type for d in self.daemons[hostname].values()})
+
+ def get_daemon_names(self):
+ # type: () -> List[str]
+ return [d.name() for d in self._get_daemons()]
+
+ def get_daemon_last_config_deps(self, host: str, name: str) -> Tuple[Optional[List[str]], Optional[datetime.datetime]]:
+ if host in self.daemon_config_deps:
+ if name in self.daemon_config_deps[host]:
+ return self.daemon_config_deps[host][name].get('deps', []), \
+ self.daemon_config_deps[host][name].get('last_config', None)
+ return None, None
+
+ def get_host_client_files(self, host: str) -> Dict[str, Tuple[str, int, int, int]]:
+ return self.last_client_files.get(host, {})
+
+ def host_needs_daemon_refresh(self, host):
+ # type: (str) -> bool
+ if host in self.mgr.offline_hosts:
+ logger.debug(f'Host "{host}" marked as offline. Skipping daemon refresh')
+ return False
+ if host in self.daemon_refresh_queue:
+ self.daemon_refresh_queue.remove(host)
+ return True
+ cutoff = datetime_now() - datetime.timedelta(
+ seconds=self.mgr.daemon_cache_timeout)
+ if host not in self.last_daemon_update or self.last_daemon_update[host] < cutoff:
+ return True
+ return False
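+
+    # Note (illustrative, hypothetical example): the host_needs_* helpers around
+    # here share the same pattern: refresh if the host was explicitly queued, or
+    # if the last update is older than the configured cache timeout. For example,
+    # with daemon_cache_timeout = 600 a host is refreshed once its last update is
+    # more than ten minutes old:
+    #
+    #   cutoff = datetime_now() - datetime.timedelta(seconds=600)
+    #   needs_refresh = host not in last_update or last_update[host] < cutoff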
+
+ def host_needs_facts_refresh(self, host):
+ # type: (str) -> bool
+ if host in self.mgr.offline_hosts:
+ logger.debug(f'Host "{host}" marked as offline. Skipping gather facts refresh')
+ return False
+ cutoff = datetime_now() - datetime.timedelta(
+ seconds=self.mgr.facts_cache_timeout)
+ if host not in self.last_facts_update or self.last_facts_update[host] < cutoff:
+ return True
+ return False
+
+ def host_needs_autotune_memory(self, host):
+ # type: (str) -> bool
+ if host in self.mgr.offline_hosts:
+ logger.debug(f'Host "{host}" marked as offline. Skipping autotune')
+ return False
+ cutoff = datetime_now() - datetime.timedelta(
+ seconds=self.mgr.autotune_interval)
+ if host not in self.last_autotune or self.last_autotune[host] < cutoff:
+ return True
+ return False
+
+ def host_had_daemon_refresh(self, host: str) -> bool:
+ """
+        Whether this host has had a daemon refresh at least once.
+ """
+ if host in self.last_daemon_update:
+ return True
+ if host not in self.daemons:
+ return False
+ return bool(self.daemons[host])
+
+ def host_needs_device_refresh(self, host):
+ # type: (str) -> bool
+ if host in self.mgr.offline_hosts:
+ logger.debug(f'Host "{host}" marked as offline. Skipping device refresh')
+ return False
+ if host in self.device_refresh_queue:
+ self.device_refresh_queue.remove(host)
+ return True
+ cutoff = datetime_now() - datetime.timedelta(
+ seconds=self.mgr.device_cache_timeout)
+ if host not in self.last_device_update or self.last_device_update[host] < cutoff:
+ return True
+ return False
+
+ def host_needs_osdspec_preview_refresh(self, host: str) -> bool:
+ if host in self.mgr.offline_hosts:
+ logger.debug(f'Host "{host}" marked as offline. Skipping osdspec preview refresh')
+ return False
+ if host in self.osdspec_previews_refresh_queue:
+ self.osdspec_previews_refresh_queue.remove(host)
+ return True
+ # Since this is dependent on other factors (device and spec) this does not need
+ # to be updated periodically.
+ return False
+
+ def host_needs_check(self, host):
+ # type: (str) -> bool
+ cutoff = datetime_now() - datetime.timedelta(
+ seconds=self.mgr.host_check_interval)
+ return host not in self.last_host_check or self.last_host_check[host] < cutoff
+
+ def osdspec_needs_apply(self, host: str, spec: ServiceSpec) -> bool:
+ if (
+ host not in self.devices
+ or host not in self.last_device_change
+ or host not in self.last_device_update
+ or host not in self.osdspec_last_applied
+ or spec.service_name() not in self.osdspec_last_applied[host]
+ ):
+ return True
+ created = self.mgr.spec_store.get_created(spec)
+ if not created or created > self.last_device_change[host]:
+ return True
+ return self.osdspec_last_applied[host][spec.service_name()] < self.last_device_change[host]
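+
+    # Note (illustrative summary): an OSD spec is re-applied on a host when it has
+    # never been applied there, when the spec was (re)created after the last
+    # recorded device change, or when the devices changed after the last apply.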
+
+ def host_needs_registry_login(self, host: str) -> bool:
+ if host in self.mgr.offline_hosts:
+ return False
+ if host in self.registry_login_queue:
+ self.registry_login_queue.remove(host)
+ return True
+ return False
+
+ def add_daemon(self, host, dd):
+ # type: (str, orchestrator.DaemonDescription) -> None
+ assert host in self.daemons
+ self.daemons[host][dd.name()] = dd
+
+ def rm_daemon(self, host: str, name: str) -> None:
+ assert not name.startswith('ha-rgw.')
+
+ if host in self.daemons:
+ if name in self.daemons[host]:
+ del self.daemons[host][name]
+
+ def daemon_cache_filled(self) -> bool:
+ """
+        i.e. we have checked the daemons on each host (excluding offline hosts)
+        at least once.
+
+ We're not checking for `host_needs_daemon_refresh`, as this might never be
+ False for all hosts.
+ """
+ return all((self.host_had_daemon_refresh(h) or h in self.mgr.offline_hosts)
+ for h in self.get_hosts())
+
+ def schedule_daemon_action(self, host: str, daemon_name: str, action: str) -> None:
+ assert not daemon_name.startswith('ha-rgw.')
+
+ priorities = {
+ 'start': 1,
+ 'restart': 2,
+ 'reconfig': 3,
+ 'redeploy': 4,
+ 'stop': 5,
+ }
+ existing_action = self.scheduled_daemon_actions.get(host, {}).get(daemon_name, None)
+ if existing_action and priorities[existing_action] > priorities[action]:
+ logger.debug(
+                f'skipping {action}ing {daemon_name} because {existing_action} is already scheduled.')
+ return
+
+ if host not in self.scheduled_daemon_actions:
+ self.scheduled_daemon_actions[host] = {}
+ self.scheduled_daemon_actions[host][daemon_name] = action
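+
+    # Illustrative sketch (hypothetical daemon name): the priority table above means
+    # a pending 'redeploy' is not downgraded by a later 'restart' request for the
+    # same daemon:
+    #
+    #   cache.schedule_daemon_action('host1', 'mgr.host1.abcdef', 'redeploy')
+    #   cache.schedule_daemon_action('host1', 'mgr.host1.abcdef', 'restart')
+    #   cache.get_scheduled_daemon_action('host1', 'mgr.host1.abcdef')  # 'redeploy'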
+
+ def rm_scheduled_daemon_action(self, host: str, daemon_name: str) -> bool:
+ found = False
+ if host in self.scheduled_daemon_actions:
+ if daemon_name in self.scheduled_daemon_actions[host]:
+ del self.scheduled_daemon_actions[host][daemon_name]
+ found = True
+ if not self.scheduled_daemon_actions[host]:
+ del self.scheduled_daemon_actions[host]
+ return found
+
+ def get_scheduled_daemon_action(self, host: str, daemon: str) -> Optional[str]:
+ assert not daemon.startswith('ha-rgw.')
+
+ return self.scheduled_daemon_actions.get(host, {}).get(daemon)
+
+
+class EventStore():
+ def __init__(self, mgr):
+ # type: (CephadmOrchestrator) -> None
+ self.mgr: CephadmOrchestrator = mgr
+ self.events = {} # type: Dict[str, List[OrchestratorEvent]]
+
+ def add(self, event: OrchestratorEvent) -> None:
+
+ if event.kind_subject() not in self.events:
+ self.events[event.kind_subject()] = [event]
+
+ for e in self.events[event.kind_subject()]:
+ if e.message == event.message:
+ return
+
+ self.events[event.kind_subject()].append(event)
+
+ # limit to five events for now.
+ self.events[event.kind_subject()] = self.events[event.kind_subject()][-5:]
+
+ def for_service(self, spec: ServiceSpec, level: str, message: str) -> None:
+ e = OrchestratorEvent(datetime_now(), 'service',
+ spec.service_name(), level, message)
+ self.add(e)
+
+ def from_orch_error(self, e: OrchestratorError) -> None:
+ if e.event_subject is not None:
+ self.add(OrchestratorEvent(
+ datetime_now(),
+ e.event_subject[0],
+ e.event_subject[1],
+ "ERROR",
+ str(e)
+ ))
+
+ def for_daemon(self, daemon_name: str, level: str, message: str) -> None:
+ e = OrchestratorEvent(datetime_now(), 'daemon', daemon_name, level, message)
+ self.add(e)
+
+ def for_daemon_from_exception(self, daemon_name: str, e: Exception) -> None:
+ self.for_daemon(
+ daemon_name,
+ "ERROR",
+ str(e)
+ )
+
+ def cleanup(self) -> None:
+        # This will need to be done more carefully once events are stored persistently.
+
+ unknowns: List[str] = []
+ daemons = self.mgr.cache.get_daemon_names()
+ specs = self.mgr.spec_store.all_specs.keys()
+ for k_s, v in self.events.items():
+ kind, subject = k_s.split(':')
+ if kind == 'service':
+ if subject not in specs:
+ unknowns.append(k_s)
+ elif kind == 'daemon':
+ if subject not in daemons:
+ unknowns.append(k_s)
+
+ for k_s in unknowns:
+ del self.events[k_s]
+
+ def get_for_service(self, name: str) -> List[OrchestratorEvent]:
+ return self.events.get('service:' + name, [])
+
+ def get_for_daemon(self, name: str) -> List[OrchestratorEvent]:
+ return self.events.get('daemon:' + name, [])
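+
+
+# Illustrative sketch (hypothetical usage): EventStore.add() deduplicates on the
+# event message and keeps only the five most recent events per subject, so repeated
+# identical errors do not grow unbounded:
+#
+#   events = EventStore(mgr)
+#   for i in range(10):
+#       events.for_daemon('osd.0', 'ERROR', f'failed attempt {i}')
+#   assert len(events.get_for_daemon('osd.0')) == 5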
diff --git a/src/pybind/mgr/cephadm/migrations.py b/src/pybind/mgr/cephadm/migrations.py
new file mode 100644
index 000000000..f4a3056b2
--- /dev/null
+++ b/src/pybind/mgr/cephadm/migrations.py
@@ -0,0 +1,333 @@
+import json
+import logging
+from typing import TYPE_CHECKING, Iterator, Optional, Dict, Any
+
+from ceph.deployment.service_spec import PlacementSpec, ServiceSpec, HostPlacementSpec
+from cephadm.schedule import HostAssignment
+import rados
+
+from mgr_module import NFS_POOL_NAME
+from orchestrator import OrchestratorError, DaemonDescription
+
+if TYPE_CHECKING:
+ from .module import CephadmOrchestrator
+
+LAST_MIGRATION = 5
+
+logger = logging.getLogger(__name__)
+
+
+class Migrations:
+ def __init__(self, mgr: "CephadmOrchestrator"):
+ self.mgr = mgr
+
+        # Why a global counter instead of per-spec versions?
+        #
+        # For the first migration:
+        # the specs themselves don't change in (this) migration, only the scheduler does.
+        # Adding a version to the specs at this point just felt wrong.
+        #
+        # Besides, the specs are only one part of cephadm that may need upgrades.
+        # We have the cache, the inventory, the config store, the upgrade logic (imagine
+        # changing the upgrade code while an old upgrade is still in progress), the naming
+        # of daemons, the fs layout of the daemons, etc.
+ if self.mgr.migration_current is None:
+ self.set(LAST_MIGRATION)
+
+ v = mgr.get_store('nfs_migration_queue')
+ self.nfs_migration_queue = json.loads(v) if v else []
+
+ # for some migrations, we don't need to do anything except for
+ # incrementing migration_current.
+ # let's try to shortcut things here.
+ self.migrate(True)
+
+ def set(self, val: int) -> None:
+ self.mgr.set_module_option('migration_current', val)
+ self.mgr.migration_current = val
+
+ def is_migration_ongoing(self) -> bool:
+ return self.mgr.migration_current != LAST_MIGRATION
+
+ def verify_no_migration(self) -> None:
+ if self.is_migration_ongoing():
+ # this is raised in module.serve()
+ raise OrchestratorError(
+ "cephadm migration still ongoing. Please wait, until the migration is complete.")
+
+ def migrate(self, startup: bool = False) -> None:
+ if self.mgr.migration_current == 0:
+ if self.migrate_0_1():
+ self.set(1)
+
+ if self.mgr.migration_current == 1:
+ if self.migrate_1_2():
+ self.set(2)
+
+ if self.mgr.migration_current == 2 and not startup:
+ if self.migrate_2_3():
+ self.set(3)
+
+ if self.mgr.migration_current == 3:
+ if self.migrate_3_4():
+ self.set(4)
+
+ if self.mgr.migration_current == 4:
+ if self.migrate_4_5():
+ self.set(5)
+
+ def migrate_0_1(self) -> bool:
+ """
+ Migration 0 -> 1
+        New scheduler that takes a PlacementSpec as a bound, not as a recommendation.
+ I.e. the new scheduler won't suggest any new placements outside of the hosts
+ specified by label etc.
+
+        This means we have to make sure we're not removing any daemons directly after
+        upgrading to the new scheduler.
+
+ There is a potential race here:
+        1. the user updates their spec to remove daemons
+        2. the mgr gets upgraded to the new scheduler before the old scheduler removed the daemon
+        3. now we convert the spec to explicit placement, thus reverting (1.)
+ I think this is ok.
+ """
+
+ def interesting_specs() -> Iterator[ServiceSpec]:
+ for s in self.mgr.spec_store.all_specs.values():
+ if s.unmanaged:
+ continue
+ p = s.placement
+ if p is None:
+ continue
+ if p.count is None:
+ continue
+ if not p.hosts and not p.host_pattern and not p.label:
+ continue
+ yield s
+
+ def convert_to_explicit(spec: ServiceSpec) -> None:
+ existing_daemons = self.mgr.cache.get_daemons_by_service(spec.service_name())
+ placements, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=self.mgr.inventory.all_specs(),
+ unreachable_hosts=self.mgr._unreachable_hosts(),
+ daemons=existing_daemons,
+ ).place()
+
+            # We only have to migrate if the new scheduler would remove daemons
+ if len(placements) >= len(existing_daemons):
+ return
+
+ def to_hostname(d: DaemonDescription) -> HostPlacementSpec:
+ if d.hostname in old_hosts:
+ return old_hosts[d.hostname]
+ else:
+ assert d.hostname
+ return HostPlacementSpec(d.hostname, '', '')
+
+ old_hosts = {h.hostname: h for h in spec.placement.hosts}
+ new_hosts = [to_hostname(d) for d in existing_daemons]
+
+ new_placement = PlacementSpec(
+ hosts=new_hosts,
+ count=spec.placement.count
+ )
+
+ new_spec = ServiceSpec.from_json(spec.to_json())
+ new_spec.placement = new_placement
+
+ logger.info(f"Migrating {spec.one_line_str()} to explicit placement")
+
+ self.mgr.spec_store.save(new_spec)
+
+ specs = list(interesting_specs())
+ if not specs:
+ return True # nothing to do. shortcut
+
+ if not self.mgr.cache.daemon_cache_filled():
+ logger.info("Unable to migrate yet. Daemon Cache still incomplete.")
+ return False
+
+ for spec in specs:
+ convert_to_explicit(spec)
+
+ return True
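+
+    # Illustrative before/after sketch (hypothetical hosts): if the old scheduler had
+    # placed daemons on hosts the new, stricter scheduler would no longer select, the
+    # spec is rewritten to an explicit host list covering the existing daemons, so the
+    # upgrade itself does not remove any daemons:
+    #
+    #   placement: {count: 3, label: mgr}                     # before migration
+    #   placement: {count: 3, hosts: [hostA, hostB, hostC]}   # after migration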
+
+ def migrate_1_2(self) -> bool:
+ """
+        After 15.2.4, we unified some service IDs: MONs, MGRs, etc. no longer have a service ID.
+        This means the service names changed:
+
+ mon.foo -> mon
+ mgr.foo -> mgr
+
+        This restores data structure consistency.
+ """
+ bad_specs = {}
+ for name, spec in self.mgr.spec_store.all_specs.items():
+ if name != spec.service_name():
+ bad_specs[name] = (spec.service_name(), spec)
+
+ for old, (new, old_spec) in bad_specs.items():
+ if new not in self.mgr.spec_store.all_specs:
+ spec = old_spec
+ else:
+ spec = self.mgr.spec_store.all_specs[new]
+ spec.unmanaged = True
+ self.mgr.spec_store.save(spec)
+ self.mgr.spec_store.finally_rm(old)
+
+ return True
+
+ def migrate_2_3(self) -> bool:
+ if self.nfs_migration_queue:
+ from nfs.cluster import create_ganesha_pool
+
+ create_ganesha_pool(self.mgr)
+ for service_id, pool, ns in self.nfs_migration_queue:
+ if pool != '.nfs':
+ self.migrate_nfs_spec(service_id, pool, ns)
+ self.nfs_migration_queue = []
+ self.mgr.log.info('Done migrating all NFS services')
+ return True
+
+ def migrate_nfs_spec(self, service_id: str, pool: str, ns: Optional[str]) -> None:
+ renamed = False
+ if service_id.startswith('ganesha-'):
+ service_id = service_id[8:]
+ renamed = True
+
+ self.mgr.log.info(
+ f'Migrating nfs.{service_id} from legacy pool {pool} namespace {ns}'
+ )
+
+ # read exports
+ ioctx = self.mgr.rados.open_ioctx(pool)
+ if ns is not None:
+ ioctx.set_namespace(ns)
+ object_iterator = ioctx.list_objects()
+ exports = []
+ while True:
+ try:
+ obj = object_iterator.__next__()
+ if obj.key.startswith('export-'):
+ self.mgr.log.debug(f'reading {obj.key}')
+ exports.append(obj.read().decode())
+ except StopIteration:
+ break
+ self.mgr.log.info(f'Found {len(exports)} exports for legacy nfs.{service_id}')
+
+ # copy grace file
+ if service_id != ns:
+ try:
+ grace = ioctx.read("grace")
+ new_ioctx = self.mgr.rados.open_ioctx(NFS_POOL_NAME)
+ new_ioctx.set_namespace(service_id)
+ new_ioctx.write_full("grace", grace)
+ self.mgr.log.info('Migrated nfs-ganesha grace file')
+ except rados.ObjectNotFound:
+ self.mgr.log.debug('failed to read old grace file; skipping')
+
+ if renamed and f'nfs.ganesha-{service_id}' in self.mgr.spec_store:
+ # rename from nfs.ganesha-* to nfs.*. This will destroy old daemons and
+ # deploy new ones.
+ self.mgr.log.info(f'Replacing nfs.ganesha-{service_id} with nfs.{service_id}')
+ spec = self.mgr.spec_store[f'nfs.ganesha-{service_id}'].spec
+ self.mgr.spec_store.rm(f'nfs.ganesha-{service_id}')
+ spec.service_id = service_id
+ self.mgr.spec_store.save(spec, True)
+
+ # We have to remove the old daemons here as well, otherwise we'll end up with a port conflict.
+ daemons = [d.name()
+ for d in self.mgr.cache.get_daemons_by_service(f'nfs.ganesha-{service_id}')]
+ self.mgr.log.info(f'Removing old nfs.ganesha-{service_id} daemons {daemons}')
+ self.mgr.remove_daemons(daemons)
+ else:
+            # redeploy all ganesha daemons to ensure that the daemons'
+            # cephx keys are correct AND the container configs are set up properly
+ daemons = [d.name() for d in self.mgr.cache.get_daemons_by_service(f'nfs.{service_id}')]
+ self.mgr.log.info(f'Removing old nfs.{service_id} daemons {daemons}')
+ self.mgr.remove_daemons(daemons)
+
+ # re-save service spec (without pool and namespace properties!)
+ spec = self.mgr.spec_store[f'nfs.{service_id}'].spec
+ self.mgr.spec_store.save(spec)
+
+ # import exports
+ for export in exports:
+ ex = ''
+ for line in export.splitlines():
+ if (
+ line.startswith(' secret_access_key =')
+ or line.startswith(' user_id =')
+ ):
+ continue
+ ex += line + '\n'
+ self.mgr.log.debug(f'importing export: {ex}')
+ ret, out, err = self.mgr.mon_command({
+ 'prefix': 'nfs export apply',
+ 'cluster_id': service_id
+ }, inbuf=ex)
+ if ret:
+ self.mgr.log.warning(f'Failed to migrate export ({ret}): {err}\nExport was:\n{ex}')
+ self.mgr.log.info(f'Done migrating nfs.{service_id}')
+
+ def migrate_3_4(self) -> bool:
+        # We can't automatically apply the _admin label to any host here, but we
+        # are going to warn when calling `ceph orch host rm ...` on such hosts
+ if 'client.admin' not in self.mgr.keys.keys:
+ self.mgr._client_keyring_set(
+ entity='client.admin',
+ placement='label:_admin',
+ )
+ return True
+
+ def migrate_4_5(self) -> bool:
+ registry_url = self.mgr.get_module_option('registry_url')
+ registry_username = self.mgr.get_module_option('registry_username')
+ registry_password = self.mgr.get_module_option('registry_password')
+ if registry_url and registry_username and registry_password:
+
+ registry_credentials = {'url': registry_url,
+ 'username': registry_username, 'password': registry_password}
+ self.mgr.set_store('registry_credentials', json.dumps(registry_credentials))
+
+ self.mgr.set_module_option('registry_url', None)
+ self.mgr.check_mon_command({
+ 'prefix': 'config rm',
+ 'who': 'mgr',
+ 'key': 'mgr/cephadm/registry_url',
+ })
+ self.mgr.set_module_option('registry_username', None)
+ self.mgr.check_mon_command({
+ 'prefix': 'config rm',
+ 'who': 'mgr',
+ 'key': 'mgr/cephadm/registry_username',
+ })
+ self.mgr.set_module_option('registry_password', None)
+ self.mgr.check_mon_command({
+ 'prefix': 'config rm',
+ 'who': 'mgr',
+ 'key': 'mgr/cephadm/registry_password',
+ })
+
+ self.mgr.log.info('Done migrating registry login info')
+ return True
+
+
+def queue_migrate_nfs_spec(mgr: "CephadmOrchestrator", spec_dict: Dict[Any, Any]) -> None:
+ """
+ After 16.2.5 we dropped the NFSServiceSpec pool and namespace properties.
+ Queue up a migration to process later, once we are sure that RADOS is available
+ and so on.
+ """
+ service_id = spec_dict['spec']['service_id']
+ args = spec_dict['spec'].get('spec', {})
+ pool = args.pop('pool', 'nfs-ganesha')
+ ns = args.pop('namespace', service_id)
+ queued = mgr.get_store('nfs_migration_queue') or '[]'
+ ls = json.loads(queued)
+ ls.append([service_id, pool, ns])
+ mgr.set_store('nfs_migration_queue', json.dumps(ls))
+ mgr.log.info(f'Queued nfs.{service_id} for migration')
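+
+
+# Illustrative sketch (hypothetical spec values): a legacy NFS spec dict with the
+# dropped pool/namespace properties would be queued roughly like this:
+#
+#   spec_dict = {'spec': {'service_id': 'foo',
+#                         'spec': {'pool': 'nfs-ganesha', 'namespace': 'foo'}}}
+#   queue_migrate_nfs_spec(mgr, spec_dict)
+#   # 'nfs_migration_queue' now contains ["foo", "nfs-ganesha", "foo"], which
+#   # Migrations.migrate_2_3() processes once RADOS is available.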
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
new file mode 100644
index 000000000..9fc4298a8
--- /dev/null
+++ b/src/pybind/mgr/cephadm/module.py
@@ -0,0 +1,2974 @@
+import json
+import errno
+import ipaddress
+import logging
+import re
+import shlex
+from collections import defaultdict
+from configparser import ConfigParser
+from functools import wraps
+from tempfile import TemporaryDirectory
+from threading import Event
+
+import string
+from typing import List, Dict, Optional, Callable, Tuple, TypeVar, \
+ Any, Set, TYPE_CHECKING, cast, NamedTuple, Sequence, Type
+
+import datetime
+import os
+import random
+import tempfile
+import multiprocessing.pool
+import subprocess
+from prettytable import PrettyTable
+
+from ceph.deployment import inventory
+from ceph.deployment.drive_group import DriveGroupSpec
+from ceph.deployment.service_spec import \
+ ServiceSpec, PlacementSpec, \
+ HostPlacementSpec, IngressSpec, IscsiServiceSpec
+from ceph.utils import str_to_datetime, datetime_to_str, datetime_now
+from cephadm.serve import CephadmServe
+from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
+
+from mgr_module import MgrModule, HandleCommandResult, Option, NotifyType
+from mgr_util import create_self_signed_cert
+import secrets
+import orchestrator
+from orchestrator.module import to_format, Format
+
+from orchestrator import OrchestratorError, OrchestratorValidationError, HostSpec, \
+ CLICommandMeta, DaemonDescription, DaemonDescriptionStatus, handle_orch_error, \
+ service_to_daemon_types
+from orchestrator._interface import GenericSpec
+from orchestrator._interface import daemon_type_to_service
+
+from . import remotes
+from . import utils
+from .migrations import Migrations
+from .services.cephadmservice import MonService, MgrService, MdsService, RgwService, \
+ RbdMirrorService, CrashService, CephadmService, CephfsMirrorService
+from .services.ingress import IngressService
+from .services.container import CustomContainerService
+from .services.iscsi import IscsiService
+from .services.nfs import NFSService
+from .services.osd import OSDRemovalQueue, OSDService, OSD, NotFoundError
+from .services.monitoring import GrafanaService, AlertmanagerService, PrometheusService, \
+ NodeExporterService, SNMPGatewayService
+from .services.exporter import CephadmExporter, CephadmExporterConfig
+from .schedule import HostAssignment
+from .inventory import Inventory, SpecStore, HostCache, EventStore, ClientKeyringStore, ClientKeyringSpec
+from .upgrade import CephadmUpgrade
+from .template import TemplateMgr
+from .utils import CEPH_IMAGE_TYPES, RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES, forall_hosts, \
+ cephadmNoImage, CEPH_UPGRADE_ORDER
+from .configchecks import CephadmConfigChecks
+from .offline_watcher import OfflineHostWatcher
+
+try:
+ import remoto
+ # NOTE(mattoliverau) Patch remoto until remoto PR
+ # (https://github.com/alfredodeza/remoto/pull/56) lands
+ from distutils.version import StrictVersion
+ if StrictVersion(remoto.__version__) <= StrictVersion('1.2'):
+ def remoto_has_connection(self: Any) -> bool:
+ return self.gateway.hasreceiver()
+
+ from remoto.backends import BaseConnection
+ BaseConnection.has_connection = remoto_has_connection
+ import remoto.process
+except ImportError as e:
+ remoto = None
+ remoto_import_error = str(e)
+
+logger = logging.getLogger(__name__)
+
+T = TypeVar('T')
+
+DEFAULT_SSH_CONFIG = """
+Host *
+ User root
+ StrictHostKeyChecking no
+ UserKnownHostsFile /dev/null
+ ConnectTimeout=30
+"""
+
+# Default container images -----------------------------------------------------
+DEFAULT_IMAGE = 'quay.io/ceph/ceph'
+DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.33.4'
+DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.3.1'
+DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.23.0'
+DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:8.3.5'
+DEFAULT_HAPROXY_IMAGE = 'docker.io/library/haproxy:2.3'
+DEFAULT_KEEPALIVED_IMAGE = 'docker.io/arcts/keepalived'
+DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1'
+# ------------------------------------------------------------------------------
+
+
+def service_inactive(spec_name: str) -> Callable:
+ def inner(func: Callable) -> Callable:
+ @wraps(func)
+ def wrapper(*args: Any, **kwargs: Any) -> Any:
+ obj = args[0]
+ if obj.get_store(f"spec.{spec_name}") is not None:
+ return 1, "", f"Unable to change configuration of an active service {spec_name}"
+ return func(*args, **kwargs)
+ return wrapper
+ return inner
+
+
+def host_exists(hostname_position: int = 1) -> Callable:
+ """Check that a hostname exists in the inventory"""
+ def inner(func: Callable) -> Callable:
+ @wraps(func)
+ def wrapper(*args: Any, **kwargs: Any) -> Any:
+ this = args[0] # self object
+ hostname = args[hostname_position]
+ if hostname not in this.cache.get_hosts():
+ candidates = ','.join([h for h in this.cache.get_hosts() if h.startswith(hostname)])
+ help_msg = f"Did you mean {candidates}?" if candidates else ""
+ raise OrchestratorError(
+ f"Cannot find host '{hostname}' in the inventory. {help_msg}")
+
+ return func(*args, **kwargs)
+ return wrapper
+ return inner
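+
+
+# Illustrative usage sketch for host_exists() (hypothetical command handler):
+#
+#   @host_exists(hostname_position=1)
+#   def _host_drain(self, hostname: str) -> HandleCommandResult:
+#       ...  # only reached if `hostname` is present in the inventory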
+
+
+class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
+ metaclass=CLICommandMeta):
+
+ _STORE_HOST_PREFIX = "host"
+
+ instance = None
+ NOTIFY_TYPES = [NotifyType.mon_map, NotifyType.pg_summary]
+ NATIVE_OPTIONS = [] # type: List[Any]
+ MODULE_OPTIONS = [
+ Option(
+ 'ssh_config_file',
+ type='str',
+ default=None,
+ desc='customized SSH config file to connect to managed hosts',
+ ),
+ Option(
+ 'device_cache_timeout',
+ type='secs',
+ default=30 * 60,
+ desc='seconds to cache device inventory',
+ ),
+ Option(
+ 'device_enhanced_scan',
+ type='bool',
+ default=False,
+ desc='Use libstoragemgmt during device scans',
+ ),
+ Option(
+ 'daemon_cache_timeout',
+ type='secs',
+ default=10 * 60,
+ desc='seconds to cache service (daemon) inventory',
+ ),
+ Option(
+ 'facts_cache_timeout',
+ type='secs',
+ default=1 * 60,
+ desc='seconds to cache host facts data',
+ ),
+ Option(
+ 'host_check_interval',
+ type='secs',
+ default=10 * 60,
+ desc='how frequently to perform a host check',
+ ),
+ Option(
+ 'mode',
+ type='str',
+ enum_allowed=['root', 'cephadm-package'],
+ default='root',
+ desc='mode for remote execution of cephadm',
+ ),
+ Option(
+ 'container_image_base',
+ default=DEFAULT_IMAGE,
+ desc='Container image name, without the tag',
+ runtime=True,
+ ),
+ Option(
+ 'container_image_prometheus',
+ default=DEFAULT_PROMETHEUS_IMAGE,
+ desc='Prometheus container image',
+ ),
+ Option(
+ 'container_image_grafana',
+ default=DEFAULT_GRAFANA_IMAGE,
+            desc='Grafana container image',
+ ),
+ Option(
+ 'container_image_alertmanager',
+ default=DEFAULT_ALERT_MANAGER_IMAGE,
+            desc='Alertmanager container image',
+ ),
+ Option(
+ 'container_image_node_exporter',
+ default=DEFAULT_NODE_EXPORTER_IMAGE,
+            desc='Node exporter container image',
+ ),
+ Option(
+ 'container_image_haproxy',
+ default=DEFAULT_HAPROXY_IMAGE,
+            desc='HAProxy container image',
+ ),
+ Option(
+ 'container_image_keepalived',
+ default=DEFAULT_KEEPALIVED_IMAGE,
+ desc='Keepalived container image',
+ ),
+ Option(
+ 'container_image_snmp_gateway',
+ default=DEFAULT_SNMP_GATEWAY_IMAGE,
+ desc='SNMP Gateway container image',
+ ),
+ Option(
+ 'warn_on_stray_hosts',
+ type='bool',
+ default=True,
+ desc='raise a health warning if daemons are detected on a host '
+ 'that is not managed by cephadm',
+ ),
+ Option(
+ 'warn_on_stray_daemons',
+ type='bool',
+ default=True,
+ desc='raise a health warning if daemons are detected '
+ 'that are not managed by cephadm',
+ ),
+ Option(
+ 'warn_on_failed_host_check',
+ type='bool',
+ default=True,
+ desc='raise a health warning if the host check fails',
+ ),
+ Option(
+ 'log_to_cluster',
+ type='bool',
+ default=True,
+ desc='log to the "cephadm" cluster log channel"',
+ ),
+ Option(
+ 'allow_ptrace',
+ type='bool',
+ default=False,
+ desc='allow SYS_PTRACE capability on ceph containers',
+ long_desc='The SYS_PTRACE capability is needed to attach to a '
+                      'process with gdb or strace. Enabling this option '
+ 'can allow debugging daemons that encounter problems '
+ 'at runtime.',
+ ),
+ Option(
+ 'container_init',
+ type='bool',
+ default=True,
+ desc='Run podman/docker with `--init`'
+ ),
+ Option(
+ 'prometheus_alerts_path',
+ type='str',
+ default='/etc/prometheus/ceph/ceph_default_alerts.yml',
+ desc='location of alerts to include in prometheus deployments',
+ ),
+ Option(
+ 'migration_current',
+ type='int',
+ default=None,
+ desc='internal - do not modify',
+            # used to track spec and other data migrations.
+ ),
+ Option(
+ 'config_dashboard',
+ type='bool',
+ default=True,
+ desc='manage configs like API endpoints in Dashboard.'
+ ),
+ Option(
+ 'manage_etc_ceph_ceph_conf',
+ type='bool',
+ default=False,
+ desc='Manage and own /etc/ceph/ceph.conf on the hosts.',
+ ),
+ Option(
+ 'manage_etc_ceph_ceph_conf_hosts',
+ type='str',
+ default='*',
+ desc='PlacementSpec describing on which hosts to manage /etc/ceph/ceph.conf',
+ ),
+ # not used anymore
+ Option(
+ 'registry_url',
+ type='str',
+ default=None,
+ desc='Registry url for login purposes. This is not the default registry'
+ ),
+ Option(
+ 'registry_username',
+ type='str',
+ default=None,
+ desc='Custom repository username. Only used for logging into a registry.'
+ ),
+ Option(
+ 'registry_password',
+ type='str',
+ default=None,
+ desc='Custom repository password. Only used for logging into a registry.'
+ ),
+ ####
+ Option(
+ 'registry_insecure',
+ type='bool',
+ default=False,
+ desc='Registry is to be considered insecure (no TLS available). Only for development purposes.'
+ ),
+ Option(
+ 'use_repo_digest',
+ type='bool',
+ default=True,
+ desc='Automatically convert image tags to image digest. Make sure all daemons use the same image',
+ ),
+ Option(
+ 'config_checks_enabled',
+ type='bool',
+ default=False,
+ desc='Enable or disable the cephadm configuration analysis',
+ ),
+ Option(
+ 'default_registry',
+ type='str',
+ default='docker.io',
+ desc='Search-registry to which we should normalize unqualified image names. '
+ 'This is not the default registry',
+ ),
+ Option(
+ 'max_count_per_host',
+ type='int',
+ default=10,
+ desc='max number of daemons per service per host',
+ ),
+ Option(
+ 'autotune_memory_target_ratio',
+ type='float',
+ default=.7,
+ desc='ratio of total system memory to divide amongst autotuned daemons'
+ ),
+ Option(
+ 'autotune_interval',
+ type='secs',
+ default=10 * 60,
+ desc='how frequently to autotune daemon memory'
+ ),
+ Option(
+ 'max_osd_draining_count',
+ type='int',
+ default=10,
+ desc='max number of osds that will be drained simultaneously when osds are removed'
+ ),
+ ]
+
+ def __init__(self, *args: Any, **kwargs: Any):
+ super(CephadmOrchestrator, self).__init__(*args, **kwargs)
+ self._cluster_fsid: str = self.get('mon_map')['fsid']
+ self.last_monmap: Optional[datetime.datetime] = None
+
+ # for serve()
+ self.run = True
+ self.event = Event()
+
+ if self.get_store('pause'):
+ self.paused = True
+ else:
+ self.paused = False
+
+ # for mypy which does not run the code
+ if TYPE_CHECKING:
+ self.ssh_config_file = None # type: Optional[str]
+ self.device_cache_timeout = 0
+ self.daemon_cache_timeout = 0
+ self.facts_cache_timeout = 0
+ self.host_check_interval = 0
+ self.max_count_per_host = 0
+ self.mode = ''
+ self.container_image_base = ''
+ self.container_image_prometheus = ''
+ self.container_image_grafana = ''
+ self.container_image_alertmanager = ''
+ self.container_image_node_exporter = ''
+ self.container_image_haproxy = ''
+ self.container_image_keepalived = ''
+ self.container_image_snmp_gateway = ''
+ self.warn_on_stray_hosts = True
+ self.warn_on_stray_daemons = True
+ self.warn_on_failed_host_check = True
+ self.allow_ptrace = False
+ self.container_init = True
+ self.prometheus_alerts_path = ''
+ self.migration_current: Optional[int] = None
+ self.config_dashboard = True
+ self.manage_etc_ceph_ceph_conf = True
+ self.manage_etc_ceph_ceph_conf_hosts = '*'
+ self.registry_url: Optional[str] = None
+ self.registry_username: Optional[str] = None
+ self.registry_password: Optional[str] = None
+ self.registry_insecure: bool = False
+ self.use_repo_digest = True
+ self.default_registry = ''
+ self.autotune_memory_target_ratio = 0.0
+ self.autotune_interval = 0
+ self.apply_spec_fails: List[Tuple[str, str]] = []
+ self.max_osd_draining_count = 10
+ self.device_enhanced_scan = False
+
+ self._cons: Dict[str, Tuple[remoto.backends.BaseConnection,
+ remoto.backends.LegacyModuleExecute]] = {}
+
+ self.notify(NotifyType.mon_map, None)
+ self.config_notify()
+
+ path = self.get_ceph_option('cephadm_path')
+ try:
+ assert isinstance(path, str)
+ with open(path, 'r') as f:
+ self._cephadm = f.read()
+ except (IOError, TypeError) as e:
+ raise RuntimeError("unable to read cephadm at '%s': %s" % (
+ path, str(e)))
+
+ self.cephadm_binary_path = self._get_cephadm_binary_path()
+
+ self._worker_pool = multiprocessing.pool.ThreadPool(10)
+
+ self._reconfig_ssh()
+
+ CephadmOrchestrator.instance = self
+
+ self.upgrade = CephadmUpgrade(self)
+
+ self.health_checks: Dict[str, dict] = {}
+
+ self.inventory = Inventory(self)
+
+ self.cache = HostCache(self)
+ self.cache.load()
+
+ self.to_remove_osds = OSDRemovalQueue(self)
+ self.to_remove_osds.load_from_store()
+
+ self.spec_store = SpecStore(self)
+ self.spec_store.load()
+
+ self.keys = ClientKeyringStore(self)
+ self.keys.load()
+
+ # ensure the host lists are in sync
+ for h in self.inventory.keys():
+ if h not in self.cache.daemons:
+ self.cache.prime_empty_host(h)
+ for h in self.cache.get_hosts():
+ if h not in self.inventory:
+ self.cache.rm_host(h)
+
+ # in-memory only.
+ self.events = EventStore(self)
+ self.offline_hosts: Set[str] = set()
+
+ self.migration = Migrations(self)
+
+ _service_clses: Sequence[Type[CephadmService]] = [
+ OSDService, NFSService, MonService, MgrService, MdsService,
+ RgwService, RbdMirrorService, GrafanaService, AlertmanagerService,
+ PrometheusService, NodeExporterService, CrashService, IscsiService,
+ IngressService, CustomContainerService, CephadmExporter, CephfsMirrorService,
+ SNMPGatewayService,
+ ]
+
+ # https://github.com/python/mypy/issues/8993
+ self.cephadm_services: Dict[str, CephadmService] = {
+ cls.TYPE: cls(self) for cls in _service_clses} # type: ignore
+
+ self.mgr_service: MgrService = cast(MgrService, self.cephadm_services['mgr'])
+ self.osd_service: OSDService = cast(OSDService, self.cephadm_services['osd'])
+ self.iscsi_service: IscsiService = cast(IscsiService, self.cephadm_services['iscsi'])
+
+ self.template = TemplateMgr(self)
+
+ self.requires_post_actions: Set[str] = set()
+ self.need_connect_dashboard_rgw = False
+
+ self.config_checker = CephadmConfigChecks(self)
+
+ self.offline_watcher = OfflineHostWatcher(self)
+ self.offline_watcher.start()
+
+ def shutdown(self) -> None:
+ self.log.debug('shutdown')
+ self._worker_pool.close()
+ self._worker_pool.join()
+ self.offline_watcher.shutdown()
+ self.run = False
+ self.event.set()
+
+ def _get_cephadm_service(self, service_type: str) -> CephadmService:
+ assert service_type in ServiceSpec.KNOWN_SERVICE_TYPES
+ return self.cephadm_services[service_type]
+
+ def _get_cephadm_binary_path(self) -> str:
+ import hashlib
+ m = hashlib.sha256()
+ m.update(self._cephadm.encode())
+ return f'/var/lib/ceph/{self._cluster_fsid}/cephadm.{m.hexdigest()}'
+
+ def _kick_serve_loop(self) -> None:
+ self.log.debug('_kick_serve_loop')
+ self.event.set()
+
+ def serve(self) -> None:
+ """
+ The main loop of cephadm.
+
+ A command handler will typically change the declarative state
+ of cephadm. This loop will then attempt to apply this new state.
+ """
+ serve = CephadmServe(self)
+ serve.serve()
+
+ def set_container_image(self, entity: str, image: str) -> None:
+ self.check_mon_command({
+ 'prefix': 'config set',
+ 'name': 'container_image',
+ 'value': image,
+ 'who': entity,
+ })
+
+ def config_notify(self) -> None:
+ """
+ This method is called whenever one of our config options is changed.
+
+ TODO: this method should be moved into mgr_module.py
+ """
+ for opt in self.MODULE_OPTIONS:
+ setattr(self,
+ opt['name'], # type: ignore
+ self.get_module_option(opt['name'])) # type: ignore
+ self.log.debug(' mgr option %s = %s',
+ opt['name'], getattr(self, opt['name'])) # type: ignore
+ for opt in self.NATIVE_OPTIONS:
+ setattr(self,
+ opt, # type: ignore
+ self.get_ceph_option(opt))
+ self.log.debug(' native option %s = %s', opt, getattr(self, opt)) # type: ignore
+
+ self.event.set()
+
+ def notify(self, notify_type: NotifyType, notify_id: Optional[str]) -> None:
+ if notify_type == NotifyType.mon_map:
+ # get monmap mtime so we can refresh configs when mons change
+ monmap = self.get('mon_map')
+ self.last_monmap = str_to_datetime(monmap['modified'])
+ if self.last_monmap and self.last_monmap > datetime_now():
+ self.last_monmap = None # just in case clocks are skewed
+ if getattr(self, 'manage_etc_ceph_ceph_conf', False):
+ # getattr, due to notify() being called before config_notify()
+ self._kick_serve_loop()
+ if notify_type == NotifyType.pg_summary:
+ self._trigger_osd_removal()
+
+ def _trigger_osd_removal(self) -> None:
+ remove_queue = self.to_remove_osds.as_osd_ids()
+ if not remove_queue:
+ return
+ data = self.get("osd_stats")
+ for osd in data.get('osd_stats', []):
+ if osd.get('num_pgs') == 0:
+                # if _ANY_ osd that is currently in the removal queue appears
+                # to be empty, kick the serve loop to start the removal process
+                if int(osd.get('osd')) in remove_queue:
+                    self.log.debug('Found empty osd. Starting removal process')
+ self._kick_serve_loop()
+
+ def pause(self) -> None:
+ if not self.paused:
+ self.log.info('Paused')
+ self.set_store('pause', 'true')
+ self.paused = True
+ # wake loop so we update the health status
+ self._kick_serve_loop()
+
+ def resume(self) -> None:
+ if self.paused:
+ self.log.info('Resumed')
+ self.paused = False
+ self.set_store('pause', None)
+ # unconditionally wake loop so that 'orch resume' can be used to kick
+ # cephadm
+ self._kick_serve_loop()
+
+ def get_unique_name(
+ self,
+ daemon_type: str,
+ host: str,
+ existing: List[orchestrator.DaemonDescription],
+ prefix: Optional[str] = None,
+ forcename: Optional[str] = None,
+ rank: Optional[int] = None,
+ rank_generation: Optional[int] = None,
+ ) -> str:
+ """
+ Generate a unique random service name
+ """
+ suffix = daemon_type not in [
+ 'mon', 'crash',
+ 'prometheus', 'node-exporter', 'grafana', 'alertmanager',
+ 'container', 'cephadm-exporter', 'snmp-gateway'
+ ]
+ if forcename:
+ if len([d for d in existing if d.daemon_id == forcename]):
+ raise orchestrator.OrchestratorValidationError(
+ f'name {daemon_type}.{forcename} already in use')
+ return forcename
+
+ if '.' in host:
+ host = host.split('.')[0]
+ while True:
+ if prefix:
+ name = prefix + '.'
+ else:
+ name = ''
+ if rank is not None and rank_generation is not None:
+ name += f'{rank}.{rank_generation}.'
+ name += host
+ if suffix:
+ name += '.' + ''.join(random.choice(string.ascii_lowercase)
+ for _ in range(6))
+ if len([d for d in existing if d.daemon_id == name]):
+ if not suffix:
+ raise orchestrator.OrchestratorValidationError(
+ f'name {daemon_type}.{name} already in use')
+ self.log.debug('name %s exists, trying again', name)
+ continue
+ return name
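+
+    # Illustrative examples of names produced by get_unique_name() (hostnames and
+    # random suffixes below are hypothetical):
+    #
+    #   mon         -> 'host1'                 (no random suffix for mon/crash/...)
+    #   mds         -> 'myfs.host1.qwerty'     (prefix + host + random suffix)
+    #   rgw, rank 0 -> 'foo.0.0.host1.asdfgh'  (rank and rank_generation included)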
+
+ def _reconfig_ssh(self) -> None:
+ temp_files = [] # type: list
+ ssh_options = [] # type: List[str]
+
+ # ssh_config
+ ssh_config_fname = self.ssh_config_file
+ ssh_config = self.get_store("ssh_config")
+ if ssh_config is not None or ssh_config_fname is None:
+ if not ssh_config:
+ ssh_config = DEFAULT_SSH_CONFIG
+ f = tempfile.NamedTemporaryFile(prefix='cephadm-conf-')
+ os.fchmod(f.fileno(), 0o600)
+ f.write(ssh_config.encode('utf-8'))
+ f.flush() # make visible to other processes
+ temp_files += [f]
+ ssh_config_fname = f.name
+ if ssh_config_fname:
+ self.validate_ssh_config_fname(ssh_config_fname)
+ ssh_options += ['-F', ssh_config_fname]
+ self.ssh_config = ssh_config
+
+ # identity
+ ssh_key = self.get_store("ssh_identity_key")
+ ssh_pub = self.get_store("ssh_identity_pub")
+ self.ssh_pub = ssh_pub
+ self.ssh_key = ssh_key
+ if ssh_key and ssh_pub:
+ tkey = tempfile.NamedTemporaryFile(prefix='cephadm-identity-')
+ tkey.write(ssh_key.encode('utf-8'))
+ os.fchmod(tkey.fileno(), 0o600)
+ tkey.flush() # make visible to other processes
+ tpub = open(tkey.name + '.pub', 'w')
+ os.fchmod(tpub.fileno(), 0o600)
+ tpub.write(ssh_pub)
+ tpub.flush() # make visible to other processes
+ temp_files += [tkey, tpub]
+ ssh_options += ['-i', tkey.name]
+
+ self._temp_files = temp_files
+ ssh_options += ['-o', 'ServerAliveInterval=7', '-o', 'ServerAliveCountMax=3']
+ self._ssh_options = ' '.join(ssh_options) # type: Optional[str]
+
+ if self.mode == 'root':
+ self.ssh_user = self.get_store('ssh_user', default='root')
+ elif self.mode == 'cephadm-package':
+ self.ssh_user = 'cephadm'
+
+ self._reset_cons()
+
+ def validate_ssh_config_content(self, ssh_config: Optional[str]) -> None:
+ if ssh_config is None or len(ssh_config.strip()) == 0:
+ raise OrchestratorValidationError('ssh_config cannot be empty')
+ # StrictHostKeyChecking is [yes|no] ?
+ res = re.findall(r'StrictHostKeyChecking\s+.*', ssh_config)
+ if not res:
+ raise OrchestratorValidationError('ssh_config requires StrictHostKeyChecking')
+ for s in res:
+ if 'ask' in s.lower():
+ raise OrchestratorValidationError(f'ssh_config cannot contain: \'{s}\'')
+
+ def validate_ssh_config_fname(self, ssh_config_fname: str) -> None:
+ if not os.path.isfile(ssh_config_fname):
+ raise OrchestratorValidationError("ssh_config \"{}\" does not exist".format(
+ ssh_config_fname))
+
+ def _reset_con(self, host: str) -> None:
+ conn, r = self._cons.get(host, (None, None))
+ if conn:
+ self.log.debug('_reset_con close %s' % host)
+ conn.exit()
+ del self._cons[host]
+
+ def _reset_cons(self) -> None:
+ for host, conn_and_r in self._cons.items():
+ self.log.debug('_reset_cons close %s' % host)
+ conn, r = conn_and_r
+ conn.exit()
+ self._cons = {}
+
+ def update_watched_hosts(self) -> None:
+ # currently, we are watching hosts with nfs daemons
+ hosts_to_watch = [d.hostname for d in self.cache.get_daemons(
+ ) if d.daemon_type in RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES]
+ self.offline_watcher.set_hosts(list(set([h for h in hosts_to_watch if h is not None])))
+
+ def offline_hosts_remove(self, host: str) -> None:
+ if host in self.offline_hosts:
+ self.offline_hosts.remove(host)
+
+ @staticmethod
+ def can_run() -> Tuple[bool, str]:
+ if remoto is not None:
+ return True, ""
+ else:
+ return False, "loading remoto library:{}".format(
+ remoto_import_error)
+
+ def available(self) -> Tuple[bool, str, Dict[str, Any]]:
+ """
+ The cephadm orchestrator is always available.
+ """
+ ok, err = self.can_run()
+ if not ok:
+ return ok, err, {}
+ if not self.ssh_key or not self.ssh_pub:
+ return False, 'SSH keys not set. Use `ceph cephadm set-priv-key` and `ceph cephadm set-pub-key` or `ceph cephadm generate-key`', {}
+
+ # mypy is unable to determine type for _processes since it's private
+ worker_count: int = self._worker_pool._processes # type: ignore
+ ret = {
+ "workers": worker_count,
+ "paused": self.paused,
+ }
+
+ return True, err, ret
+
+ def _validate_and_set_ssh_val(self, what: str, new: Optional[str], old: Optional[str]) -> None:
+ self.set_store(what, new)
+ self._reconfig_ssh()
+ if self.cache.get_hosts():
+ # Can't check anything without hosts
+ host = self.cache.get_hosts()[0]
+ r = CephadmServe(self)._check_host(host)
+ if r is not None:
+ # connection failed reset user
+ self.set_store(what, old)
+ self._reconfig_ssh()
+ raise OrchestratorError('ssh connection %s@%s failed' % (self.ssh_user, host))
+ self.log.info(f'Set ssh {what}')
+
+ @orchestrator._cli_write_command(
+ prefix='cephadm set-ssh-config')
+ def _set_ssh_config(self, inbuf: Optional[str] = None) -> Tuple[int, str, str]:
+ """
+ Set the ssh_config file (use -i <ssh_config>)
+ """
+ # Set an ssh_config file provided from stdin
+
+ old = self.ssh_config
+ if inbuf == old:
+ return 0, "value unchanged", ""
+ self.validate_ssh_config_content(inbuf)
+ self._validate_and_set_ssh_val('ssh_config', inbuf, old)
+ return 0, "", ""
+
+ @orchestrator._cli_write_command('cephadm clear-ssh-config')
+ def _clear_ssh_config(self) -> Tuple[int, str, str]:
+ """
+ Clear the ssh_config file
+ """
+ # Clear the ssh_config file provided from stdin
+ self.set_store("ssh_config", None)
+ self.ssh_config_tmp = None
+ self.log.info('Cleared ssh_config')
+ self._reconfig_ssh()
+ return 0, "", ""
+
+ @orchestrator._cli_read_command('cephadm get-ssh-config')
+ def _get_ssh_config(self) -> HandleCommandResult:
+ """
+ Returns the ssh config as used by cephadm
+ """
+ if self.ssh_config_file:
+ self.validate_ssh_config_fname(self.ssh_config_file)
+ with open(self.ssh_config_file) as f:
+ return HandleCommandResult(stdout=f.read())
+ ssh_config = self.get_store("ssh_config")
+ if ssh_config:
+ return HandleCommandResult(stdout=ssh_config)
+ return HandleCommandResult(stdout=DEFAULT_SSH_CONFIG)
+
+ @orchestrator._cli_write_command('cephadm generate-key')
+ def _generate_key(self) -> Tuple[int, str, str]:
+ """
+ Generate a cluster SSH key (if not present)
+ """
+ if not self.ssh_pub or not self.ssh_key:
+ self.log.info('Generating ssh key...')
+ tmp_dir = TemporaryDirectory()
+ path = tmp_dir.name + '/key'
+ try:
+ subprocess.check_call([
+ '/usr/bin/ssh-keygen',
+ '-C', 'ceph-%s' % self._cluster_fsid,
+ '-N', '',
+ '-f', path
+ ])
+ with open(path, 'r') as f:
+ secret = f.read()
+ with open(path + '.pub', 'r') as f:
+ pub = f.read()
+ finally:
+ os.unlink(path)
+ os.unlink(path + '.pub')
+ tmp_dir.cleanup()
+ self.set_store('ssh_identity_key', secret)
+ self.set_store('ssh_identity_pub', pub)
+ self._reconfig_ssh()
+ return 0, '', ''
+
+ @orchestrator._cli_write_command(
+ 'cephadm set-priv-key')
+ def _set_priv_key(self, inbuf: Optional[str] = None) -> Tuple[int, str, str]:
+ """Set cluster SSH private key (use -i <private_key>)"""
+ if inbuf is None or len(inbuf) == 0:
+ return -errno.EINVAL, "", "empty private ssh key provided"
+ old = self.ssh_key
+ if inbuf == old:
+ return 0, "value unchanged", ""
+ self._validate_and_set_ssh_val('ssh_identity_key', inbuf, old)
+ self.log.info('Set ssh private key')
+ return 0, "", ""
+
+ @orchestrator._cli_write_command(
+ 'cephadm set-pub-key')
+ def _set_pub_key(self, inbuf: Optional[str] = None) -> Tuple[int, str, str]:
+ """Set cluster SSH public key (use -i <public_key>)"""
+ if inbuf is None or len(inbuf) == 0:
+ return -errno.EINVAL, "", "empty public ssh key provided"
+ old = self.ssh_pub
+ if inbuf == old:
+ return 0, "value unchanged", ""
+ self._validate_and_set_ssh_val('ssh_identity_pub', inbuf, old)
+ return 0, "", ""
+
+ @orchestrator._cli_write_command(
+ 'cephadm clear-key')
+ def _clear_key(self) -> Tuple[int, str, str]:
+ """Clear cluster SSH key"""
+ self.set_store('ssh_identity_key', None)
+ self.set_store('ssh_identity_pub', None)
+ self._reconfig_ssh()
+ self.log.info('Cleared cluster SSH key')
+ return 0, '', ''
+
+ @orchestrator._cli_read_command(
+ 'cephadm get-pub-key')
+ def _get_pub_key(self) -> Tuple[int, str, str]:
+ """Show SSH public key for connecting to cluster hosts"""
+ if self.ssh_pub:
+ return 0, self.ssh_pub, ''
+ else:
+ return -errno.ENOENT, '', 'No cluster SSH key defined'
+
+ @orchestrator._cli_read_command(
+ 'cephadm get-user')
+ def _get_user(self) -> Tuple[int, str, str]:
+ """
+ Show user for SSHing to cluster hosts
+ """
+ if self.ssh_user is None:
+ return -errno.ENOENT, '', 'No cluster SSH user configured'
+ else:
+ return 0, self.ssh_user, ''
+
+ @orchestrator._cli_read_command(
+ 'cephadm set-user')
+ def set_ssh_user(self, user: str) -> Tuple[int, str, str]:
+ """
+        Set the user for SSHing to cluster hosts; passwordless sudo is needed for non-root users
+ """
+ current_user = self.ssh_user
+ if user == current_user:
+ return 0, "value unchanged", ""
+
+ self._validate_and_set_ssh_val('ssh_user', user, current_user)
+ current_ssh_config = self._get_ssh_config()
+ new_ssh_config = re.sub(r"(\s{2}User\s)(.*)", r"\1" + user, current_ssh_config.stdout)
+ self._set_ssh_config(new_ssh_config)
+
+ msg = 'ssh user set to %s' % user
+ if user != 'root':
+ msg += '. sudo will be used'
+ self.log.info(msg)
+ return 0, msg, ''
+
+ @orchestrator._cli_read_command(
+ 'cephadm registry-login')
+ def registry_login(self, url: Optional[str] = None, username: Optional[str] = None, password: Optional[str] = None, inbuf: Optional[str] = None) -> Tuple[int, str, str]:
+ """
+ Set custom registry login info by providing url, username and password or json file with login info (-i <file>)
+ """
+ # if password not given in command line, get it through file input
+ if not (url and username and password) and (inbuf is None or len(inbuf) == 0):
+ return -errno.EINVAL, "", ("Invalid arguments. Please provide arguments <url> <username> <password> "
+ "or -i <login credentials json file>")
+ elif (url and username and password):
+ registry_json = {'url': url, 'username': username, 'password': password}
+ else:
+ assert isinstance(inbuf, str)
+ registry_json = json.loads(inbuf)
+ if "url" not in registry_json or "username" not in registry_json or "password" not in registry_json:
+ return -errno.EINVAL, "", ("json provided for custom registry login did not include all necessary fields. "
+ "Please setup json file as\n"
+ "{\n"
+ " \"url\": \"REGISTRY_URL\",\n"
+ " \"username\": \"REGISTRY_USERNAME\",\n"
+ " \"password\": \"REGISTRY_PASSWORD\"\n"
+ "}\n")
+
+ # verify login info works by attempting login on random host
+ host = None
+ for host_name in self.inventory.keys():
+ host = host_name
+ break
+ if not host:
+ raise OrchestratorError('no hosts defined')
+ r = CephadmServe(self)._registry_login(host, registry_json)
+ if r is not None:
+ return 1, '', r
+ # if logins succeeded, store info
+ self.log.debug("Host logins successful. Storing login info.")
+ self.set_store('registry_credentials', json.dumps(registry_json))
+ # distribute new login info to all hosts
+ self.cache.distribute_new_registry_login_info()
+ return 0, "registry login scheduled", ''
+
+ @orchestrator._cli_read_command('cephadm check-host')
+ def check_host(self, host: str, addr: Optional[str] = None) -> Tuple[int, str, str]:
+ """Check whether we can access and manage a remote host"""
+ try:
+ out, err, code = CephadmServe(self)._run_cephadm(host, cephadmNoImage, 'check-host',
+ ['--expect-hostname', host],
+ addr=addr,
+ error_ok=True, no_fsid=True)
+ if code:
+ return 1, '', ('check-host failed:\n' + '\n'.join(err))
+ except OrchestratorError:
+ self.log.exception(f"check-host failed for '{host}'")
+ return 1, '', ('check-host failed:\n'
+ + f"Host '{host}' not found. Use 'ceph orch host ls' to see all managed hosts.")
+ # if we have an outstanding health alert for this host, give the
+ # serve thread a kick
+ if 'CEPHADM_HOST_CHECK_FAILED' in self.health_checks:
+ for item in self.health_checks['CEPHADM_HOST_CHECK_FAILED']['detail']:
+ if item.startswith('host %s ' % host):
+ self.event.set()
+ return 0, '%s (%s) ok' % (host, addr), '\n'.join(err)
+
+ @orchestrator._cli_read_command(
+ 'cephadm prepare-host')
+ def _prepare_host(self, host: str, addr: Optional[str] = None) -> Tuple[int, str, str]:
+ """Prepare a remote host for use with cephadm"""
+ out, err, code = CephadmServe(self)._run_cephadm(host, cephadmNoImage, 'prepare-host',
+ ['--expect-hostname', host],
+ addr=addr,
+ error_ok=True, no_fsid=True)
+ if code:
+ return 1, '', ('prepare-host failed:\n' + '\n'.join(err))
+ # if we have an outstanding health alert for this host, give the
+ # serve thread a kick
+ if 'CEPHADM_HOST_CHECK_FAILED' in self.health_checks:
+ for item in self.health_checks['CEPHADM_HOST_CHECK_FAILED']['detail']:
+ if item.startswith('host %s ' % host):
+ self.event.set()
+ return 0, '%s (%s) ok' % (host, addr), '\n'.join(err)
+
+ @orchestrator._cli_write_command(
+ prefix='cephadm set-extra-ceph-conf')
+ def _set_extra_ceph_conf(self, inbuf: Optional[str] = None) -> HandleCommandResult:
+ """
+        Text that is appended to all daemons' ceph.conf.
+        Mainly a workaround until `config generate-minimal-conf` generates
+        a complete ceph.conf.
+
+ Warning: this is a dangerous operation.
+ """
+ if inbuf:
+ # sanity check.
+ cp = ConfigParser()
+ cp.read_string(inbuf, source='<infile>')
+
+ self.set_store("extra_ceph_conf", json.dumps({
+ 'conf': inbuf,
+ 'last_modified': datetime_to_str(datetime_now())
+ }))
+ self.log.info('Set extra_ceph_conf')
+ self._kick_serve_loop()
+ return HandleCommandResult()
+
+ @orchestrator._cli_read_command(
+ 'cephadm get-extra-ceph-conf')
+ def _get_extra_ceph_conf(self) -> HandleCommandResult:
+ """
+ Get extra ceph conf that is appended
+ """
+ return HandleCommandResult(stdout=self.extra_ceph_conf().conf)
+
+ def _set_exporter_config(self, config: Dict[str, str]) -> None:
+ self.set_store('exporter_config', json.dumps(config))
+
+ def _get_exporter_config(self) -> Dict[str, str]:
+ cfg_str = self.get_store('exporter_config')
+ return json.loads(cfg_str) if cfg_str else {}
+
+ def _set_exporter_option(self, option: str, value: Optional[str] = None) -> None:
+ kv_option = f'exporter_{option}'
+ self.set_store(kv_option, value)
+
+ def _get_exporter_option(self, option: str) -> Optional[str]:
+ kv_option = f'exporter_{option}'
+ return self.get_store(kv_option)
+
+ @orchestrator._cli_write_command(
+ prefix='cephadm generate-exporter-config')
+ @service_inactive('cephadm-exporter')
+ def _generate_exporter_config(self) -> Tuple[int, str, str]:
+ """
+ Generate default SSL crt/key and token for cephadm exporter daemons
+ """
+ self._set_exporter_defaults()
+ self.log.info('Default settings created for cephadm exporter(s)')
+ return 0, "", ""
+
+ def _set_exporter_defaults(self) -> None:
+ crt, key = self._generate_exporter_ssl()
+ token = self._generate_exporter_token()
+ self._set_exporter_config({
+ "crt": crt,
+ "key": key,
+ "token": token,
+ "port": CephadmExporterConfig.DEFAULT_PORT
+ })
+ self._set_exporter_option('enabled', 'true')
+
+ def _generate_exporter_ssl(self) -> Tuple[str, str]:
+ return create_self_signed_cert(dname={"O": "Ceph", "OU": "cephadm-exporter"})
+
+ def _generate_exporter_token(self) -> str:
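+        # 32 random bytes rendered as a 64-character hex string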
+ return secrets.token_hex(32)
+
+ @orchestrator._cli_write_command(
+ prefix='cephadm clear-exporter-config')
+ @service_inactive('cephadm-exporter')
+ def _clear_exporter_config(self) -> Tuple[int, str, str]:
+ """
+ Clear the SSL configuration used by cephadm exporter daemons
+ """
+ self._clear_exporter_config_settings()
+ self.log.info('Cleared cephadm exporter configuration')
+ return 0, "", ""
+
+ def _clear_exporter_config_settings(self) -> None:
+ self.set_store('exporter_config', None)
+ self._set_exporter_option('enabled', None)
+
+ @orchestrator._cli_write_command(
+ prefix='cephadm set-exporter-config')
+ @service_inactive('cephadm-exporter')
+ def _store_exporter_config(self, inbuf: Optional[str] = None) -> Tuple[int, str, str]:
+ """
+ Set custom cephadm-exporter configuration from a json file (-i <file>). JSON must contain crt, key, token and port
+ """
+ if not inbuf:
+ return 1, "", "JSON configuration has not been provided (-i <filename>)"
+
+ cfg = CephadmExporterConfig(self)
+ rc, reason = cfg.load_from_json(inbuf)
+ if rc:
+ return 1, "", reason
+
+ rc, reason = cfg.validate_config()
+ if rc:
+ return 1, "", reason
+
+ self._set_exporter_config({
+ "crt": cfg.crt,
+ "key": cfg.key,
+ "token": cfg.token,
+ "port": cfg.port
+ })
+ self.log.info("Loaded and verified the TLS configuration")
+ return 0, "", ""
+
+ @orchestrator._cli_read_command(
+ 'cephadm get-exporter-config')
+ def _show_exporter_config(self) -> Tuple[int, str, str]:
+ """
+        Show the current cephadm-exporter configuration (JSON)
+ """
+ cfg = self._get_exporter_config()
+ return 0, json.dumps(cfg, indent=2), ""
+
+ @orchestrator._cli_read_command('cephadm config-check ls')
+ def _config_checks_list(self, format: Format = Format.plain) -> HandleCommandResult:
+ """List the available configuration checks and their current state"""
+
+ if format not in [Format.plain, Format.json, Format.json_pretty]:
+ return HandleCommandResult(
+ retval=1,
+ stderr="Requested format is not supported when listing configuration checks"
+ )
+
+ if format in [Format.json, Format.json_pretty]:
+ return HandleCommandResult(
+ stdout=to_format(self.config_checker.health_checks,
+ format,
+ many=True,
+ cls=None))
+
+ # plain formatting
+ table = PrettyTable(
+ ['NAME',
+ 'HEALTHCHECK',
+ 'STATUS',
+ 'DESCRIPTION'
+ ], border=False)
+ table.align['NAME'] = 'l'
+ table.align['HEALTHCHECK'] = 'l'
+ table.align['STATUS'] = 'l'
+ table.align['DESCRIPTION'] = 'l'
+ table.left_padding_width = 0
+ table.right_padding_width = 2
+ for c in self.config_checker.health_checks:
+ table.add_row((
+ c.name,
+ c.healthcheck_name,
+ c.status,
+ c.description,
+ ))
+
+ return HandleCommandResult(stdout=table.get_string())
+
+ @orchestrator._cli_read_command('cephadm config-check status')
+ def _config_check_status(self) -> HandleCommandResult:
+ """Show whether the configuration checker feature is enabled/disabled"""
+ status = self.get_module_option('config_checks_enabled')
+ return HandleCommandResult(stdout="Enabled" if status else "Disabled")
+
+ @orchestrator._cli_write_command('cephadm config-check enable')
+ def _config_check_enable(self, check_name: str) -> HandleCommandResult:
+ """Enable a specific configuration check"""
+ if not self._config_check_valid(check_name):
+ return HandleCommandResult(retval=1, stderr="Invalid check name")
+
+ err, msg = self._update_config_check(check_name, 'enabled')
+ if err:
+ return HandleCommandResult(
+ retval=err,
+ stderr=f"Failed to enable check '{check_name}' : {msg}")
+
+ return HandleCommandResult(stdout="ok")
+
+ @orchestrator._cli_write_command('cephadm config-check disable')
+ def _config_check_disable(self, check_name: str) -> HandleCommandResult:
+ """Disable a specific configuration check"""
+ if not self._config_check_valid(check_name):
+ return HandleCommandResult(retval=1, stderr="Invalid check name")
+
+ err, msg = self._update_config_check(check_name, 'disabled')
+ if err:
+ return HandleCommandResult(retval=err, stderr=f"Failed to disable check '{check_name}': {msg}")
+ else:
+ # drop any outstanding raised healthcheck for this check
+ config_check = self.config_checker.lookup_check(check_name)
+ if config_check:
+ if config_check.healthcheck_name in self.health_checks:
+ self.health_checks.pop(config_check.healthcheck_name, None)
+ self.set_health_checks(self.health_checks)
+ else:
+ self.log.error(
+ f"Unable to resolve a check name ({check_name}) to a healthcheck definition?")
+
+ return HandleCommandResult(stdout="ok")
+
+ def _config_check_valid(self, check_name: str) -> bool:
+ return check_name in [chk.name for chk in self.config_checker.health_checks]
+
+ def _update_config_check(self, check_name: str, status: str) -> Tuple[int, str]:
+ checks_raw = self.get_store('config_checks')
+ if not checks_raw:
+ return 1, "config_checks setting is not available"
+
+ checks = json.loads(checks_raw)
+ checks.update({
+ check_name: status
+ })
+ self.log.info(f"updated config check '{check_name}' : {status}")
+ self.set_store('config_checks', json.dumps(checks))
+ return 0, ""
+
+ class ExtraCephConf(NamedTuple):
+ conf: str
+ last_modified: Optional[datetime.datetime]
+
+ def extra_ceph_conf(self) -> 'CephadmOrchestrator.ExtraCephConf':
+ data = self.get_store('extra_ceph_conf')
+ if not data:
+ return CephadmOrchestrator.ExtraCephConf('', None)
+ try:
+ j = json.loads(data)
+ except ValueError:
+ msg = 'Unable to load extra_ceph_conf: Cannot decode JSON'
+ self.log.exception('%s: \'%s\'', msg, data)
+ return CephadmOrchestrator.ExtraCephConf('', None)
+ return CephadmOrchestrator.ExtraCephConf(j['conf'], str_to_datetime(j['last_modified']))
+
+ def extra_ceph_conf_is_newer(self, dt: datetime.datetime) -> bool:
+ conf = self.extra_ceph_conf()
+ if not conf.last_modified:
+ return False
+ return conf.last_modified > dt
+
+ @orchestrator._cli_write_command(
+ 'cephadm osd activate'
+ )
+ def _osd_activate(self, host: List[str]) -> HandleCommandResult:
+ """
+ Start OSD containers for existing OSDs
+ """
+
+ @forall_hosts
+ def run(h: str) -> str:
+ return self.osd_service.deploy_osd_daemons_for_existing_osds(h, 'osd')
+
+ return HandleCommandResult(stdout='\n'.join(run(host)))
+
+ @orchestrator._cli_read_command('orch client-keyring ls')
+ def _client_keyring_ls(self, format: Format = Format.plain) -> HandleCommandResult:
+ """
+ List client keyrings under cephadm management
+ """
+ if format != Format.plain:
+ output = to_format(self.keys.keys.values(), format, many=True, cls=ClientKeyringSpec)
+ else:
+ table = PrettyTable(
+ ['ENTITY', 'PLACEMENT', 'MODE', 'OWNER', 'PATH'],
+ border=False)
+ table.align = 'l'
+ table.left_padding_width = 0
+ table.right_padding_width = 2
+ for ks in sorted(self.keys.keys.values(), key=lambda ks: ks.entity):
+ table.add_row((
+ ks.entity, ks.placement.pretty_str(),
+ utils.file_mode_to_str(ks.mode),
+ f'{ks.uid}:{ks.gid}',
+ ks.path,
+ ))
+ output = table.get_string()
+ return HandleCommandResult(stdout=output)
+
+ @orchestrator._cli_write_command('orch client-keyring set')
+ def _client_keyring_set(
+ self,
+ entity: str,
+ placement: str,
+ owner: Optional[str] = None,
+ mode: Optional[str] = None,
+ ) -> HandleCommandResult:
+ """
+ Add or update client keyring under cephadm management
+ """
+ if not entity.startswith('client.'):
+ raise OrchestratorError('entity must start with client.')
+ if owner:
+ try:
+ uid, gid = map(int, owner.split(':'))
+ except Exception:
+ raise OrchestratorError('owner must look like "<uid>:<gid>", e.g., "0:0"')
+ else:
+ uid = 0
+ gid = 0
+ if mode:
+ try:
+ imode = int(mode, 8)
+ except Exception:
+ raise OrchestratorError('mode must be an octal mode, e.g. "600"')
+ else:
+ imode = 0o600
+ pspec = PlacementSpec.from_string(placement)
+ ks = ClientKeyringSpec(entity, pspec, mode=imode, uid=uid, gid=gid)
+ self.keys.update(ks)
+ self._kick_serve_loop()
+ return HandleCommandResult()
+
+ @orchestrator._cli_write_command('orch client-keyring rm')
+ def _client_keyring_rm(
+ self,
+ entity: str,
+ ) -> HandleCommandResult:
+ """
+ Remove client keyring from cephadm management
+ """
+ self.keys.rm(entity)
+ self._kick_serve_loop()
+ return HandleCommandResult()
+
+ def _get_connection(self, host: str) -> Tuple['remoto.backends.BaseConnection',
+ 'remoto.backends.LegacyModuleExecute']:
+ """
+ Setup a connection for running commands on remote host.
+ """
+ conn, r = self._cons.get(host, (None, None))
+ if conn:
+ if conn.has_connection():
+ self.log.debug('Have connection to %s' % host)
+ return conn, r
+ else:
+ self._reset_con(host)
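+        # no reusable cached connection; open a fresh one (sudo is enabled for
+        # non-root ssh users)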
+ assert self.ssh_user
+ n = self.ssh_user + '@' + host
+ self.log.debug("Opening connection to {} with ssh options '{}'".format(
+ n, self._ssh_options))
+ child_logger = self.log.getChild(n)
+ child_logger.setLevel('WARNING')
+ conn = remoto.Connection(
+ n,
+ logger=child_logger,
+ ssh_options=self._ssh_options,
+ sudo=True if self.ssh_user != 'root' else False)
+
+ r = conn.import_module(remotes)
+ self._cons[host] = conn, r
+
+ return conn, r
+
+ def _executable_path(self, conn: 'remoto.backends.BaseConnection', executable: str) -> str:
+ """
+ Remote validator that accepts a connection object to ensure that a certain
+        executable is available, returning its full path if so.
+
+ Otherwise an exception with thorough details will be raised, informing the
+ user that the executable was not found.
+ """
+ executable_path = conn.remote_module.which(executable)
+ if not executable_path:
+ raise RuntimeError("Executable '{}' not found on host '{}'".format(
+ executable, conn.hostname))
+ self.log.debug("Found executable '{}' at path '{}'".format(executable,
+ executable_path))
+ return executable_path
+
+ def _get_container_image(self, daemon_name: str) -> Optional[str]:
+ daemon_type = daemon_name.split('.', 1)[0] # type: ignore
+ image: Optional[str] = None
+ if daemon_type in CEPH_IMAGE_TYPES:
+ # get container image
+ image = str(self.get_foreign_ceph_option(
+ utils.name_to_config_section(daemon_name),
+ 'container_image'
+ )).strip()
+ elif daemon_type == 'prometheus':
+ image = self.container_image_prometheus
+ elif daemon_type == 'grafana':
+ image = self.container_image_grafana
+ elif daemon_type == 'alertmanager':
+ image = self.container_image_alertmanager
+ elif daemon_type == 'node-exporter':
+ image = self.container_image_node_exporter
+ elif daemon_type == 'haproxy':
+ image = self.container_image_haproxy
+ elif daemon_type == 'keepalived':
+ image = self.container_image_keepalived
+ elif daemon_type == CustomContainerService.TYPE:
+ # The image can't be resolved, the necessary information
+ # is only available when a container is deployed (given
+ # via spec).
+ image = None
+ elif daemon_type == 'snmp-gateway':
+ image = self.container_image_snmp_gateway
+ else:
+ assert False, daemon_type
+
+ self.log.debug('%s container image %s' % (daemon_name, image))
+
+ return image
+
+ def _schedulable_hosts(self) -> List[HostSpec]:
+ """
+ Returns all usable hosts that went through _refresh_host_daemons().
+
+        This mitigates a potential race, where a new host was added *after*
+        ``_refresh_host_daemons()`` was called, but *before*
+        ``_apply_all_specs()`` was called. Thus we could end up with hosts
+        where daemons might be running, but we have not yet detected them.
+ """
+ return [
+ h for h in self.inventory.all_specs()
+ if (
+ self.cache.host_had_daemon_refresh(h.hostname)
+ and '_no_schedule' not in h.labels
+ )
+ ]
+
+ def _unreachable_hosts(self) -> List[HostSpec]:
+ """
+ Return all hosts that are offline or in maintenance mode.
+
+        The idea is we should not touch the daemons on these hosts (since
+        in theory the hosts are inaccessible, so we CAN'T touch them), but
+        we still want to count daemons that exist on these hosts toward the
+        placement, so daemons on these hosts aren't just moved elsewhere.
+ """
+ return [
+ h for h in self.inventory.all_specs()
+ if (
+ h.status.lower() in ['maintenance', 'offline']
+ or h.hostname in self.offline_hosts
+ )
+ ]
+
+ def _check_valid_addr(self, host: str, addr: str) -> str:
+ # make sure hostname is resolvable before trying to make a connection
+ try:
+ ip_addr = utils.resolve_ip(addr)
+ except OrchestratorError as e:
+ msg = str(e) + f'''
+You may need to supply an address for {addr}
+
+Please make sure that the host is reachable and accepts connections using the cephadm SSH key
+To add the cephadm SSH key to the host:
+> ceph cephadm get-pub-key > ~/ceph.pub
+> ssh-copy-id -f -i ~/ceph.pub {self.ssh_user}@{addr}
+
+To check that the host is reachable open a new shell with the --no-hosts flag:
+> cephadm shell --no-hosts
+
+Then run the following:
+> ceph cephadm get-ssh-config > ssh_config
+> ceph config-key get mgr/cephadm/ssh_identity_key > ~/cephadm_private_key
+> chmod 0600 ~/cephadm_private_key
+> ssh -F ssh_config -i ~/cephadm_private_key {self.ssh_user}@{addr}'''
+ raise OrchestratorError(msg)
+
+ if ipaddress.ip_address(ip_addr).is_loopback and host == addr:
+ # if this is a re-add, use old address. otherwise error
+ if host not in self.inventory or self.inventory.get_addr(host) == host:
+ raise OrchestratorError(
+ (f'Cannot automatically resolve ip address of host {host}. Ip resolved to loopback address: {ip_addr}\n'
+ + f'Please explicitly provide the address (ceph orch host add {host} --addr <ip-addr>)'))
+ self.log.debug(
+ f'Received loopback address resolving ip for {host}: {ip_addr}. Falling back to previous address.')
+ ip_addr = self.inventory.get_addr(host)
+ out, err, code = CephadmServe(self)._run_cephadm(
+ host, cephadmNoImage, 'check-host',
+ ['--expect-hostname', host],
+ addr=addr,
+ error_ok=True, no_fsid=True)
+ if code:
+ msg = 'check-host failed:\n' + '\n'.join(err)
+ # err will contain stdout and stderr, so we filter on the message text to
+ # only show the errors
+ errors = [_i.replace("ERROR: ", "") for _i in err if _i.startswith('ERROR')]
+ if errors:
+ msg = f'Host {host} ({addr}) failed check(s): {errors}'
+ raise OrchestratorError(msg)
+ return ip_addr
+
+ def _add_host(self, spec):
+ # type: (HostSpec) -> str
+ """
+ Add a host to be managed by the orchestrator.
+
+        :param spec: spec of the host to add
+ """
+ HostSpec.validate(spec)
+ ip_addr = self._check_valid_addr(spec.hostname, spec.addr)
+ if spec.addr == spec.hostname and ip_addr:
+ spec.addr = ip_addr
+
+ if spec.hostname in self.inventory and self.inventory.get_addr(spec.hostname) != spec.addr:
+ self.cache.refresh_all_host_info(spec.hostname)
+
+ # prime crush map?
+ if spec.location:
+ self.check_mon_command({
+ 'prefix': 'osd crush add-bucket',
+ 'name': spec.hostname,
+ 'type': 'host',
+ 'args': [f'{k}={v}' for k, v in spec.location.items()],
+ })
+
+ if spec.hostname not in self.inventory:
+ self.cache.prime_empty_host(spec.hostname)
+ self.inventory.add_host(spec)
+ self.offline_hosts_remove(spec.hostname)
+ if spec.status == 'maintenance':
+ self._set_maintenance_healthcheck()
+ self.event.set() # refresh stray health check
+ self.log.info('Added host %s' % spec.hostname)
+ return "Added host '{}' with addr '{}'".format(spec.hostname, spec.addr)
+
+ @handle_orch_error
+ def add_host(self, spec: HostSpec) -> str:
+ return self._add_host(spec)
+
+ @handle_orch_error
+ def remove_host(self, host: str, force: bool = False, offline: bool = False) -> str:
+ """
+ Remove a host from orchestrator management.
+
+ :param host: host name
+ :param force: bypass running daemons check
+ :param offline: remove offline host
+ """
+
+ # check if host is offline
+ host_offline = host in self.offline_hosts
+
+ if host_offline and not offline:
+ raise OrchestratorValidationError(
+ "{} is offline, please use --offline and --force to remove this host. This can potentially cause data loss".format(host))
+
+ if not host_offline and offline:
+ raise OrchestratorValidationError(
+ "{} is online, please remove host without --offline.".format(host))
+
+ if offline and not force:
+ raise OrchestratorValidationError("Removing an offline host requires --force")
+
+ # check if there are daemons on the host
+ if not force:
+ daemons = self.cache.get_daemons_by_host(host)
+ if daemons:
+ self.log.warning(f"Blocked {host} removal. Daemons running: {daemons}")
+
+ daemons_table = ""
+ daemons_table += "{:<20} {:<15}\n".format("type", "id")
+ daemons_table += "{:<20} {:<15}\n".format("-" * 20, "-" * 15)
+ for d in daemons:
+ daemons_table += "{:<20} {:<15}\n".format(d.daemon_type, d.daemon_id)
+
+ raise OrchestratorValidationError("Not allowed to remove %s from cluster. "
+ "The following daemons are running in the host:"
+ "\n%s\nPlease run 'ceph orch host drain %s' to remove daemons from host" % (
+ host, daemons_table, host))
+
+        # check if we're removing the last _admin host
+ if not force:
+ p = PlacementSpec(label='_admin')
+ admin_hosts = p.filter_matching_hostspecs(self.inventory.all_specs())
+ if len(admin_hosts) == 1 and admin_hosts[0] == host:
+ raise OrchestratorValidationError(f"Host {host} is the last host with the '_admin'"
+ " label. Please add the '_admin' label to a host"
+ " or add --force to this command")
+
+ def run_cmd(cmd_args: dict) -> None:
+ ret, out, err = self.mon_command(cmd_args)
+ if ret != 0:
+ self.log.debug(f"ran {cmd_args} with mon_command")
+ self.log.error(
+ f"cmd: {cmd_args.get('prefix')} failed with: {err}. (errno:{ret})")
+ self.log.debug(f"cmd: {cmd_args.get('prefix')} returns: {out}")
+
+ if offline:
+ daemons = self.cache.get_daemons_by_host(host)
+ for d in daemons:
+ self.log.info(f"removing: {d.name()}")
+
+ if d.daemon_type != 'osd':
+ self.cephadm_services[str(d.daemon_type)].pre_remove(d)
+ self.cephadm_services[str(d.daemon_type)].post_remove(d, is_failed_deploy=False)
+ else:
+ cmd_args = {
+ 'prefix': 'osd purge-actual',
+ 'id': int(str(d.daemon_id)),
+ 'yes_i_really_mean_it': True
+ }
+ run_cmd(cmd_args)
+
+ cmd_args = {
+ 'prefix': 'osd crush rm',
+ 'name': host
+ }
+ run_cmd(cmd_args)
+
+ self.inventory.rm_host(host)
+ self.cache.rm_host(host)
+ self._reset_con(host)
+ self.event.set() # refresh stray health check
+ self.log.info('Removed host %s' % host)
+ return "Removed {} host '{}'".format('offline' if offline else '', host)
+
+ @handle_orch_error
+ def update_host_addr(self, host: str, addr: str) -> str:
+ self._check_valid_addr(host, addr)
+ self.inventory.set_addr(host, addr)
+ self._reset_con(host)
+ self.event.set() # refresh stray health check
+ self.log.info('Set host %s addr to %s' % (host, addr))
+ return "Updated host '{}' addr to '{}'".format(host, addr)
+
+ @handle_orch_error
+ def get_hosts(self):
+ # type: () -> List[orchestrator.HostSpec]
+ """
+ Return a list of hosts managed by the orchestrator.
+
+ Notes:
+ - skip async: manager reads from cache.
+ """
+ return list(self.inventory.all_specs())
+
+ @handle_orch_error
+ def get_facts(self, hostname: Optional[str] = None) -> List[Dict[str, Any]]:
+ """
+        Return a list of host metadata (gather_facts) for the hosts managed by the orchestrator.
+
+ Notes:
+ - skip async: manager reads from cache.
+ """
+ if hostname:
+ return [self.cache.get_facts(hostname)]
+
+ return [self.cache.get_facts(hostname) for hostname in self.cache.get_hosts()]
+
+ @handle_orch_error
+ def add_host_label(self, host: str, label: str) -> str:
+ self.inventory.add_label(host, label)
+ self.log.info('Added label %s to host %s' % (label, host))
+ self._kick_serve_loop()
+ return 'Added label %s to host %s' % (label, host)
+
+ @handle_orch_error
+ def remove_host_label(self, host: str, label: str, force: bool = False) -> str:
+ # if we remove the _admin label from the only host that has it we could end up
+ # removing the only instance of the config and keyring and cause issues
+ if not force and label == '_admin':
+ p = PlacementSpec(label='_admin')
+ admin_hosts = p.filter_matching_hostspecs(self.inventory.all_specs())
+ if len(admin_hosts) == 1 and admin_hosts[0] == host:
+ raise OrchestratorValidationError(f"Host {host} is the last host with the '_admin'"
+ " label.\nRemoving the _admin label from this host could cause the removal"
+ " of the last cluster config/keyring managed by cephadm.\n"
+ "It is recommended to add the _admin label to another host"
+ " before completing this operation.\nIf you're certain this is"
+ " what you want rerun this command with --force.")
+ self.inventory.rm_label(host, label)
+        self.log.info('Removed label %s from host %s' % (label, host))
+ self._kick_serve_loop()
+ return 'Removed label %s from host %s' % (label, host)
+
+ def _host_ok_to_stop(self, hostname: str, force: bool = False) -> Tuple[int, str]:
+ self.log.debug("running host-ok-to-stop checks")
+ daemons = self.cache.get_daemons()
+ daemon_map: Dict[str, List[str]] = defaultdict(lambda: [])
+ for dd in daemons:
+ assert dd.hostname is not None
+ assert dd.daemon_type is not None
+ assert dd.daemon_id is not None
+ if dd.hostname == hostname:
+ daemon_map[dd.daemon_type].append(dd.daemon_id)
+
+ notifications: List[str] = []
+ error_notifications: List[str] = []
+ okay: bool = True
+ for daemon_type, daemon_ids in daemon_map.items():
+ r = self.cephadm_services[daemon_type_to_service(
+ daemon_type)].ok_to_stop(daemon_ids, force=force)
+ if r.retval:
+ okay = False
+ # collect error notifications so user can see every daemon causing host
+ # to not be okay to stop
+ error_notifications.append(r.stderr)
+ if r.stdout:
+ # if extra notifications to print for user, add them to notifications list
+ notifications.append(r.stdout)
+
+ if not okay:
+ # at least one daemon is not okay to stop
+ return 1, '\n'.join(error_notifications)
+
+ if notifications:
+ return 0, (f'It is presumed safe to stop host {hostname}. '
+ + 'Note the following:\n\n' + '\n'.join(notifications))
+ return 0, f'It is presumed safe to stop host {hostname}'
+
+ @handle_orch_error
+ def host_ok_to_stop(self, hostname: str) -> str:
+ if hostname not in self.cache.get_hosts():
+ raise OrchestratorError(f'Cannot find host "{hostname}"')
+
+ rc, msg = self._host_ok_to_stop(hostname)
+ if rc:
+ raise OrchestratorError(msg, errno=rc)
+
+ self.log.info(msg)
+ return msg
+
+ def _set_maintenance_healthcheck(self) -> None:
+ """Raise/update or clear the maintenance health check as needed"""
+
+ in_maintenance = self.inventory.get_host_with_state("maintenance")
+ if not in_maintenance:
+ self.remove_health_warning('HOST_IN_MAINTENANCE')
+ else:
+ s = "host is" if len(in_maintenance) == 1 else "hosts are"
+ self.set_health_warning("HOST_IN_MAINTENANCE", f"{len(in_maintenance)} {s} in maintenance mode", 1, [
+ f"{h} is in maintenance" for h in in_maintenance])
+
+ @handle_orch_error
+ @host_exists()
+ def enter_host_maintenance(self, hostname: str, force: bool = False) -> str:
+ """ Attempt to place a cluster host in maintenance
+
+ Placing a host into maintenance disables the cluster's ceph target in systemd
+ and stops all ceph daemons. If the host is an osd host we apply the noout flag
+ for the host subtree in crush to prevent data movement during a host maintenance
+ window.
+
+ :param hostname: (str) name of the host (must match an inventory hostname)
+
+ :raises OrchestratorError: Hostname is invalid, host is already in maintenance
+ """
+ if len(self.cache.get_hosts()) == 1:
+ raise OrchestratorError("Maintenance feature is not supported on single node clusters")
+
+ # if upgrade is active, deny
+ if self.upgrade.upgrade_state:
+ raise OrchestratorError(
+ f"Unable to place {hostname} in maintenance with upgrade active/paused")
+
+ tgt_host = self.inventory._inventory[hostname]
+ if tgt_host.get("status", "").lower() == "maintenance":
+ raise OrchestratorError(f"Host {hostname} is already in maintenance")
+
+ host_daemons = self.cache.get_daemon_types(hostname)
+ self.log.debug("daemons on host {}".format(','.join(host_daemons)))
+ if host_daemons:
+ # daemons on this host, so check the daemons can be stopped
+ # and if so, place the host into maintenance by disabling the target
+ rc, msg = self._host_ok_to_stop(hostname, force)
+ if rc:
+ raise OrchestratorError(
+ msg + '\nNote: Warnings can be bypassed with the --force flag', errno=rc)
+
+ # call the host-maintenance function
+ _out, _err, _code = CephadmServe(self)._run_cephadm(hostname, cephadmNoImage, "host-maintenance",
+ ["enter"],
+ error_ok=True)
+ returned_msg = _err[0].split('\n')[-1]
+ if returned_msg.startswith('failed') or returned_msg.startswith('ERROR'):
+ raise OrchestratorError(
+ f"Failed to place {hostname} into maintenance for cluster {self._cluster_fsid}")
+
+ if "osd" in host_daemons:
+ crush_node = hostname if '.' not in hostname else hostname.split('.')[0]
+ rc, out, err = self.mon_command({
+ 'prefix': 'osd set-group',
+ 'flags': 'noout',
+ 'who': [crush_node],
+ 'format': 'json'
+ })
+ if rc:
+ self.log.warning(
+ f"maintenance mode request for {hostname} failed to SET the noout group (rc={rc})")
+ raise OrchestratorError(
+ f"Unable to set the osds on {hostname} to noout (rc={rc})")
+ else:
+ self.log.info(
+ f"maintenance mode request for {hostname} has SET the noout group")
+
+ # update the host status in the inventory
+ tgt_host["status"] = "maintenance"
+ self.inventory._inventory[hostname] = tgt_host
+ self.inventory.save()
+
+ self._set_maintenance_healthcheck()
+ return f'Daemons for Ceph cluster {self._cluster_fsid} stopped on host {hostname}. Host {hostname} moved to maintenance mode'
+
+ @handle_orch_error
+ @host_exists()
+ def exit_host_maintenance(self, hostname: str) -> str:
+ """Exit maintenance mode and return a host to an operational state
+
+        Returning from maintenance will enable the cluster's systemd target and
+        start it, and remove any noout that has been added for the host if the
+        host has osd daemons.
+
+ :param hostname: (str) host name
+
+ :raises OrchestratorError: Unable to return from maintenance, or unset the
+ noout flag
+ """
+ tgt_host = self.inventory._inventory[hostname]
+ if tgt_host['status'] != "maintenance":
+ raise OrchestratorError(f"Host {hostname} is not in maintenance mode")
+
+ outs, errs, _code = CephadmServe(self)._run_cephadm(hostname, cephadmNoImage, 'host-maintenance',
+ ['exit'],
+ error_ok=True)
+ returned_msg = errs[0].split('\n')[-1]
+ if returned_msg.startswith('failed') or returned_msg.startswith('ERROR'):
+ raise OrchestratorError(
+ f"Failed to exit maintenance state for host {hostname}, cluster {self._cluster_fsid}")
+
+ if "osd" in self.cache.get_daemon_types(hostname):
+ crush_node = hostname if '.' not in hostname else hostname.split('.')[0]
+ rc, _out, _err = self.mon_command({
+ 'prefix': 'osd unset-group',
+ 'flags': 'noout',
+ 'who': [crush_node],
+ 'format': 'json'
+ })
+ if rc:
+ self.log.warning(
+ f"exit maintenance request failed to UNSET the noout group for {hostname}, (rc={rc})")
+ raise OrchestratorError(f"Unable to set the osds on {hostname} to noout (rc={rc})")
+ else:
+ self.log.info(
+ f"exit maintenance request has UNSET for the noout group on host {hostname}")
+
+ # update the host record status
+ tgt_host['status'] = ""
+ self.inventory._inventory[hostname] = tgt_host
+ self.inventory.save()
+
+ self._set_maintenance_healthcheck()
+
+ return f"Ceph cluster {self._cluster_fsid} on {hostname} has exited maintenance mode"
+
+ @handle_orch_error
+ @host_exists()
+ def rescan_host(self, hostname: str) -> str:
+ """Use cephadm to issue a disk rescan on each HBA
+
+ Some HBAs and external enclosures don't automatically register
+ device insertion with the kernel, so for these scenarios we need
+ to manually rescan
+
+ :param hostname: (str) host name
+ """
+ self.log.info(f'disk rescan request sent to host "{hostname}"')
+ _out, _err, _code = CephadmServe(self)._run_cephadm(hostname, cephadmNoImage, "disk-rescan",
+ [], no_fsid=True, error_ok=True)
+ if not _err:
+ raise OrchestratorError('Unexpected response from cephadm disk-rescan call')
+
+ msg = _err[0].split('\n')[-1]
+ log_msg = f'disk rescan: {msg}'
+ if msg.upper().startswith('OK'):
+ self.log.info(log_msg)
+ else:
+ self.log.warning(log_msg)
+
+ return f'{msg}'
+
+ def get_minimal_ceph_conf(self) -> str:
+ _, config, _ = self.check_mon_command({
+ "prefix": "config generate-minimal-conf",
+ })
+ extra = self.extra_ceph_conf().conf
+ if extra:
+ config += '\n\n' + extra.strip() + '\n'
+ return config
+
+ def _invalidate_daemons_and_kick_serve(self, filter_host: Optional[str] = None) -> None:
+ if filter_host:
+ self.cache.invalidate_host_daemons(filter_host)
+ else:
+ for h in self.cache.get_hosts():
+ # Also discover daemons deployed manually
+ self.cache.invalidate_host_daemons(h)
+
+ self._kick_serve_loop()
+
+ @handle_orch_error
+ def describe_service(self, service_type: Optional[str] = None, service_name: Optional[str] = None,
+ refresh: bool = False) -> List[orchestrator.ServiceDescription]:
+ if refresh:
+ self._invalidate_daemons_and_kick_serve()
+ self.log.debug('Kicked serve() loop to refresh all services')
+
+ sm: Dict[str, orchestrator.ServiceDescription] = {}
+
+ # known services
+ for nm, spec in self.spec_store.all_specs.items():
+ if service_type is not None and service_type != spec.service_type:
+ continue
+ if service_name is not None and service_name != nm:
+ continue
+
+ if spec.service_type != 'osd':
+ size = spec.placement.get_target_count(self._schedulable_hosts())
+ else:
+ # osd counting is special
+ size = 0
+
+ sm[nm] = orchestrator.ServiceDescription(
+ spec=spec,
+ size=size,
+ running=0,
+ events=self.events.get_for_service(spec.service_name()),
+ created=self.spec_store.spec_created[nm],
+ deleted=self.spec_store.spec_deleted.get(nm, None),
+ virtual_ip=spec.get_virtual_ip(),
+ ports=spec.get_port_start(),
+ )
+ if spec.service_type == 'ingress':
+ # ingress has 2 daemons running per host
+ sm[nm].size *= 2
+
+ # factor daemons into status
+ for h, dm in self.cache.get_daemons_with_volatile_status():
+ for name, dd in dm.items():
+ assert dd.hostname is not None, f'no hostname for {dd!r}'
+ assert dd.daemon_type is not None, f'no daemon_type for {dd!r}'
+
+ n: str = dd.service_name()
+
+ if (
+ service_type
+ and service_type != daemon_type_to_service(dd.daemon_type)
+ ):
+ continue
+ if service_name and service_name != n:
+ continue
+
+ if n not in sm:
+ # new unmanaged service
+ spec = ServiceSpec(
+ unmanaged=True,
+ service_type=daemon_type_to_service(dd.daemon_type),
+ service_id=dd.service_id(),
+ )
+ sm[n] = orchestrator.ServiceDescription(
+ last_refresh=dd.last_refresh,
+ container_image_id=dd.container_image_id,
+ container_image_name=dd.container_image_name,
+ spec=spec,
+ size=0,
+ )
+
+ if dd.status == DaemonDescriptionStatus.running:
+ sm[n].running += 1
+ if dd.daemon_type == 'osd':
+ # The osd count can't be determined by the Placement spec.
+ # Showing an actual/expected representation cannot be determined
+ # here. So we're setting running = size for now.
+ sm[n].size += 1
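+                # track the oldest refresh time across the service's daemons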
+ if (
+ not sm[n].last_refresh
+ or not dd.last_refresh
+ or dd.last_refresh < sm[n].last_refresh # type: ignore
+ ):
+ sm[n].last_refresh = dd.last_refresh
+
+ return list(sm.values())
+
+ @handle_orch_error
+ def list_daemons(self,
+ service_name: Optional[str] = None,
+ daemon_type: Optional[str] = None,
+ daemon_id: Optional[str] = None,
+ host: Optional[str] = None,
+ refresh: bool = False) -> List[orchestrator.DaemonDescription]:
+ if refresh:
+ self._invalidate_daemons_and_kick_serve(host)
+ self.log.debug('Kicked serve() loop to refresh all daemons')
+
+ result = []
+ for h, dm in self.cache.get_daemons_with_volatile_status():
+ if host and h != host:
+ continue
+ for name, dd in dm.items():
+ if daemon_type is not None and daemon_type != dd.daemon_type:
+ continue
+ if daemon_id is not None and daemon_id != dd.daemon_id:
+ continue
+ if service_name is not None and service_name != dd.service_name():
+ continue
+ if not dd.memory_request and dd.daemon_type in ['osd', 'mon']:
+ dd.memory_request = cast(Optional[int], self.get_foreign_ceph_option(
+ dd.name(),
+ f"{dd.daemon_type}_memory_target"
+ ))
+ result.append(dd)
+ return result
+
+ @handle_orch_error
+ def service_action(self, action: str, service_name: str) -> List[str]:
+ if service_name not in self.spec_store.all_specs.keys():
+ raise OrchestratorError(f'Invalid service name "{service_name}".'
+ + ' View currently running services using "ceph orch ls"')
+ dds: List[DaemonDescription] = self.cache.get_daemons_by_service(service_name)
+ if not dds:
+ raise OrchestratorError(f'No daemons exist under service name "{service_name}".'
+ + ' View currently running services using "ceph orch ls"')
+ if action == 'stop' and service_name.split('.')[0].lower() in ['mgr', 'mon', 'osd']:
+ return [f'Stopping entire {service_name} service is prohibited.']
+ self.log.info('%s service %s' % (action.capitalize(), service_name))
+ return [
+ self._schedule_daemon_action(dd.name(), action)
+ for dd in dds
+ ]
+
+ def _daemon_action(self,
+ daemon_spec: CephadmDaemonDeploySpec,
+ action: str,
+ image: Optional[str] = None) -> str:
+ self._daemon_action_set_image(action, image, daemon_spec.daemon_type,
+ daemon_spec.daemon_id)
+
+ if (action == 'redeploy' or action == 'restart') and self.daemon_is_self(daemon_spec.daemon_type,
+ daemon_spec.daemon_id):
+ self.mgr_service.fail_over()
+ return '' # unreachable
+
+ if action == 'redeploy' or action == 'reconfig':
+ if daemon_spec.daemon_type != 'osd':
+ daemon_spec = self.cephadm_services[daemon_type_to_service(
+ daemon_spec.daemon_type)].prepare_create(daemon_spec)
+ else:
+ # for OSDs, we still need to update config, just not carry out the full
+ # prepare_create function
+ daemon_spec.final_config, daemon_spec.deps = self.osd_service.generate_config(daemon_spec)
+ return CephadmServe(self)._create_daemon(daemon_spec, reconfig=(action == 'reconfig'))
+
+ actions = {
+ 'start': ['reset-failed', 'start'],
+ 'stop': ['stop'],
+ 'restart': ['reset-failed', 'restart'],
+ }
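+        # each action maps to one or more `cephadm unit` subcommands run on the
+        # daemon's host; 'reset-failed' clears any previous systemd failure state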
+ name = daemon_spec.name()
+ for a in actions[action]:
+ try:
+ out, err, code = CephadmServe(self)._run_cephadm(
+ daemon_spec.host, name, 'unit',
+ ['--name', name, a])
+ except Exception:
+ self.log.exception(f'`{daemon_spec.host}: cephadm unit {name} {a}` failed')
+ self.cache.invalidate_host_daemons(daemon_spec.host)
+ msg = "{} {} from host '{}'".format(action, name, daemon_spec.host)
+ self.events.for_daemon(name, 'INFO', msg)
+ return msg
+
+ def _daemon_action_set_image(self, action: str, image: Optional[str], daemon_type: str, daemon_id: str) -> None:
+ if image is not None:
+ if action != 'redeploy':
+ raise OrchestratorError(
+ f'Cannot execute {action} with new image. `action` needs to be `redeploy`')
+ if daemon_type not in CEPH_IMAGE_TYPES:
+ raise OrchestratorError(
+ f'Cannot redeploy {daemon_type}.{daemon_id} with a new image: Supported '
+ f'types are: {", ".join(CEPH_IMAGE_TYPES)}')
+
+ self.check_mon_command({
+ 'prefix': 'config set',
+ 'name': 'container_image',
+ 'value': image,
+ 'who': utils.name_to_config_section(daemon_type + '.' + daemon_id),
+ })
+
+ @handle_orch_error
+ def daemon_action(self, action: str, daemon_name: str, image: Optional[str] = None) -> str:
+ d = self.cache.get_daemon(daemon_name)
+ assert d.daemon_type is not None
+ assert d.daemon_id is not None
+
+ if (action == 'redeploy' or action == 'restart') and self.daemon_is_self(d.daemon_type, d.daemon_id) \
+ and not self.mgr_service.mgr_map_has_standby():
+ raise OrchestratorError(
+ f'Unable to schedule redeploy for {daemon_name}: No standby MGRs')
+
+ self._daemon_action_set_image(action, image, d.daemon_type, d.daemon_id)
+
+ self.log.info(f'Schedule {action} daemon {daemon_name}')
+ return self._schedule_daemon_action(daemon_name, action)
+
+ def daemon_is_self(self, daemon_type: str, daemon_id: str) -> bool:
+ return daemon_type == 'mgr' and daemon_id == self.get_mgr_id()
+
+ def get_active_mgr_digests(self) -> List[str]:
+ digests = self.mgr_service.get_active_daemon(
+ self.cache.get_daemons_by_type('mgr')).container_image_digests
+ return digests if digests else []
+
+ def _schedule_daemon_action(self, daemon_name: str, action: str) -> str:
+ dd = self.cache.get_daemon(daemon_name)
+ assert dd.daemon_type is not None
+ assert dd.daemon_id is not None
+ assert dd.hostname is not None
+ if (action == 'redeploy' or action == 'restart') and self.daemon_is_self(dd.daemon_type, dd.daemon_id) \
+ and not self.mgr_service.mgr_map_has_standby():
+ raise OrchestratorError(
+ f'Unable to schedule redeploy for {daemon_name}: No standby MGRs')
+ self.cache.schedule_daemon_action(dd.hostname, dd.name(), action)
+ msg = "Scheduled to {} {} on host '{}'".format(action, daemon_name, dd.hostname)
+ self._kick_serve_loop()
+ return msg
+
+ @handle_orch_error
+ def remove_daemons(self, names):
+ # type: (List[str]) -> List[str]
+ args = []
+ for host, dm in self.cache.daemons.items():
+ for name in names:
+ if name in dm:
+ args.append((name, host))
+ if not args:
+ raise OrchestratorError('Unable to find daemon(s) %s' % (names))
+ self.log.info('Remove daemons %s' % ' '.join([a[0] for a in args]))
+ return self._remove_daemons(args)
+
+ @handle_orch_error
+ def remove_service(self, service_name: str, force: bool = False) -> str:
+ self.log.info('Remove service %s' % service_name)
+ self._trigger_preview_refresh(service_name=service_name)
+ if service_name in self.spec_store:
+ if self.spec_store[service_name].spec.service_type in ('mon', 'mgr'):
+ return f'Unable to remove {service_name} service.\n' \
+ f'Note, you might want to mark the {service_name} service as "unmanaged"'
+ else:
+ return f"Invalid service '{service_name}'. Use 'ceph orch ls' to list available services.\n"
+
+ # Report list of affected OSDs?
+ if not force and service_name.startswith('osd.'):
+ osds_msg = {}
+ for h, dm in self.cache.get_daemons_with_volatile_status():
+ osds_to_remove = []
+ for name, dd in dm.items():
+ if dd.daemon_type == 'osd' and dd.service_name() == service_name:
+ osds_to_remove.append(str(dd.daemon_id))
+ if osds_to_remove:
+ osds_msg[h] = osds_to_remove
+ if osds_msg:
+ msg = ''
+ for h, ls in osds_msg.items():
+ msg += f'\thost {h}: {" ".join([f"osd.{id}" for id in ls])}'
+                raise OrchestratorError(f'If {service_name} is removed then the following OSDs will remain; use --force to proceed anyway\n{msg}')
+
+ found = self.spec_store.rm(service_name)
+ if found and service_name.startswith('osd.'):
+ self.spec_store.finally_rm(service_name)
+ self._kick_serve_loop()
+ return f'Removed service {service_name}'
+
+ @handle_orch_error
+ def get_inventory(self, host_filter: Optional[orchestrator.InventoryFilter] = None, refresh: bool = False) -> List[orchestrator.InventoryHost]:
+ """
+ Return the storage inventory of hosts matching the given filter.
+
+ :param host_filter: host filter
+
+ TODO:
+ - add filtering by label
+ """
+ if refresh:
+ if host_filter and host_filter.hosts:
+ for h in host_filter.hosts:
+ self.log.debug(f'will refresh {h} devs')
+ self.cache.invalidate_host_devices(h)
+ else:
+ for h in self.cache.get_hosts():
+ self.log.debug(f'will refresh {h} devs')
+ self.cache.invalidate_host_devices(h)
+
+ self.event.set()
+ self.log.debug('Kicked serve() loop to refresh devices')
+
+ result = []
+ for host, dls in self.cache.devices.items():
+ if host_filter and host_filter.hosts and host not in host_filter.hosts:
+ continue
+ result.append(orchestrator.InventoryHost(host,
+ inventory.Devices(dls)))
+ return result
+
+ @handle_orch_error
+ def zap_device(self, host: str, path: str) -> str:
+ """Zap a device on a managed host.
+
+ Use ceph-volume zap to return a device to an unused/free state
+
+ Args:
+ host (str): hostname of the cluster host
+ path (str): device path
+
+ Raises:
+ OrchestratorError: host is not a cluster host
+ OrchestratorError: host is in maintenance and therefore unavailable
+ OrchestratorError: device path not found on the host
+ OrchestratorError: device is known to a different ceph cluster
+ OrchestratorError: device holds active osd
+            OrchestratorError: device cache hasn't been populated yet.
+
+ Returns:
+ str: output from the zap command
+ """
+
+ self.log.info('Zap device %s:%s' % (host, path))
+
+ if host not in self.inventory.keys():
+ raise OrchestratorError(
+ f"Host '{host}' is not a member of the cluster")
+
+ host_info = self.inventory._inventory.get(host, {})
+ if host_info.get('status', '').lower() == 'maintenance':
+ raise OrchestratorError(
+ f"Host '{host}' is in maintenance mode, which prevents any actions against it.")
+
+ if host not in self.cache.devices:
+ raise OrchestratorError(
+ f"Host '{host} hasn't been scanned yet to determine it's inventory. Please try again later.")
+
+ host_devices = self.cache.devices[host]
+ path_found = False
+ osd_id_list: List[str] = []
+
+ for dev in host_devices:
+ if dev.path == path:
+ # match, so look a little deeper
+ if dev.lvs:
+ for lv in cast(List[Dict[str, str]], dev.lvs):
+ if lv.get('osd_id', ''):
+ lv_fsid = lv.get('cluster_fsid')
+ if lv_fsid != self._cluster_fsid:
+ raise OrchestratorError(
+ f"device {path} has lv's from a different Ceph cluster ({lv_fsid})")
+ osd_id_list.append(lv.get('osd_id', ''))
+ path_found = True
+ break
+ if not path_found:
+ raise OrchestratorError(
+ f"Device path '{path}' not found on host '{host}'")
+
+ if osd_id_list:
+ dev_name = os.path.basename(path)
+ active_osds: List[str] = []
+ for osd_id in osd_id_list:
+ metadata = self.get_metadata('osd', str(osd_id))
+ if metadata:
+ if metadata.get('hostname', '') == host and dev_name in metadata.get('devices', '').split(','):
+ active_osds.append("osd." + osd_id)
+ if active_osds:
+ raise OrchestratorError(
+ f"Unable to zap: device '{path}' on {host} has {len(active_osds)} active "
+ f"OSD{'s' if len(active_osds) > 1 else ''}"
+ f" ({', '.join(active_osds)}). Use 'ceph orch osd rm' first.")
+
+ out, err, code = CephadmServe(self)._run_cephadm(
+ host, 'osd', 'ceph-volume',
+ ['--', 'lvm', 'zap', '--destroy', path],
+ error_ok=True)
+
+ self.cache.invalidate_host_devices(host)
+ if code:
+ raise OrchestratorError('Zap failed: %s' % '\n'.join(out + err))
+ msg = f'zap successful for {path} on {host}'
+ self.log.info(msg)
+
+ return msg + '\n'
+
+ @handle_orch_error
+ def blink_device_light(self, ident_fault: str, on: bool, locs: List[orchestrator.DeviceLightLoc]) -> List[str]:
+ """
+ Blink a device light. Calling something like::
+
+ lsmcli local-disk-ident-led-on --path $path
+
+ If you must, you can customize this via::
+
+ ceph config-key set mgr/cephadm/blink_device_light_cmd '<my jinja2 template>'
+ ceph config-key set mgr/cephadm/<host>/blink_device_light_cmd '<my jinja2 template>'
+
+ See templates/blink_device_light_cmd.j2
+ """
+ @forall_hosts
+ def blink(host: str, dev: str, path: str) -> str:
+ cmd_line = self.template.render('blink_device_light_cmd.j2',
+ {
+ 'on': on,
+ 'ident_fault': ident_fault,
+ 'dev': dev,
+ 'path': path
+ },
+ host=host)
+ cmd_args = shlex.split(cmd_line)
+
+ out, err, code = CephadmServe(self)._run_cephadm(
+ host, 'osd', 'shell', ['--'] + cmd_args,
+ error_ok=True)
+ if code:
+ raise OrchestratorError(
+ 'Unable to affect %s light for %s:%s. Command: %s' % (
+ ident_fault, host, dev, ' '.join(cmd_args)))
+ self.log.info('Set %s light for %s:%s %s' % (
+ ident_fault, host, dev, 'on' if on else 'off'))
+ return "Set %s light for %s:%s %s" % (
+ ident_fault, host, dev, 'on' if on else 'off')
+
+ return blink(locs)
+
+ def get_osd_uuid_map(self, only_up=False):
+ # type: (bool) -> Dict[str, str]
+ osd_map = self.get('osd_map')
+ r = {}
+ for o in osd_map['osds']:
+ # only include OSDs that have ever started in this map. this way
+ # an interrupted osd create can be repeated and succeed the second
+ # time around.
+ osd_id = o.get('osd')
+ if osd_id is None:
+ raise OrchestratorError("Could not retrieve osd_id from osd_map")
+            if not only_up or (o['up_from'] > 0):
+ r[str(osd_id)] = o.get('uuid', '')
+ return r
+
+ def get_osd_by_id(self, osd_id: int) -> Optional[Dict[str, Any]]:
+ osd = [x for x in self.get('osd_map')['osds']
+ if x['osd'] == osd_id]
+
+ if len(osd) != 1:
+ return None
+
+ return osd[0]
+
+ def _trigger_preview_refresh(self,
+ specs: Optional[List[DriveGroupSpec]] = None,
+ service_name: Optional[str] = None,
+ ) -> None:
+ # Only trigger a refresh when a spec has changed
+ trigger_specs = []
+ if specs:
+ for spec in specs:
+ preview_spec = self.spec_store.spec_preview.get(spec.service_name())
+ # the to-be-preview spec != the actual spec, this means we need to
+ # trigger a refresh, if the spec has been removed (==None) we need to
+ # refresh as well.
+ if not preview_spec or spec != preview_spec:
+ trigger_specs.append(spec)
+ if service_name:
+ trigger_specs = [cast(DriveGroupSpec, self.spec_store.spec_preview.get(service_name))]
+ if not any(trigger_specs):
+ return None
+
+ refresh_hosts = self.osd_service.resolve_hosts_for_osdspecs(specs=trigger_specs)
+ for host in refresh_hosts:
+ self.log.info(f"Marking host: {host} for OSDSpec preview refresh.")
+ self.cache.osdspec_previews_refresh_queue.append(host)
+
+ @handle_orch_error
+ def apply_drivegroups(self, specs: List[DriveGroupSpec]) -> List[str]:
+ """
+ Deprecated. Please use `apply()` instead.
+
+        Keeping this around to be compatible with mgr/dashboard
+ """
+ return [self._apply(spec) for spec in specs]
+
+ @handle_orch_error
+ def create_osds(self, drive_group: DriveGroupSpec) -> str:
+ hosts: List[HostSpec] = self.inventory.all_specs()
+ filtered_hosts: List[str] = drive_group.placement.filter_matching_hostspecs(hosts)
+ if not filtered_hosts:
+ return "Invalid 'host:device' spec: host not found in cluster. Please check 'ceph orch host ls' for available hosts"
+ return self.osd_service.create_from_spec(drive_group)
+
+ def _preview_osdspecs(self,
+ osdspecs: Optional[List[DriveGroupSpec]] = None
+ ) -> dict:
+ if not osdspecs:
+ return {'n/a': [{'error': True,
+ 'message': 'No OSDSpec or matching hosts found.'}]}
+ matching_hosts = self.osd_service.resolve_hosts_for_osdspecs(specs=osdspecs)
+ if not matching_hosts:
+ return {'n/a': [{'error': True,
+ 'message': 'No OSDSpec or matching hosts found.'}]}
+ # Is any host still loading previews or still in the queue to be previewed
+ pending_hosts = {h for h in self.cache.loading_osdspec_preview if h in matching_hosts}
+ if pending_hosts or any(item in self.cache.osdspec_previews_refresh_queue for item in matching_hosts):
+ # Report 'pending' when any of the matching hosts is still loading previews (flag is True)
+ return {'n/a': [{'error': True,
+ 'message': 'Preview data is being generated.. '
+ 'Please re-run this command in a bit.'}]}
+ # drop all keys that are not in search_hosts and only select reports that match the requested osdspecs
+ previews_for_specs = {}
+ for host, raw_reports in self.cache.osdspec_previews.items():
+ if host not in matching_hosts:
+ continue
+ osd_reports = []
+ for osd_report in raw_reports:
+ if osd_report.get('osdspec') in [x.service_id for x in osdspecs]:
+ osd_reports.append(osd_report)
+ previews_for_specs.update({host: osd_reports})
+ return previews_for_specs
+
+ def _calc_daemon_deps(self,
+ spec: Optional[ServiceSpec],
+ daemon_type: str,
+ daemon_id: str) -> List[str]:
+ deps = []
+ if daemon_type == 'haproxy':
+ # because cephadm creates new daemon instances whenever
+ # port or ip changes, identifying daemons by name is
+ # sufficient to detect changes.
+ if not spec:
+ return []
+ ingress_spec = cast(IngressSpec, spec)
+ assert ingress_spec.backend_service
+ daemons = self.cache.get_daemons_by_service(ingress_spec.backend_service)
+ deps = [d.name() for d in daemons]
+ elif daemon_type == 'keepalived':
+ # because cephadm creates new daemon instances whenever
+ # port or ip changes, identifying daemons by name is
+ # sufficient to detect changes.
+ if not spec:
+ return []
+ daemons = self.cache.get_daemons_by_service(spec.service_name())
+ deps = [d.name() for d in daemons if d.daemon_type == 'haproxy']
+ elif daemon_type == 'iscsi':
+ if spec:
+ iscsi_spec = cast(IscsiServiceSpec, spec)
+ deps = [self.iscsi_service.get_trusted_ips(iscsi_spec)]
+ else:
+ deps = [self.get_mgr_ip()]
+ else:
+ need = {
+ 'prometheus': ['mgr', 'alertmanager', 'node-exporter', 'ingress'],
+ 'grafana': ['prometheus'],
+ 'alertmanager': ['mgr', 'alertmanager', 'snmp-gateway'],
+ }
+ for dep_type in need.get(daemon_type, []):
+ for dd in self.cache.get_daemons_by_type(dep_type):
+ deps.append(dd.name())
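+            # include the prometheus module's server port so that a port change is
+            # detected as a dependency change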
+ if daemon_type == 'prometheus':
+ deps.append(str(self.get_module_option_ex('prometheus', 'server_port', 9283)))
+ return sorted(deps)
+
+ @forall_hosts
+ def _remove_daemons(self, name: str, host: str) -> str:
+ return CephadmServe(self)._remove_daemon(name, host)
+
+ def _check_pool_exists(self, pool: str, service_name: str) -> None:
+ logger.info(f'Checking pool "{pool}" exists for service {service_name}')
+ if not self.rados.pool_exists(pool):
+ raise OrchestratorError(f'Cannot find pool "{pool}" for '
+ f'service {service_name}')
+
+ def _add_daemon(self,
+ daemon_type: str,
+ spec: ServiceSpec) -> List[str]:
+ """
+ Add (and place) a daemon. Require explicit host placement. Do not
+ schedule, and do not apply the related scheduling limitations.
+ """
+ if spec.service_name() not in self.spec_store:
+ raise OrchestratorError('Unable to add a Daemon without Service.\n'
+ 'Please use `ceph orch apply ...` to create a Service.\n'
+ 'Note, you might want to create the service with "unmanaged=true"')
+
+ self.log.debug('_add_daemon %s spec %s' % (daemon_type, spec.placement))
+ if not spec.placement.hosts:
+ raise OrchestratorError('must specify host(s) to deploy on')
+ count = spec.placement.count or len(spec.placement.hosts)
+ daemons = self.cache.get_daemons_by_service(spec.service_name())
+ return self._create_daemons(daemon_type, spec, daemons,
+ spec.placement.hosts, count)
+
+ def _create_daemons(self,
+ daemon_type: str,
+ spec: ServiceSpec,
+ daemons: List[DaemonDescription],
+ hosts: List[HostPlacementSpec],
+ count: int) -> List[str]:
+ if count > len(hosts):
+ raise OrchestratorError('too few hosts: want %d, have %s' % (
+ count, hosts))
+
+ did_config = False
+ service_type = daemon_type_to_service(daemon_type)
+
+ args = [] # type: List[CephadmDaemonDeploySpec]
+ for host, network, name in hosts:
+ daemon_id = self.get_unique_name(daemon_type, host, daemons,
+ prefix=spec.service_id,
+ forcename=name)
+
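+            # the service-level config step only needs to run once per spec,
+            # not once per daemon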
+ if not did_config:
+ self.cephadm_services[service_type].config(spec)
+ did_config = True
+
+ daemon_spec = self.cephadm_services[service_type].make_daemon_spec(
+ host, daemon_id, network, spec,
+ # NOTE: this does not consider port conflicts!
+ ports=spec.get_port_start())
+ self.log.debug('Placing %s.%s on host %s' % (
+ daemon_type, daemon_id, host))
+ args.append(daemon_spec)
+
+ # add to daemon list so next name(s) will also be unique
+ sd = orchestrator.DaemonDescription(
+ hostname=host,
+ daemon_type=daemon_type,
+ daemon_id=daemon_id,
+ )
+ daemons.append(sd)
+
+        @forall_hosts
+ def create_func_map(*args: Any) -> str:
+ daemon_spec = self.cephadm_services[daemon_type].prepare_create(*args)
+ return CephadmServe(self)._create_daemon(daemon_spec)
+
+ return create_func_map(args)
+
+ @handle_orch_error
+ def add_daemon(self, spec: ServiceSpec) -> List[str]:
+ ret: List[str] = []
+ try:
+ with orchestrator.set_exception_subject('service', spec.service_name(), overwrite=True):
+ for d_type in service_to_daemon_types(spec.service_type):
+ ret.extend(self._add_daemon(d_type, spec))
+ return ret
+ except OrchestratorError as e:
+ self.events.from_orch_error(e)
+ raise
+
+ @handle_orch_error
+ def apply_mon(self, spec: ServiceSpec) -> str:
+ return self._apply(spec)
+
+ def _apply(self, spec: GenericSpec) -> str:
+ if spec.service_type == 'host':
+ return self._add_host(cast(HostSpec, spec))
+
+ if spec.service_type == 'osd':
+ # _trigger preview refresh needs to be smart and
+ # should only refresh if a change has been detected
+ self._trigger_preview_refresh(specs=[cast(DriveGroupSpec, spec)])
+
+ return self._apply_service_spec(cast(ServiceSpec, spec))
+
+ def set_health_warning(self, name: str, summary: str, count: int, detail: List[str]) -> None:
+ self.health_checks[name] = {
+ 'severity': 'warning',
+ 'summary': summary,
+ 'count': count,
+ 'detail': detail,
+ }
+ self.set_health_checks(self.health_checks)
+
+ def remove_health_warning(self, name: str) -> None:
+ if name in self.health_checks:
+ del self.health_checks[name]
+ self.set_health_checks(self.health_checks)
+
+ def _plan(self, spec: ServiceSpec) -> dict:
+ if spec.service_type == 'osd':
+ return {'service_name': spec.service_name(),
+ 'service_type': spec.service_type,
+ 'data': self._preview_osdspecs(osdspecs=[cast(DriveGroupSpec, spec)])}
+
+ svc = self.cephadm_services[spec.service_type]
+ ha = HostAssignment(
+ spec=spec,
+ hosts=self._schedulable_hosts(),
+ unreachable_hosts=self._unreachable_hosts(),
+ networks=self.cache.networks,
+ daemons=self.cache.get_daemons_by_service(spec.service_name()),
+ allow_colo=svc.allow_colo(),
+ rank_map=self.spec_store[spec.service_name()].rank_map if svc.ranked() else None
+ )
+ ha.validate()
+ hosts, to_add, to_remove = ha.place()
+
+ return {
+ 'service_name': spec.service_name(),
+ 'service_type': spec.service_type,
+ 'add': [hs.hostname for hs in to_add],
+ 'remove': [d.name() for d in to_remove]
+ }
+
+ @handle_orch_error
+ def plan(self, specs: Sequence[GenericSpec]) -> List:
+ results = [{'warning': 'WARNING! Dry-Runs are snapshots of a certain point in time and are bound \n'
+ 'to the current inventory setup. If any of these conditions change, the \n'
+ 'preview will be invalid. Please make sure to have a minimal \n'
+ 'timeframe between planning and applying the specs.'}]
+ if any([spec.service_type == 'host' for spec in specs]):
+ return [{'error': 'Found <HostSpec>. Previews that include Host Specifications are not supported, yet.'}]
+ for spec in specs:
+ results.append(self._plan(cast(ServiceSpec, spec)))
+ return results
+
+ def _apply_service_spec(self, spec: ServiceSpec) -> str:
+ if spec.placement.is_empty():
+ # fill in default placement
+ defaults = {
+ 'mon': PlacementSpec(count=5),
+ 'mgr': PlacementSpec(count=2),
+ 'mds': PlacementSpec(count=2),
+ 'rgw': PlacementSpec(count=2),
+ 'ingress': PlacementSpec(count=2),
+ 'iscsi': PlacementSpec(count=1),
+ 'rbd-mirror': PlacementSpec(count=2),
+ 'cephfs-mirror': PlacementSpec(count=1),
+ 'nfs': PlacementSpec(count=1),
+ 'grafana': PlacementSpec(count=1),
+ 'alertmanager': PlacementSpec(count=1),
+ 'prometheus': PlacementSpec(count=1),
+ 'node-exporter': PlacementSpec(host_pattern='*'),
+ 'crash': PlacementSpec(host_pattern='*'),
+ 'container': PlacementSpec(count=1),
+ 'cephadm-exporter': PlacementSpec(host_pattern='*'),
+ 'snmp-gateway': PlacementSpec(count=1),
+ }
+ spec.placement = defaults[spec.service_type]
+ elif spec.service_type in ['mon', 'mgr'] and \
+ spec.placement.count is not None and \
+ spec.placement.count < 1:
+ raise OrchestratorError('cannot scale %s service below 1' % (
+ spec.service_type))
+
+ host_count = len(self.inventory.keys())
+ max_count = self.max_count_per_host
+
+ if spec.placement.count is not None:
+ if spec.service_type in ['mon', 'mgr']:
+ if spec.placement.count > max(5, host_count):
+ raise OrchestratorError(
+ (f'The maximum number of {spec.service_type} daemons allowed with {host_count} hosts is {max(5, host_count)}.'))
+ elif spec.service_type != 'osd':
+ if spec.placement.count > (max_count * host_count):
+ raise OrchestratorError((f'The maximum number of {spec.service_type} daemons allowed with {host_count} hosts is {host_count*max_count} ({host_count}x{max_count}).'
+ + ' This limit can be adjusted by changing the mgr/cephadm/max_count_per_host config option'))
+
+ if spec.placement.count_per_host is not None and spec.placement.count_per_host > max_count and spec.service_type != 'osd':
+ raise OrchestratorError((f'The maximum count_per_host allowed is {max_count}.'
+ + ' This limit can be adjusted by changing the mgr/cephadm/max_count_per_host config option'))
+
+ HostAssignment(
+ spec=spec,
+ hosts=self.inventory.all_specs(), # All hosts, even those without daemon refresh
+ unreachable_hosts=self._unreachable_hosts(),
+ networks=self.cache.networks,
+ daemons=self.cache.get_daemons_by_service(spec.service_name()),
+ allow_colo=self.cephadm_services[spec.service_type].allow_colo(),
+ ).validate()
+
+ self.log.info('Saving service %s spec with placement %s' % (
+ spec.service_name(), spec.placement.pretty_str()))
+ self.spec_store.save(spec)
+ self._kick_serve_loop()
+ return "Scheduled %s update..." % spec.service_name()
+
+ @handle_orch_error
+ def apply(self, specs: Sequence[GenericSpec], no_overwrite: bool = False) -> List[str]:
+ results = []
+ for spec in specs:
+ if no_overwrite:
+ if spec.service_type == 'host' and cast(HostSpec, spec).hostname in self.inventory:
+ results.append('Skipped %s host spec. To change %s spec omit --no-overwrite flag'
+ % (cast(HostSpec, spec).hostname, spec.service_type))
+ continue
+ elif cast(ServiceSpec, spec).service_name() in self.spec_store:
+ results.append('Skipped %s service spec. To change %s spec omit --no-overwrite flag'
+ % (cast(ServiceSpec, spec).service_name(), cast(ServiceSpec, spec).service_name()))
+ continue
+ results.append(self._apply(spec))
+ return results
+
+ @handle_orch_error
+ def apply_mgr(self, spec: ServiceSpec) -> str:
+ return self._apply(spec)
+
+ @handle_orch_error
+ def apply_mds(self, spec: ServiceSpec) -> str:
+ return self._apply(spec)
+
+ @handle_orch_error
+ def apply_rgw(self, spec: ServiceSpec) -> str:
+ return self._apply(spec)
+
+ @handle_orch_error
+ def apply_ingress(self, spec: ServiceSpec) -> str:
+ return self._apply(spec)
+
+ @handle_orch_error
+ def apply_iscsi(self, spec: ServiceSpec) -> str:
+ return self._apply(spec)
+
+ @handle_orch_error
+ def apply_rbd_mirror(self, spec: ServiceSpec) -> str:
+ return self._apply(spec)
+
+ @handle_orch_error
+ def apply_nfs(self, spec: ServiceSpec) -> str:
+ return self._apply(spec)
+
+ def _get_dashboard_url(self):
+ # type: () -> str
+ return self.get('mgr_map').get('services', {}).get('dashboard', '')
+
+ @handle_orch_error
+ def apply_prometheus(self, spec: ServiceSpec) -> str:
+ return self._apply(spec)
+
+ @handle_orch_error
+ def apply_node_exporter(self, spec: ServiceSpec) -> str:
+ return self._apply(spec)
+
+ @handle_orch_error
+ def apply_crash(self, spec: ServiceSpec) -> str:
+ return self._apply(spec)
+
+ @handle_orch_error
+ def apply_grafana(self, spec: ServiceSpec) -> str:
+ return self._apply(spec)
+
+ @handle_orch_error
+ def apply_alertmanager(self, spec: ServiceSpec) -> str:
+ return self._apply(spec)
+
+ @handle_orch_error
+ def apply_container(self, spec: ServiceSpec) -> str:
+ return self._apply(spec)
+
+ @handle_orch_error
+ def apply_snmp_gateway(self, spec: ServiceSpec) -> str:
+ return self._apply(spec)
+
+ @handle_orch_error
+ def apply_cephadm_exporter(self, spec: ServiceSpec) -> str:
+ return self._apply(spec)
+
+ @handle_orch_error
+ def upgrade_check(self, image: str, version: str) -> str:
+ if self.inventory.get_host_with_state("maintenance"):
+ raise OrchestratorError("check aborted - you have hosts in maintenance state")
+
+ if version:
+ target_name = self.container_image_base + ':v' + version
+ elif image:
+ target_name = image
+ else:
+ raise OrchestratorError('must specify either image or version')
+
+ image_info = CephadmServe(self)._get_container_image_info(target_name)
+
+ ceph_image_version = image_info.ceph_version
+ if not ceph_image_version:
+ return f'Unable to extract ceph version from {target_name}.'
+ if ceph_image_version.startswith('ceph version '):
+ ceph_image_version = ceph_image_version.split(' ')[2]
+ version_error = self.upgrade._check_target_version(ceph_image_version)
+ if version_error:
+ return f'Incompatible upgrade: {version_error}'
+
+ self.log.debug(f'image info {image} -> {image_info}')
+ r: dict = {
+ 'target_name': target_name,
+ 'target_id': image_info.image_id,
+ 'target_version': image_info.ceph_version,
+ 'needs_update': dict(),
+ 'up_to_date': list(),
+ 'non_ceph_image_daemons': list()
+ }
+ for host, dm in self.cache.daemons.items():
+ for name, dd in dm.items():
+ if image_info.image_id == dd.container_image_id:
+ r['up_to_date'].append(dd.name())
+ elif dd.daemon_type in CEPH_IMAGE_TYPES:
+ r['needs_update'][dd.name()] = {
+ 'current_name': dd.container_image_name,
+ 'current_id': dd.container_image_id,
+ 'current_version': dd.version,
+ }
+ else:
+ r['non_ceph_image_daemons'].append(dd.name())
+ if self.use_repo_digest and image_info.repo_digests:
+ # FIXME: we assume the first digest is the best one to use
+ r['target_digest'] = image_info.repo_digests[0]
+
+ return json.dumps(r, indent=4, sort_keys=True)
+
+ @handle_orch_error
+ def upgrade_status(self) -> orchestrator.UpgradeStatusSpec:
+ return self.upgrade.upgrade_status()
+
+ @handle_orch_error
+ def upgrade_ls(self, image: Optional[str], tags: bool) -> Dict[Any, Any]:
+ return self.upgrade.upgrade_ls(image, tags)
+
+ @handle_orch_error
+ def upgrade_start(self, image: str, version: str, daemon_types: Optional[List[str]] = None, host_placement: Optional[str] = None,
+ services: Optional[List[str]] = None, limit: Optional[int] = None) -> str:
+ if self.inventory.get_host_with_state("maintenance"):
+ raise OrchestratorError("upgrade aborted - you have host(s) in maintenance state")
+ if daemon_types is not None and services is not None:
+ raise OrchestratorError('--daemon-types and --services are mutually exclusive')
+ if daemon_types is not None:
+ for dtype in daemon_types:
+ if dtype not in CEPH_UPGRADE_ORDER:
+ raise OrchestratorError(f'Upgrade aborted - Got unexpected daemon type "{dtype}".\n'
+ f'Viable daemon types for this command are: {utils.CEPH_TYPES + utils.GATEWAY_TYPES}')
+ if services is not None:
+ for service in services:
+ if service not in self.spec_store:
+ raise OrchestratorError(f'Upgrade aborted - Got unknown service name "{service}".\n'
+ f'Known services are: {self.spec_store.all_specs.keys()}')
+ hosts: Optional[List[str]] = None
+ if host_placement is not None:
+ all_hosts = list(self.inventory.all_specs())
+ placement = PlacementSpec.from_string(host_placement)
+ hosts = placement.filter_matching_hostspecs(all_hosts)
+ if not hosts:
+ raise OrchestratorError(
+ f'Upgrade aborted - hosts parameter "{host_placement}" provided did not match any hosts')
+
+ if limit is not None:
+ if limit < 1:
+ raise OrchestratorError(f'Upgrade aborted - --limit arg must be a positive integer, not {limit}')
+
+ return self.upgrade.upgrade_start(image, version, daemon_types, hosts, services, limit)
+
+ @handle_orch_error
+ def upgrade_pause(self) -> str:
+ return self.upgrade.upgrade_pause()
+
+ @handle_orch_error
+ def upgrade_resume(self) -> str:
+ return self.upgrade.upgrade_resume()
+
+ @handle_orch_error
+ def upgrade_stop(self) -> str:
+ return self.upgrade.upgrade_stop()
+
+ @handle_orch_error
+ def remove_osds(self, osd_ids: List[str],
+ replace: bool = False,
+ force: bool = False,
+ zap: bool = False) -> str:
+ """
+ Takes a list of OSDs and schedules them for removal.
+ The function that takes care of the actual removal is
+ process_removal_queue().
+ """
+
+ daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_type('osd')
+ to_remove_daemons = list()
+ for daemon in daemons:
+ if daemon.daemon_id in osd_ids:
+ to_remove_daemons.append(daemon)
+ if not to_remove_daemons:
+ return f"Unable to find OSDs: {osd_ids}"
+
+ for daemon in to_remove_daemons:
+ assert daemon.daemon_id is not None
+ try:
+ self.to_remove_osds.enqueue(OSD(osd_id=int(daemon.daemon_id),
+ replace=replace,
+ force=force,
+ zap=zap,
+ hostname=daemon.hostname,
+ process_started_at=datetime_now(),
+ remove_util=self.to_remove_osds.rm_util))
+ except NotFoundError:
+ return f"Unable to find OSDs: {osd_ids}"
+
+ # trigger the serve loop to initiate the removal
+ self._kick_serve_loop()
+ return "Scheduled OSD(s) for removal"
+
+ @handle_orch_error
+ def stop_remove_osds(self, osd_ids: List[str]) -> str:
+ """
+        Stop the `removal` process for a list of OSDs.
+        This will revert their weight and remove them from the osds_to_remove queue.
+ """
+ for osd_id in osd_ids:
+ try:
+ self.to_remove_osds.rm(OSD(osd_id=int(osd_id),
+ remove_util=self.to_remove_osds.rm_util))
+ except (NotFoundError, KeyError, ValueError):
+ return f'Unable to find OSD in the queue: {osd_id}'
+
+ # trigger the serve loop to halt the removal
+ self._kick_serve_loop()
+ return "Stopped OSD(s) removal"
+
+ @handle_orch_error
+ def remove_osds_status(self) -> List[OSD]:
+ """
+ The CLI call to retrieve an osd removal report
+ """
+ return self.to_remove_osds.all_osds()
+
+ @handle_orch_error
+ def drain_host(self, hostname, force=False):
+ # type: (str, bool) -> str
+ """
+ Drain all daemons from a host.
+        :param hostname: host name
+        :param force: drain even if this is the last host with the '_admin' label
+ """
+
+ # if we drain the last admin host we could end up removing the only instance
+ # of the config and keyring and cause issues
+ if not force:
+ p = PlacementSpec(label='_admin')
+ admin_hosts = p.filter_matching_hostspecs(self.inventory.all_specs())
+ if len(admin_hosts) == 1 and admin_hosts[0] == hostname:
+ raise OrchestratorValidationError(f"Host {hostname} is the last host with the '_admin'"
+ " label.\nDraining this host could cause the removal"
+ " of the last cluster config/keyring managed by cephadm.\n"
+ "It is recommended to add the _admin label to another host"
+ " before completing this operation.\nIf you're certain this is"
+ " what you want rerun this command with --force.")
+
+ self.add_host_label(hostname, '_no_schedule')
+
+ daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_host(hostname)
+
+ osds_to_remove = [d.daemon_id for d in daemons if d.daemon_type == 'osd']
+ self.remove_osds(osds_to_remove)
+
+ daemons_table = ""
+ daemons_table += "{:<20} {:<15}\n".format("type", "id")
+ daemons_table += "{:<20} {:<15}\n".format("-" * 20, "-" * 15)
+ for d in daemons:
+ daemons_table += "{:<20} {:<15}\n".format(d.daemon_type, d.daemon_id)
+
+ return "Scheduled to remove the following daemons from host '{}'\n{}".format(hostname, daemons_table)
+
+ def trigger_connect_dashboard_rgw(self) -> None:
+ self.need_connect_dashboard_rgw = True
+ self.event.set()
diff --git a/src/pybind/mgr/cephadm/offline_watcher.py b/src/pybind/mgr/cephadm/offline_watcher.py
new file mode 100644
index 000000000..006156fc7
--- /dev/null
+++ b/src/pybind/mgr/cephadm/offline_watcher.py
@@ -0,0 +1,70 @@
+import logging
+from typing import List, Optional, TYPE_CHECKING
+
+import multiprocessing as mp
+import threading
+
+from cephadm.serve import CephadmServe
+
+try:
+ import remoto
+except ImportError:
+ remoto = None
+
+
+if TYPE_CHECKING:
+ from cephadm.module import CephadmOrchestrator
+
+logger = logging.getLogger(__name__)
+
+
+class OfflineHostWatcher(threading.Thread):
+ def __init__(self, mgr: "CephadmOrchestrator") -> None:
+ self.mgr = mgr
+ self.hosts: Optional[List[str]] = None
+ self.new_hosts: Optional[List[str]] = None
+ self.stop = False
+ self.event = threading.Event()
+ super(OfflineHostWatcher, self).__init__(target=self.run)
+
+ def run(self) -> None:
+ self.thread_pool = mp.pool.ThreadPool(10)
+ while not self.stop:
+ # only need to take action if we have hosts to check
+ if self.hosts or self.new_hosts:
+ if self.new_hosts:
+ self.hosts = self.new_hosts
+ self.new_hosts = None
+ logger.debug(f'OfflineHostDetector: Checking if hosts: {self.hosts} are offline.')
+ assert self.hosts is not None
+ self.thread_pool.map(self.check_host, self.hosts)
+ self.event.wait(20)
+ self.event.clear()
+ self.thread_pool.close()
+ self.thread_pool.join()
+
+ def check_host(self, host: str) -> None:
+ if host not in self.mgr.offline_hosts:
+ try:
+ with CephadmServe(self.mgr)._remote_connection(host) as tpl:
+ conn, connr = tpl
+ out, err, code = remoto.process.check(conn, ['true'])
+ except Exception:
+ logger.debug(f'OfflineHostDetector: detected {host} to be offline')
+ # kick serve loop in case corrective action must be taken for offline host
+ self.mgr._kick_serve_loop()
+
+ def set_hosts(self, hosts: List[str]) -> None:
+ hosts.sort()
+ if (not self.hosts or self.hosts != hosts) and hosts:
+ self.new_hosts = hosts
+ logger.debug(
+ f'OfflineHostDetector: Hosts to check if offline swapped to: {self.new_hosts}.')
+ self.wakeup()
+
+ def wakeup(self) -> None:
+ self.event.set()
+
+ def shutdown(self) -> None:
+ self.stop = True
+ self.wakeup()
diff --git a/src/pybind/mgr/cephadm/registry.py b/src/pybind/mgr/cephadm/registry.py
new file mode 100644
index 000000000..31e5fb23e
--- /dev/null
+++ b/src/pybind/mgr/cephadm/registry.py
@@ -0,0 +1,65 @@
+import requests
+from typing import List, Dict, Tuple
+from requests import Response
+
+
+class Registry:
+
+ def __init__(self, url: str):
+ self._url: str = url
+
+ @property
+ def api_domain(self) -> str:
+ if self._url == 'docker.io':
+ return 'registry-1.docker.io'
+ return self._url
+
+ def get_token(self, response: Response) -> str:
+ realm, params = self.parse_www_authenticate(response.headers['Www-Authenticate'])
+ r = requests.get(realm, params=params)
+ r.raise_for_status()
+ ret = r.json()
+ if 'access_token' in ret:
+ return ret['access_token']
+ if 'token' in ret:
+ return ret['token']
+ raise ValueError(f'Unknown token reply {ret}')
+
+ def parse_www_authenticate(self, text: str) -> Tuple[str, Dict[str, str]]:
+ # 'Www-Authenticate': 'Bearer realm="https://auth.docker.io/token",service="registry.docker.io",scope="repository:ceph/ceph:pull"'
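+        # e.g. the example header above parses to realm='https://auth.docker.io/token'
+        # and params={'service': 'registry.docker.io', 'scope': 'repository:ceph/ceph:pull'}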
+ r: Dict[str, str] = {}
+ for token in text.split(','):
+ key, value = token.split('=', 1)
+ r[key] = value.strip('"')
+ realm = r.pop('Bearer realm')
+ return realm, r
+
+ def get_tags(self, image: str) -> List[str]:
+ tags = []
+ headers = {'Accept': 'application/json'}
+ url = f'https://{self.api_domain}/v2/{image}/tags/list'
+ while True:
+ try:
+ r = requests.get(url, headers=headers)
+ except requests.exceptions.ConnectionError as e:
+ msg = f"Cannot get tags from url '{url}': {e}"
+ raise ValueError(msg) from e
+ if r.status_code == 401:
+ if 'Authorization' in headers:
+ raise ValueError('failed authentication')
+ token = self.get_token(r)
+ headers['Authorization'] = f'Bearer {token}'
+ continue
+ r.raise_for_status()
+
+ new_tags = r.json()['tags']
+ tags.extend(new_tags)
+
+ if 'Link' not in r.headers:
+ break
+
+ # strip < > brackets off and prepend the domain
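+            # e.g. for docker.io, a Link header of '</v2/ceph/ceph/tags/list?last=v16>; rel="next"'
+            # would become 'https://registry-1.docker.io/v2/ceph/ceph/tags/list?last=v16'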
+ url = f'https://{self.api_domain}' + r.headers['Link'].split(';')[0][1:-1]
+ continue
+
+ return tags
diff --git a/src/pybind/mgr/cephadm/remotes.py b/src/pybind/mgr/cephadm/remotes.py
new file mode 100644
index 000000000..e1ecf2dcb
--- /dev/null
+++ b/src/pybind/mgr/cephadm/remotes.py
@@ -0,0 +1,50 @@
+# ceph-deploy ftw
+import os
+try:
+ from typing import Optional
+except ImportError:
+ pass
+
+PYTHONS = ['python3', 'python2', 'python']
+PATH = [
+ '/usr/bin',
+ '/usr/local/bin',
+ '/bin',
+ '/usr/sbin',
+ '/usr/local/sbin',
+ '/sbin',
+]
+
+
+def choose_python():
+ # type: () -> Optional[str]
+ for e in PYTHONS:
+ for b in PATH:
+ p = os.path.join(b, e)
+ if os.path.exists(p):
+ return p
+ return None
+
+
+def write_file(path: str, content: bytes, mode: int, uid: int, gid: int,
+ mkdir_p: bool = True) -> Optional[str]:
+ try:
+ if mkdir_p:
+ dirname = os.path.dirname(path)
+ if not os.path.exists(dirname):
+ os.makedirs(dirname)
+ tmp_path = path + '.new'
+ with open(tmp_path, 'wb') as f:
+ os.fchown(f.fileno(), uid, gid)
+ os.fchmod(f.fileno(), mode)
+ f.write(content)
+ os.fsync(f.fileno())
+ os.rename(tmp_path, path)
+ except Exception as e:
+ return str(e)
+ return None
+
+
+if __name__ == '__channelexec__':
+ for item in channel: # type: ignore # noqa: F821
+ channel.send(eval(item)) # type: ignore # noqa: F821
diff --git a/src/pybind/mgr/cephadm/schedule.py b/src/pybind/mgr/cephadm/schedule.py
new file mode 100644
index 000000000..a727f5049
--- /dev/null
+++ b/src/pybind/mgr/cephadm/schedule.py
@@ -0,0 +1,447 @@
+import hashlib
+import logging
+import random
+from typing import List, Optional, Callable, TypeVar, Tuple, NamedTuple, Dict
+
+import orchestrator
+from ceph.deployment.service_spec import ServiceSpec
+from orchestrator._interface import DaemonDescription
+from orchestrator import OrchestratorValidationError
+from .utils import RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES
+
+logger = logging.getLogger(__name__)
+T = TypeVar('T')
+
+
+class DaemonPlacement(NamedTuple):
+ daemon_type: str
+ hostname: str
+ network: str = '' # for mons only
+ name: str = ''
+ ip: Optional[str] = None
+ ports: List[int] = []
+ rank: Optional[int] = None
+ rank_generation: Optional[int] = None
+
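+    # __str__ renders e.g. 'mgr:host1(rank=0.1 name=x 10.0.0.1:9283)';
+    # unset fields are omitted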
+ def __str__(self) -> str:
+ res = self.daemon_type + ':' + self.hostname
+ other = []
+ if self.rank is not None:
+ other.append(f'rank={self.rank}.{self.rank_generation}')
+ if self.network:
+ other.append(f'network={self.network}')
+ if self.name:
+ other.append(f'name={self.name}')
+ if self.ports:
+ other.append(f'{self.ip or "*"}:{",".join(map(str, self.ports))}')
+ if other:
+ res += '(' + ' '.join(other) + ')'
+ return res
+
+ def renumber_ports(self, n: int) -> 'DaemonPlacement':
+ return DaemonPlacement(
+ self.daemon_type,
+ self.hostname,
+ self.network,
+ self.name,
+ self.ip,
+ [p + n for p in self.ports],
+ self.rank,
+ self.rank_generation,
+ )
+
+ def assign_rank(self, rank: int, gen: int) -> 'DaemonPlacement':
+ return DaemonPlacement(
+ self.daemon_type,
+ self.hostname,
+ self.network,
+ self.name,
+ self.ip,
+ self.ports,
+ rank,
+ gen,
+ )
+
+ def assign_name(self, name: str) -> 'DaemonPlacement':
+ return DaemonPlacement(
+ self.daemon_type,
+ self.hostname,
+ self.network,
+ name,
+ self.ip,
+ self.ports,
+ self.rank,
+ self.rank_generation,
+ )
+
+ def assign_rank_generation(
+ self,
+ rank: int,
+ rank_map: Dict[int, Dict[int, Optional[str]]]
+ ) -> 'DaemonPlacement':
+ if rank not in rank_map:
+ rank_map[rank] = {}
+ gen = 0
+ else:
+ gen = max(rank_map[rank].keys()) + 1
+ rank_map[rank][gen] = None
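+        # e.g. if rank_map == {0: {0: 'mgr.a'}}, a call with rank=0 allocates gen=1
+        # and records rank_map[0][1] = None until a daemon name is assigned to it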
+ return DaemonPlacement(
+ self.daemon_type,
+ self.hostname,
+ self.network,
+ self.name,
+ self.ip,
+ self.ports,
+ rank,
+ gen,
+ )
+
+ def matches_daemon(self, dd: DaemonDescription) -> bool:
+ if self.daemon_type != dd.daemon_type:
+ return False
+ if self.hostname != dd.hostname:
+ return False
+ # fixme: how to match against network?
+ if self.name and self.name != dd.daemon_id:
+ return False
+ if self.ports:
+ if self.ports != dd.ports and dd.ports:
+ return False
+ if self.ip != dd.ip and dd.ip:
+ return False
+ return True
+
+ def matches_rank_map(
+ self,
+ dd: DaemonDescription,
+ rank_map: Optional[Dict[int, Dict[int, Optional[str]]]],
+ ranks: List[int]
+ ) -> bool:
+ if rank_map is None:
+ # daemon should have no rank
+ return dd.rank is None
+
+ if dd.rank is None:
+ return False
+
+ if dd.rank not in rank_map:
+ return False
+ if dd.rank not in ranks:
+ return False
+
+ # must be the highest/newest rank_generation
+ if dd.rank_generation != max(rank_map[dd.rank].keys()):
+ return False
+
+ # must be *this* daemon
+ return rank_map[dd.rank][dd.rank_generation] == dd.daemon_id
+
+
+class HostAssignment(object):
+
+ def __init__(self,
+ spec: ServiceSpec,
+ hosts: List[orchestrator.HostSpec],
+ unreachable_hosts: List[orchestrator.HostSpec],
+ daemons: List[orchestrator.DaemonDescription],
+ networks: Dict[str, Dict[str, Dict[str, List[str]]]] = {},
+ filter_new_host: Optional[Callable[[str], bool]] = None,
+ allow_colo: bool = False,
+ primary_daemon_type: Optional[str] = None,
+ per_host_daemon_type: Optional[str] = None,
+ rank_map: Optional[Dict[int, Dict[int, Optional[str]]]] = None,
+ ):
+ assert spec
+ self.spec = spec # type: ServiceSpec
+ self.primary_daemon_type = primary_daemon_type or spec.service_type
+ self.hosts: List[orchestrator.HostSpec] = hosts
+ self.unreachable_hosts: List[orchestrator.HostSpec] = unreachable_hosts
+ self.filter_new_host = filter_new_host
+ self.service_name = spec.service_name()
+ self.daemons = daemons
+ self.networks = networks
+ self.allow_colo = allow_colo
+ self.per_host_daemon_type = per_host_daemon_type
+ self.ports_start = spec.get_port_start()
+ self.rank_map = rank_map
+
+ def hosts_by_label(self, label: str) -> List[orchestrator.HostSpec]:
+ return [h for h in self.hosts if label in h.labels]
+
+ def get_hostnames(self) -> List[str]:
+ return [h.hostname for h in self.hosts]
+
+ def validate(self) -> None:
+ self.spec.validate()
+
+ if self.spec.placement.count == 0:
+ raise OrchestratorValidationError(
+ f'<count> can not be 0 for {self.spec.one_line_str()}')
+
+ if (
+ self.spec.placement.count_per_host is not None
+ and self.spec.placement.count_per_host > 1
+ and not self.allow_colo
+ ):
+ raise OrchestratorValidationError(
+ f'Cannot place more than one {self.spec.service_type} per host'
+ )
+
+ if self.spec.placement.hosts:
+ explicit_hostnames = {h.hostname for h in self.spec.placement.hosts}
+ unknown_hosts = explicit_hostnames.difference(set(self.get_hostnames()))
+ if unknown_hosts:
+ raise OrchestratorValidationError(
+ f'Cannot place {self.spec.one_line_str()} on {", ".join(sorted(unknown_hosts))}: Unknown hosts')
+
+ if self.spec.placement.host_pattern:
+ pattern_hostnames = self.spec.placement.filter_matching_hostspecs(self.hosts)
+ if not pattern_hostnames:
+ raise OrchestratorValidationError(
+ f'Cannot place {self.spec.one_line_str()}: No matching hosts')
+
+ if self.spec.placement.label:
+ label_hosts = self.hosts_by_label(self.spec.placement.label)
+ if not label_hosts:
+ raise OrchestratorValidationError(
+ f'Cannot place {self.spec.one_line_str()}: No matching '
+ f'hosts for label {self.spec.placement.label}')
+
+ def place_per_host_daemons(
+ self,
+ slots: List[DaemonPlacement],
+ to_add: List[DaemonPlacement],
+ to_remove: List[orchestrator.DaemonDescription],
+ ) -> Tuple[List[DaemonPlacement], List[DaemonPlacement], List[orchestrator.DaemonDescription]]:
+ if self.per_host_daemon_type:
+ host_slots = [
+ DaemonPlacement(daemon_type=self.per_host_daemon_type,
+ hostname=hostname)
+ for hostname in set([s.hostname for s in slots])
+ ]
+ existing = [
+ d for d in self.daemons if d.daemon_type == self.per_host_daemon_type
+ ]
+ slots += host_slots
+ for dd in existing:
+ found = False
+ for p in host_slots:
+ if p.matches_daemon(dd):
+ host_slots.remove(p)
+ found = True
+ break
+ if not found:
+ to_remove.append(dd)
+ to_add += host_slots
+
+ to_remove = [d for d in to_remove if d.hostname not in [
+ h.hostname for h in self.unreachable_hosts]]
+
+ return slots, to_add, to_remove
+
+ def place(self):
+ # type: () -> Tuple[List[DaemonPlacement], List[DaemonPlacement], List[orchestrator.DaemonDescription]]
+ """
+        Generate the DaemonPlacement slots (plus the slots to add and the
+        daemons to remove), taking into account:
+
+ * all known hosts
+ * hosts with existing daemons
+ * placement spec
+ * self.filter_new_host
+ """
+
+ self.validate()
+
+ count = self.spec.placement.count
+
+ # get candidate hosts based on [hosts, label, host_pattern]
+ candidates = self.get_candidates() # type: List[DaemonPlacement]
+ if self.primary_daemon_type in RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES:
+ # remove unreachable hosts that are not in maintenance so daemons
+ # on these hosts will be rescheduled
+ candidates = self.remove_non_maintenance_unreachable_candidates(candidates)
+
+ def expand_candidates(ls: List[DaemonPlacement], num: int) -> List[DaemonPlacement]:
+ r = []
+ for offset in range(num):
+ r.extend([dp.renumber_ports(offset) for dp in ls])
+ return r
+
+ # consider enough slots to fulfill target count-per-host or count
+ if count is None:
+ if self.spec.placement.count_per_host:
+ per_host = self.spec.placement.count_per_host
+ else:
+ per_host = 1
+ candidates = expand_candidates(candidates, per_host)
+ elif self.allow_colo and candidates:
+ per_host = 1 + ((count - 1) // len(candidates))
+ candidates = expand_candidates(candidates, per_host)
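+            # e.g. count=5 over 2 candidate hosts yields per_host=3, i.e. 6 colocated
+            # slots (ports renumbered per offset); surplus slots are simply left unused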
+
+ # consider (preserve) existing daemons in a particular order...
+ daemons = sorted(
+ [
+ d for d in self.daemons if d.daemon_type == self.primary_daemon_type
+ ],
+ key=lambda d: (
+ not d.is_active, # active before standby
+ d.rank is not None, # ranked first, then non-ranked
+ d.rank, # low ranks
+ 0 - (d.rank_generation or 0), # newer generations first
+ )
+ )
+
+ # sort candidates into existing/used slots that already have a
+ # daemon, and others (the rest)
+ existing_active: List[orchestrator.DaemonDescription] = []
+ existing_standby: List[orchestrator.DaemonDescription] = []
+ existing_slots: List[DaemonPlacement] = []
+ to_add: List[DaemonPlacement] = []
+ to_remove: List[orchestrator.DaemonDescription] = []
+ ranks: List[int] = list(range(len(candidates)))
+ others: List[DaemonPlacement] = candidates.copy()
+ for dd in daemons:
+ found = False
+ for p in others:
+ if p.matches_daemon(dd) and p.matches_rank_map(dd, self.rank_map, ranks):
+ others.remove(p)
+ if dd.is_active:
+ existing_active.append(dd)
+ else:
+ existing_standby.append(dd)
+ if dd.rank is not None:
+ assert dd.rank_generation is not None
+ p = p.assign_rank(dd.rank, dd.rank_generation)
+ ranks.remove(dd.rank)
+ existing_slots.append(p)
+ found = True
+ break
+ if not found:
+ to_remove.append(dd)
+
+        # TODO: At some point we want to deploy daemons that are on offline hosts.
+        # When we do this differs per daemon type: stateless daemons we could
+        # redeploy quickly to improve availability, while for stateful daemons we
+        # might want to wait longer to see if the host comes back online.
+
+ existing = existing_active + existing_standby
+
+ # build to_add
+ if not count:
+ to_add = [dd for dd in others if dd.hostname not in [
+ h.hostname for h in self.unreachable_hosts]]
+ else:
+ # The number of new slots that need to be selected in order to fulfill count
+ need = count - len(existing)
+
+ # we don't need any additional placements
+ if need <= 0:
+ to_remove.extend(existing[count:])
+ del existing_slots[count:]
+ return self.place_per_host_daemons(existing_slots, [], to_remove)
+
+ for dp in others:
+ if need <= 0:
+ break
+ if dp.hostname not in [h.hostname for h in self.unreachable_hosts]:
+ to_add.append(dp)
+                    need -= 1  # this is the last use of need in this function, so it can double as a counter
+
+ if self.rank_map is not None:
+ # assign unused ranks (and rank_generations) to to_add
+ assert len(ranks) >= len(to_add)
+ for i in range(len(to_add)):
+ to_add[i] = to_add[i].assign_rank_generation(ranks[i], self.rank_map)
+
+ logger.debug('Combine hosts with existing daemons %s + new hosts %s' % (existing, to_add))
+ return self.place_per_host_daemons(existing_slots + to_add, to_add, to_remove)
+
+ def find_ip_on_host(self, hostname: str, subnets: List[str]) -> Optional[str]:
+ for subnet in subnets:
+ ips: List[str] = []
+ for iface, iface_ips in self.networks.get(hostname, {}).get(subnet, {}).items():
+ ips.extend(iface_ips)
+ if ips:
+ return sorted(ips)[0]
+ return None
+
+ def get_candidates(self) -> List[DaemonPlacement]:
+ if self.spec.placement.hosts:
+ ls = [
+ DaemonPlacement(daemon_type=self.primary_daemon_type,
+ hostname=h.hostname, network=h.network, name=h.name,
+ ports=self.ports_start)
+ for h in self.spec.placement.hosts
+ ]
+ elif self.spec.placement.label:
+ ls = [
+ DaemonPlacement(daemon_type=self.primary_daemon_type,
+ hostname=x.hostname, ports=self.ports_start)
+ for x in self.hosts_by_label(self.spec.placement.label)
+ ]
+ elif self.spec.placement.host_pattern:
+ ls = [
+ DaemonPlacement(daemon_type=self.primary_daemon_type,
+ hostname=x, ports=self.ports_start)
+ for x in self.spec.placement.filter_matching_hostspecs(self.hosts)
+ ]
+ elif (
+ self.spec.placement.count is not None
+ or self.spec.placement.count_per_host is not None
+ ):
+ ls = [
+ DaemonPlacement(daemon_type=self.primary_daemon_type,
+ hostname=x.hostname, ports=self.ports_start)
+ for x in self.hosts
+ ]
+ else:
+ raise OrchestratorValidationError(
+ "placement spec is empty: no hosts, no label, no pattern, no count")
+
+ # allocate an IP?
+ if self.spec.networks:
+ orig = ls.copy()
+ ls = []
+ for p in orig:
+ ip = self.find_ip_on_host(p.hostname, self.spec.networks)
+ if ip:
+ ls.append(DaemonPlacement(daemon_type=self.primary_daemon_type,
+ hostname=p.hostname, network=p.network,
+ name=p.name, ports=p.ports, ip=ip))
+ else:
+ logger.debug(
+ f'Skipping {p.hostname} with no IP in network(s) {self.spec.networks}'
+ )
+
+ if self.filter_new_host:
+ old = ls.copy()
+ ls = []
+ for h in old:
+ if self.filter_new_host(h.hostname):
+ ls.append(h)
+ if len(old) > len(ls):
+ logger.debug('Filtered %s down to %s' % (old, ls))
+
+        # now that we have the list of node candidates based on the configured
+        # placement, shuffle the list for pseudo-random node selection. For this,
+        # we generate a seed from the service name and use it to shuffle the
+        # candidates. This makes the shuffling deterministic for the same service name.
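+        # e.g. the candidate order for service 'mgr' is identical on every pass,
+        # while other services get their own (equally stable) pseudo-random order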
+ seed = int(
+ hashlib.sha1(self.spec.service_name().encode('utf-8')).hexdigest(),
+ 16
+ ) % (2 ** 32) # truncate result to 32 bits
+ final = sorted(ls)
+ random.Random(seed).shuffle(final)
+ return final
+
+ def remove_non_maintenance_unreachable_candidates(self, candidates: List[DaemonPlacement]) -> List[DaemonPlacement]:
+ in_maintenance: Dict[str, bool] = {}
+ for h in self.hosts:
+ if h.status.lower() == 'maintenance':
+ in_maintenance[h.hostname] = True
+ continue
+ in_maintenance[h.hostname] = False
+ unreachable_hosts = [h.hostname for h in self.unreachable_hosts]
+ candidates = [
+ c for c in candidates if c.hostname not in unreachable_hosts or in_maintenance[c.hostname]]
+ return candidates
diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py
new file mode 100644
index 000000000..7ac6fee88
--- /dev/null
+++ b/src/pybind/mgr/cephadm/serve.py
@@ -0,0 +1,1487 @@
+import hashlib
+import json
+import logging
+import uuid
+from collections import defaultdict
+from contextlib import contextmanager
+from typing import TYPE_CHECKING, Optional, List, cast, Dict, Any, Union, Tuple, Iterator, \
+ DefaultDict
+
+from cephadm import remotes
+
+try:
+ import remoto
+ import execnet.gateway_bootstrap
+except ImportError:
+ remoto = None
+
+from ceph.deployment import inventory
+from ceph.deployment.drive_group import DriveGroupSpec
+from ceph.deployment.service_spec import ServiceSpec, CustomContainerSpec, PlacementSpec
+from ceph.utils import str_to_datetime, datetime_now
+
+import orchestrator
+from orchestrator import OrchestratorError, set_exception_subject, OrchestratorEvent, \
+ DaemonDescriptionStatus, daemon_type_to_service
+from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
+from cephadm.schedule import HostAssignment
+from cephadm.autotune import MemoryAutotuner
+from cephadm.utils import forall_hosts, cephadmNoImage, is_repo_digest, \
+ CephadmNoImage, CEPH_TYPES, ContainerInspectInfo
+from mgr_module import MonCommandFailed
+from mgr_util import format_bytes
+
+from . import utils
+
+if TYPE_CHECKING:
+ from cephadm.module import CephadmOrchestrator
+ from remoto.backends import BaseConnection
+
+logger = logging.getLogger(__name__)
+
+REQUIRES_POST_ACTIONS = ['grafana', 'iscsi', 'prometheus', 'alertmanager', 'rgw']
+
+
+class CephadmServe:
+ """
+ This module contains functions that are executed in the
+ serve() thread. Thus they don't block the CLI.
+
+ Please see the `Note regarding network calls from CLI handlers`
+ chapter in the cephadm developer guide.
+
+    On the other hand, these functions should *not* be called from
+    CLI handlers, to avoid blocking the CLI.
+ """
+
+ def __init__(self, mgr: "CephadmOrchestrator"):
+ self.mgr: "CephadmOrchestrator" = mgr
+ self.log = logger
+
+ def serve(self) -> None:
+ """
+ The main loop of cephadm.
+
+ A command handler will typically change the declarative state
+ of cephadm. This loop will then attempt to apply this new state.
+ """
+ self.log.debug("serve starting")
+ self.mgr.config_checker.load_network_config()
+
+ while self.mgr.run:
+ self.log.debug("serve loop start")
+
+ try:
+
+ self.convert_tags_to_repo_digest()
+
+ # refresh daemons
+ self.log.debug('refreshing hosts and daemons')
+ self._refresh_hosts_and_daemons()
+
+ self._check_for_strays()
+
+ self._update_paused_health()
+
+ if self.mgr.need_connect_dashboard_rgw and self.mgr.config_dashboard:
+ self.mgr.need_connect_dashboard_rgw = False
+ if 'dashboard' in self.mgr.get('mgr_map')['modules']:
+ self.log.info('Checking dashboard <-> RGW credentials')
+ self.mgr.remote('dashboard', 'set_rgw_credentials')
+
+ if not self.mgr.paused:
+ self.mgr.to_remove_osds.process_removal_queue()
+
+ self.mgr.migration.migrate()
+ if self.mgr.migration.is_migration_ongoing():
+ continue
+
+ if self._apply_all_services():
+ continue # did something, refresh
+
+ self._check_daemons()
+
+ self._purge_deleted_services()
+
+ self._check_for_moved_osds()
+
+ if self.mgr.upgrade.continue_upgrade():
+ continue
+
+ except OrchestratorError as e:
+ if e.event_subject:
+ self.mgr.events.from_orch_error(e)
+
+ self.log.debug("serve loop sleep")
+ self._serve_sleep()
+ self.log.debug("serve loop wake")
+ self.log.debug("serve exit")
+
+ def _serve_sleep(self) -> None:
+ sleep_interval = max(
+ 30,
+ min(
+ self.mgr.host_check_interval,
+ self.mgr.facts_cache_timeout,
+ self.mgr.daemon_cache_timeout,
+ self.mgr.device_cache_timeout,
+ )
+ )
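+        # i.e. wait for the shortest of the check/cache intervals, but never less
+        # than 30s; mgr.event (e.g. via _kick_serve_loop) can wake the loop earlier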
+ self.log.debug('Sleeping for %d seconds', sleep_interval)
+ self.mgr.event.wait(sleep_interval)
+ self.mgr.event.clear()
+
+ def _update_paused_health(self) -> None:
+ self.log.debug('_update_paused_health')
+ if self.mgr.paused:
+ self.mgr.set_health_warning('CEPHADM_PAUSED', 'cephadm background work is paused', 1, ["'ceph orch resume' to resume"])
+ else:
+ self.mgr.remove_health_warning('CEPHADM_PAUSED')
+
+ def _autotune_host_memory(self, host: str) -> None:
+ total_mem = self.mgr.cache.get_facts(host).get('memory_total_kb', 0)
+ if not total_mem:
+ val = None
+ else:
+ total_mem *= 1024 # kb -> bytes
+ total_mem *= self.mgr.autotune_memory_target_ratio
+ a = MemoryAutotuner(
+ daemons=self.mgr.cache.get_daemons_by_host(host),
+ config_get=self.mgr.get_foreign_ceph_option,
+ total_mem=total_mem,
+ )
+ val, osds = a.tune()
+ any_changed = False
+ for o in osds:
+ if self.mgr.get_foreign_ceph_option(o, 'osd_memory_target') != val:
+ self.mgr.check_mon_command({
+ 'prefix': 'config rm',
+ 'who': o,
+ 'name': 'osd_memory_target',
+ })
+ any_changed = True
+ if val is not None:
+ if any_changed:
+ self.mgr.log.info(
+ f'Adjusting osd_memory_target on {host} to {format_bytes(val, 6)}'
+ )
+ ret, out, err = self.mgr.mon_command({
+ 'prefix': 'config set',
+ 'who': f'osd/host:{host.split(".")[0]}',
+ 'name': 'osd_memory_target',
+ 'value': str(val),
+ })
+ if ret:
+ self.log.warning(
+ f'Unable to set osd_memory_target on {host} to {val}: {err}'
+ )
+ else:
+ # if osd memory autotuning is off, we don't want to remove these config
+ # options as users may be using them. Since there is no way to set autotuning
+ # on/off at a host level, best we can do is check if it is globally on.
+ if self.mgr.get_foreign_ceph_option('osd', 'osd_memory_target_autotune'):
+ self.mgr.check_mon_command({
+ 'prefix': 'config rm',
+ 'who': f'osd/host:{host.split(".")[0]}',
+ 'name': 'osd_memory_target',
+ })
+ self.mgr.cache.update_autotune(host)
+
+ def _refresh_hosts_and_daemons(self) -> None:
+ self.log.debug('_refresh_hosts_and_daemons')
+ bad_hosts = []
+ failures = []
+
+ if self.mgr.manage_etc_ceph_ceph_conf or self.mgr.keys.keys:
+ client_files = self._calc_client_files()
+ else:
+ client_files = {}
+
+ @forall_hosts
+ def refresh(host: str) -> None:
+
+ # skip hosts that are in maintenance - they could be powered off
+ if self.mgr.inventory._inventory[host].get("status", "").lower() == "maintenance":
+ return
+
+ if self.mgr.cache.host_needs_check(host):
+ r = self._check_host(host)
+ if r is not None:
+ bad_hosts.append(r)
+ if self.mgr.cache.host_needs_daemon_refresh(host):
+ self.log.debug('refreshing %s daemons' % host)
+ r = self._refresh_host_daemons(host)
+ if r:
+ failures.append(r)
+
+ if self.mgr.cache.host_needs_registry_login(host) and self.mgr.get_store('registry_credentials'):
+ self.log.debug(f"Logging `{host}` into custom registry")
+ r = self._registry_login(
+ host, json.loads(str(self.mgr.get_store('registry_credentials'))))
+ if r:
+ bad_hosts.append(r)
+
+ if self.mgr.cache.host_needs_device_refresh(host):
+ self.log.debug('refreshing %s devices' % host)
+ r = self._refresh_host_devices(host)
+ if r:
+ failures.append(r)
+
+ if self.mgr.cache.host_needs_facts_refresh(host):
+ self.log.debug(('Refreshing %s facts' % host))
+ r = self._refresh_facts(host)
+ if r:
+ failures.append(r)
+
+ if self.mgr.cache.host_needs_osdspec_preview_refresh(host):
+ self.log.debug(f"refreshing OSDSpec previews for {host}")
+ r = self._refresh_host_osdspec_previews(host)
+ if r:
+ failures.append(r)
+
+ if (
+ self.mgr.cache.host_needs_autotune_memory(host)
+ and not self.mgr.inventory.has_label(host, '_no_autotune_memory')
+ ):
+ self.log.debug(f"autotuning memory for {host}")
+ self._autotune_host_memory(host)
+
+ self._write_client_files(client_files, host)
+
+ refresh(self.mgr.cache.get_hosts())
+
+ self.mgr.config_checker.run_checks()
+
+ for k in [
+ 'CEPHADM_HOST_CHECK_FAILED',
+ 'CEPHADM_FAILED_DAEMON',
+ 'CEPHADM_REFRESH_FAILED',
+ ]:
+ self.mgr.remove_health_warning(k)
+ if bad_hosts:
+ self.mgr.set_health_warning('CEPHADM_HOST_CHECK_FAILED', f'{len(bad_hosts)} hosts fail cephadm check', len(bad_hosts), bad_hosts)
+ if failures:
+ self.mgr.set_health_warning('CEPHADM_REFRESH_FAILED', 'failed to probe daemons or devices', len(failures), failures)
+ failed_daemons = []
+ for dd in self.mgr.cache.get_daemons():
+ if dd.status is not None and dd.status == DaemonDescriptionStatus.error:
+ failed_daemons.append('daemon %s on %s is in %s state' % (
+ dd.name(), dd.hostname, dd.status_desc
+ ))
+ if failed_daemons:
+ self.mgr.set_health_warning('CEPHADM_FAILED_DAEMON', f'{len(failed_daemons)} failed cephadm daemon(s)', len(failed_daemons), failed_daemons)
+
+ def _check_host(self, host: str) -> Optional[str]:
+ if host not in self.mgr.inventory:
+ return None
+ self.log.debug(' checking %s' % host)
+ try:
+ addr = self.mgr.inventory.get_addr(host) if host in self.mgr.inventory else host
+ out, err, code = self._run_cephadm(
+ host, cephadmNoImage, 'check-host', [],
+ error_ok=True, no_fsid=True)
+ self.mgr.cache.update_last_host_check(host)
+ self.mgr.cache.save_host(host)
+ if code:
+ self.log.debug(' host %s (%s) failed check' % (host, addr))
+ if self.mgr.warn_on_failed_host_check:
+ return 'host %s (%s) failed check: %s' % (host, addr, err)
+ else:
+ self.log.debug(' host %s (%s) ok' % (host, addr))
+ except Exception as e:
+ self.log.debug(' host %s (%s) failed check' % (host, addr))
+ return 'host %s (%s) failed check: %s' % (host, addr, e)
+ return None
+
+ def _refresh_host_daemons(self, host: str) -> Optional[str]:
+ try:
+ ls = self._run_cephadm_json(host, 'mon', 'ls', [], no_fsid=True)
+ except OrchestratorError as e:
+ return str(e)
+ dm = {}
+ for d in ls:
+ if not d['style'].startswith('cephadm'):
+ continue
+ if d['fsid'] != self.mgr._cluster_fsid:
+ continue
+ if '.' not in d['name']:
+ continue
+ sd = orchestrator.DaemonDescription()
+ sd.last_refresh = datetime_now()
+ for k in ['created', 'started', 'last_configured', 'last_deployed']:
+ v = d.get(k, None)
+ if v:
+ setattr(sd, k, str_to_datetime(d[k]))
+ sd.daemon_type = d['name'].split('.')[0]
+ if sd.daemon_type not in orchestrator.KNOWN_DAEMON_TYPES:
+ logger.warning(f"Found unknown daemon type {sd.daemon_type} on host {host}")
+ continue
+
+ sd.daemon_id = '.'.join(d['name'].split('.')[1:])
+ sd.hostname = host
+ sd.container_id = d.get('container_id')
+ if sd.container_id:
+ # shorten the hash
+ sd.container_id = sd.container_id[0:12]
+ sd.container_image_name = d.get('container_image_name')
+ sd.container_image_id = d.get('container_image_id')
+ sd.container_image_digests = d.get('container_image_digests')
+ sd.memory_usage = d.get('memory_usage')
+ sd.memory_request = d.get('memory_request')
+ sd.memory_limit = d.get('memory_limit')
+ sd.cpu_percentage = d.get('cpu_percentage')
+ sd._service_name = d.get('service_name')
+ sd.deployed_by = d.get('deployed_by')
+ sd.version = d.get('version')
+ sd.ports = d.get('ports')
+ sd.ip = d.get('ip')
+ sd.rank = int(d['rank']) if d.get('rank') is not None else None
+ sd.rank_generation = int(d['rank_generation']) if d.get(
+ 'rank_generation') is not None else None
+ sd.extra_container_args = d.get('extra_container_args')
+ if 'state' in d:
+ sd.status_desc = d['state']
+ sd.status = {
+ 'running': DaemonDescriptionStatus.running,
+ 'stopped': DaemonDescriptionStatus.stopped,
+ 'error': DaemonDescriptionStatus.error,
+ 'unknown': DaemonDescriptionStatus.error,
+ }[d['state']]
+ else:
+ sd.status_desc = 'unknown'
+ sd.status = None
+ dm[sd.name()] = sd
+ self.log.debug('Refreshed host %s daemons (%d)' % (host, len(dm)))
+ self.mgr.cache.update_host_daemons(host, dm)
+ self.mgr.cache.save_host(host)
+ return None
+
+ def _refresh_facts(self, host: str) -> Optional[str]:
+ try:
+ val = self._run_cephadm_json(host, cephadmNoImage, 'gather-facts', [], no_fsid=True)
+ except OrchestratorError as e:
+ return str(e)
+
+ self.mgr.cache.update_host_facts(host, val)
+
+ return None
+
+ def _refresh_host_devices(self, host: str) -> Optional[str]:
+ with_lsm = self.mgr.device_enhanced_scan
+ inventory_args = ['--', 'inventory',
+ '--format=json-pretty',
+ '--filter-for-batch']
+ if with_lsm:
+ inventory_args.insert(-1, "--with-lsm")
+
+ try:
+ try:
+ devices = self._run_cephadm_json(host, 'osd', 'ceph-volume',
+ inventory_args)
+ except OrchestratorError as e:
+ if 'unrecognized arguments: --filter-for-batch' in str(e):
+ rerun_args = inventory_args.copy()
+ rerun_args.remove('--filter-for-batch')
+ devices = self._run_cephadm_json(host, 'osd', 'ceph-volume',
+ rerun_args)
+ else:
+ raise
+
+ networks = self._run_cephadm_json(host, 'mon', 'list-networks', [], no_fsid=True)
+ except OrchestratorError as e:
+ return str(e)
+
+ self.log.debug('Refreshed host %s devices (%d) networks (%s)' % (
+ host, len(devices), len(networks)))
+ ret = inventory.Devices.from_json(devices)
+ self.mgr.cache.update_host_devices_networks(host, ret.devices, networks)
+ self.update_osdspec_previews(host)
+ self.mgr.cache.save_host(host)
+ return None
+
+ def _refresh_host_osdspec_previews(self, host: str) -> Optional[str]:
+ self.update_osdspec_previews(host)
+ self.mgr.cache.save_host(host)
+ self.log.debug(f'Refreshed OSDSpec previews for host <{host}>')
+ return None
+
+ def update_osdspec_previews(self, search_host: str = '') -> None:
+ # Set global 'pending' flag for host
+ self.mgr.cache.loading_osdspec_preview.add(search_host)
+ previews = []
+ # query OSDSpecs for host <search host> and generate/get the preview
+ # There can be multiple previews for one host due to multiple OSDSpecs.
+ previews.extend(self.mgr.osd_service.get_previews(search_host))
+ self.log.debug(f'Loading OSDSpec previews to HostCache for host <{search_host}>')
+ self.mgr.cache.osdspec_previews[search_host] = previews
+ # Unset global 'pending' flag for host
+ self.mgr.cache.loading_osdspec_preview.remove(search_host)
+
+ def _check_for_strays(self) -> None:
+ self.log.debug('_check_for_strays')
+ for k in ['CEPHADM_STRAY_HOST',
+ 'CEPHADM_STRAY_DAEMON']:
+ self.mgr.remove_health_warning(k)
+ if self.mgr.warn_on_stray_hosts or self.mgr.warn_on_stray_daemons:
+ ls = self.mgr.list_servers()
+ self.log.debug(ls)
+ managed = self.mgr.cache.get_daemon_names()
+ host_detail = [] # type: List[str]
+ host_num_daemons = 0
+ daemon_detail = [] # type: List[str]
+ for item in ls:
+ host = item.get('hostname')
+ assert isinstance(host, str)
+ daemons = item.get('services') # misnomer!
+ assert isinstance(daemons, list)
+ missing_names = []
+ for s in daemons:
+ daemon_id = s.get('id')
+ assert daemon_id
+ name = '%s.%s' % (s.get('type'), daemon_id)
+ if s.get('type') in ['rbd-mirror', 'cephfs-mirror', 'rgw', 'rgw-nfs']:
+ metadata = self.mgr.get_metadata(
+ cast(str, s.get('type')), daemon_id, {})
+ assert metadata is not None
+ try:
+ if s.get('type') == 'rgw-nfs':
+ # https://tracker.ceph.com/issues/49573
+ name = metadata['id'][:-4]
+ else:
+ name = '%s.%s' % (s.get('type'), metadata['id'])
+ except (KeyError, TypeError):
+ self.log.debug(
+ "Failed to find daemon id for %s service %s" % (
+ s.get('type'), s.get('id')
+ )
+ )
+ if s.get('type') == 'tcmu-runner':
+ # because we don't track tcmu-runner daemons in the host cache
+                        # and don't have a way to check if the daemon is part of an iscsi service,
+                        # we assume that all tcmu-runner daemons are managed by cephadm
+ managed.append(name)
+ if host not in self.mgr.inventory:
+ missing_names.append(name)
+ host_num_daemons += 1
+ if name not in managed:
+ daemon_detail.append(
+ 'stray daemon %s on host %s not managed by cephadm' % (name, host))
+ if missing_names:
+ host_detail.append(
+ 'stray host %s has %d stray daemons: %s' % (
+ host, len(missing_names), missing_names))
+ if self.mgr.warn_on_stray_hosts and host_detail:
+ self.mgr.set_health_warning(
+ 'CEPHADM_STRAY_HOST', f'{len(host_detail)} stray host(s) with {host_num_daemons} daemon(s) not managed by cephadm', len(host_detail), host_detail)
+ if self.mgr.warn_on_stray_daemons and daemon_detail:
+ self.mgr.set_health_warning(
+ 'CEPHADM_STRAY_DAEMON', f'{len(daemon_detail)} stray daemon(s) not managed by cephadm', len(daemon_detail), daemon_detail)
+
+ def _check_for_moved_osds(self) -> None:
+ self.log.debug('_check_for_moved_osds')
+ all_osds: DefaultDict[int, List[orchestrator.DaemonDescription]] = defaultdict(list)
+ for dd in self.mgr.cache.get_daemons_by_type('osd'):
+ assert dd.daemon_id
+ all_osds[int(dd.daemon_id)].append(dd)
+ for osd_id, dds in all_osds.items():
+ if len(dds) <= 1:
+ continue
+ running = [dd for dd in dds if dd.status == DaemonDescriptionStatus.running]
+ error = [dd for dd in dds if dd.status == DaemonDescriptionStatus.error]
+ msg = f'Found duplicate OSDs: {", ".join(str(dd) for dd in dds)}'
+ logger.info(msg)
+ if len(running) != 1:
+ continue
+ osd = self.mgr.get_osd_by_id(osd_id)
+ if not osd or not osd['up']:
+ continue
+ for e in error:
+ assert e.hostname
+ try:
+ self._remove_daemon(e.name(), e.hostname, no_post_remove=True)
+ self.mgr.events.for_daemon(
+ e.name(), 'INFO', f"Removed duplicated daemon on host '{e.hostname}'")
+ except OrchestratorError as ex:
+ self.mgr.events.from_orch_error(ex)
+ logger.exception(f'failed to remove duplicated daemon {e}')
+
+ def _apply_all_services(self) -> bool:
+ self.log.debug('_apply_all_services')
+ r = False
+ specs = [] # type: List[ServiceSpec]
+ for sn, spec in self.mgr.spec_store.active_specs.items():
+ specs.append(spec)
+ for name in ['CEPHADM_APPLY_SPEC_FAIL', 'CEPHADM_DAEMON_PLACE_FAIL']:
+ self.mgr.remove_health_warning(name)
+ self.mgr.apply_spec_fails = []
+ for spec in specs:
+ try:
+ if self._apply_service(spec):
+ r = True
+ except Exception as e:
+ msg = f'Failed to apply {spec.service_name()} spec {spec}: {str(e)}'
+ self.log.exception(msg)
+ self.mgr.events.for_service(spec, 'ERROR', 'Failed to apply: ' + str(e))
+ self.mgr.apply_spec_fails.append((spec.service_name(), str(e)))
+ warnings = []
+ for x in self.mgr.apply_spec_fails:
+ warnings.append(f'{x[0]}: {x[1]}')
+ self.mgr.set_health_warning('CEPHADM_APPLY_SPEC_FAIL',
+ f"Failed to apply {len(self.mgr.apply_spec_fails)} service(s): {','.join(x[0] for x in self.mgr.apply_spec_fails)}",
+ len(self.mgr.apply_spec_fails),
+ warnings)
+ self.mgr.update_watched_hosts()
+ return r
+
+ def _apply_service_config(self, spec: ServiceSpec) -> None:
+ if spec.config:
+ section = utils.name_to_config_section(spec.service_name())
+ for name in ['CEPHADM_INVALID_CONFIG_OPTION', 'CEPHADM_FAILED_SET_OPTION']:
+ self.mgr.remove_health_warning(name)
+ invalid_config_options = []
+ options_failed_to_set = []
+ for k, v in spec.config.items():
+ try:
+ current = self.mgr.get_foreign_ceph_option(section, k)
+ except KeyError:
+ msg = f'Ignoring invalid {spec.service_name()} config option {k}'
+ self.log.warning(msg)
+ self.mgr.events.for_service(
+ spec, OrchestratorEvent.ERROR, f'Invalid config option {k}'
+ )
+ invalid_config_options.append(msg)
+ continue
+ if current != v:
+ self.log.debug(f'setting [{section}] {k} = {v}')
+ try:
+ self.mgr.check_mon_command({
+ 'prefix': 'config set',
+ 'name': k,
+ 'value': str(v),
+ 'who': section,
+ })
+ except MonCommandFailed as e:
+ msg = f'Failed to set {spec.service_name()} option {k}: {e}'
+ self.log.warning(msg)
+ options_failed_to_set.append(msg)
+
+ if invalid_config_options:
+ self.mgr.set_health_warning('CEPHADM_INVALID_CONFIG_OPTION', f'Ignoring {len(invalid_config_options)} invalid config option(s)', len(invalid_config_options), invalid_config_options)
+ if options_failed_to_set:
+ self.mgr.set_health_warning('CEPHADM_FAILED_SET_OPTION', f'Failed to set {len(options_failed_to_set)} option(s)', len(options_failed_to_set), options_failed_to_set)
+
+ def _apply_service(self, spec: ServiceSpec) -> bool:
+ """
+ Schedule a service. Deploy new daemons or remove old ones, depending
+ on the target label and count specified in the placement.
+ """
+ self.mgr.migration.verify_no_migration()
+
+ service_type = spec.service_type
+ service_name = spec.service_name()
+ if spec.unmanaged:
+ self.log.debug('Skipping unmanaged service %s' % service_name)
+ return False
+ if spec.preview_only:
+ self.log.debug('Skipping preview_only service %s' % service_name)
+ return False
+ self.log.debug('Applying service %s spec' % service_name)
+
+ self._apply_service_config(spec)
+
+ if service_type == 'osd':
+ self.mgr.osd_service.create_from_spec(cast(DriveGroupSpec, spec))
+ # TODO: return True would result in a busy loop
+ # can't know if daemon count changed; create_from_spec doesn't
+ # return a solid indication
+ return False
+
+ svc = self.mgr.cephadm_services[service_type]
+ daemons = self.mgr.cache.get_daemons_by_service(service_name)
+
+ public_networks: List[str] = []
+ if service_type == 'mon':
+ out = str(self.mgr.get_foreign_ceph_option('mon', 'public_network'))
+ if '/' in out:
+ public_networks = [x.strip() for x in out.split(',')]
+ self.log.debug('mon public_network(s) is %s' % public_networks)
+
+ def matches_network(host):
+ # type: (str) -> bool
+ # make sure we have 1 or more IPs for any of those networks on that
+ # host
+ for network in public_networks:
+ if len(self.mgr.cache.networks[host].get(network, [])) > 0:
+ return True
+ self.log.info(
+ f"Filtered out host {host}: does not belong to mon public_network"
+ f" ({','.join(public_networks)})"
+ )
+ return False
+
+ rank_map = None
+ if svc.ranked():
+ rank_map = self.mgr.spec_store[spec.service_name()].rank_map or {}
+ ha = HostAssignment(
+ spec=spec,
+ hosts=self.mgr._schedulable_hosts(),
+ unreachable_hosts=self.mgr._unreachable_hosts(),
+ daemons=daemons,
+ networks=self.mgr.cache.networks,
+ filter_new_host=(
+ matches_network if service_type == 'mon'
+ else None
+ ),
+ allow_colo=svc.allow_colo(),
+ primary_daemon_type=svc.primary_daemon_type(),
+ per_host_daemon_type=svc.per_host_daemon_type(),
+ rank_map=rank_map,
+ )
+
+ try:
+ all_slots, slots_to_add, daemons_to_remove = ha.place()
+ daemons_to_remove = [d for d in daemons_to_remove if (d.hostname and self.mgr.inventory._inventory[d.hostname].get(
+ 'status', '').lower() not in ['maintenance', 'offline'] and d.hostname not in self.mgr.offline_hosts)]
+ self.log.debug('Add %s, remove %s' % (slots_to_add, daemons_to_remove))
+ except OrchestratorError as e:
+ msg = f'Failed to apply {spec.service_name()} spec {spec}: {str(e)}'
+ self.log.error(msg)
+ self.mgr.events.for_service(spec, 'ERROR', 'Failed to apply: ' + str(e))
+ self.mgr.apply_spec_fails.append((spec.service_name(), str(e)))
+ warnings = []
+ for x in self.mgr.apply_spec_fails:
+ warnings.append(f'{x[0]}: {x[1]}')
+ self.mgr.set_health_warning('CEPHADM_APPLY_SPEC_FAIL',
+ f"Failed to apply {len(self.mgr.apply_spec_fails)} service(s): {','.join(x[0] for x in self.mgr.apply_spec_fails)}",
+ len(self.mgr.apply_spec_fails),
+ warnings)
+ return False
+
+ r = None
+
+ # sanity check
+ final_count = len(daemons) + len(slots_to_add) - len(daemons_to_remove)
+ if service_type in ['mon', 'mgr'] and final_count < 1:
+            self.log.debug('cannot scale mon|mgr below 1')
+ return False
+
+ # progress
+ progress_id = str(uuid.uuid4())
+ delta: List[str] = []
+ if slots_to_add:
+ delta += [f'+{len(slots_to_add)}']
+ if daemons_to_remove:
+ delta += [f'-{len(daemons_to_remove)}']
+ progress_title = f'Updating {spec.service_name()} deployment ({" ".join(delta)} -> {len(all_slots)})'
+ progress_total = len(slots_to_add) + len(daemons_to_remove)
+ progress_done = 0
+
+ def update_progress() -> None:
+ self.mgr.remote(
+ 'progress', 'update', progress_id,
+ ev_msg=progress_title,
+ ev_progress=(progress_done / progress_total),
+ add_to_ceph_s=True,
+ )
+
+ if progress_total:
+ update_progress()
+
+ # add any?
+ did_config = False
+
+ self.log.debug('Hosts that will receive new daemons: %s' % slots_to_add)
+ self.log.debug('Daemons that will be removed: %s' % daemons_to_remove)
+
+ try:
+ # assign names
+ for i in range(len(slots_to_add)):
+ slot = slots_to_add[i]
+ slot = slot.assign_name(self.mgr.get_unique_name(
+ slot.daemon_type,
+ slot.hostname,
+ [d for d in daemons if d not in daemons_to_remove],
+ prefix=spec.service_id,
+ forcename=slot.name,
+ rank=slot.rank,
+ rank_generation=slot.rank_generation,
+ ))
+ slots_to_add[i] = slot
+ if rank_map is not None:
+ assert slot.rank is not None
+ assert slot.rank_generation is not None
+ assert rank_map[slot.rank][slot.rank_generation] is None
+ rank_map[slot.rank][slot.rank_generation] = slot.name
+
+ if rank_map:
+                # record the rank_map before we make changes so that, if we fail,
+                # the next mgr will clean up.
+ self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map)
+
+ # remove daemons now, since we are going to fence them anyway
+ for d in daemons_to_remove:
+ assert d.hostname is not None
+ self._remove_daemon(d.name(), d.hostname)
+ daemons_to_remove = []
+
+ # fence them
+ svc.fence_old_ranks(spec, rank_map, len(all_slots))
+
+ # create daemons
+ daemon_place_fails = []
+ for slot in slots_to_add:
+ # first remove daemon with conflicting port or name?
+ if slot.ports or slot.name in [d.name() for d in daemons_to_remove]:
+ for d in daemons_to_remove:
+ if (
+ d.hostname != slot.hostname
+ or not (set(d.ports or []) & set(slot.ports))
+ or (d.ip and slot.ip and d.ip != slot.ip)
+ and d.name() != slot.name
+ ):
+ continue
+ if d.name() != slot.name:
+ self.log.info(
+                                f'Removing {d.name()} before deploying to {slot} to avoid a port or name conflict'
+ )
+ # NOTE: we don't check ok-to-stop here to avoid starvation if
+ # there is only 1 gateway.
+ self._remove_daemon(d.name(), d.hostname)
+ daemons_to_remove.remove(d)
+ progress_done += 1
+ break
+
+ # deploy new daemon
+ daemon_id = slot.name
+ if not did_config:
+ svc.config(spec)
+ did_config = True
+
+ daemon_spec = svc.make_daemon_spec(
+ slot.hostname, daemon_id, slot.network, spec,
+ daemon_type=slot.daemon_type,
+ ports=slot.ports,
+ ip=slot.ip,
+ rank=slot.rank,
+ rank_generation=slot.rank_generation,
+ )
+ self.log.debug('Placing %s.%s on host %s' % (
+ slot.daemon_type, daemon_id, slot.hostname))
+
+ try:
+ daemon_spec = svc.prepare_create(daemon_spec)
+ self._create_daemon(daemon_spec)
+ r = True
+ progress_done += 1
+ update_progress()
+ except (RuntimeError, OrchestratorError) as e:
+ msg = (f"Failed while placing {slot.daemon_type}.{daemon_id} "
+ f"on {slot.hostname}: {e}")
+ self.mgr.events.for_service(spec, 'ERROR', msg)
+ self.mgr.log.error(msg)
+ daemon_place_fails.append(msg)
+ # only return "no change" if no one else has already succeeded.
+ # later successes will also change to True
+ if r is None:
+ r = False
+ progress_done += 1
+ update_progress()
+ continue
+
+ # add to daemon list so next name(s) will also be unique
+ sd = orchestrator.DaemonDescription(
+ hostname=slot.hostname,
+ daemon_type=slot.daemon_type,
+ daemon_id=daemon_id,
+ )
+ daemons.append(sd)
+
+ if daemon_place_fails:
+ self.mgr.set_health_warning('CEPHADM_DAEMON_PLACE_FAIL', f'Failed to place {len(daemon_place_fails)} daemon(s)', len(daemon_place_fails), daemon_place_fails)
+
+ if service_type == 'mgr':
+ active_mgr = svc.get_active_daemon(self.mgr.cache.get_daemons_by_type('mgr'))
+ if active_mgr.daemon_id in [d.daemon_id for d in daemons_to_remove]:
+ # We can't just remove the active mgr like any other daemon.
+ # Need to fail over later so it can be removed on next pass.
+ # This can be accomplished by scheduling a restart of the active mgr.
+ self.mgr._schedule_daemon_action(active_mgr.name(), 'restart')
+
+ # remove any?
+ def _ok_to_stop(remove_daemons: List[orchestrator.DaemonDescription]) -> bool:
+ daemon_ids = [d.daemon_id for d in remove_daemons]
+ assert None not in daemon_ids
+ # setting force flag retains previous behavior
+ r = svc.ok_to_stop(cast(List[str], daemon_ids), force=True)
+ return not r.retval
+
+ while daemons_to_remove and not _ok_to_stop(daemons_to_remove):
+ # let's find a subset that is ok-to-stop
+ daemons_to_remove.pop()
+ for d in daemons_to_remove:
+ r = True
+ assert d.hostname is not None
+ self._remove_daemon(d.name(), d.hostname)
+
+ progress_done += 1
+ update_progress()
+
+ self.mgr.remote('progress', 'complete', progress_id)
+ except Exception as e:
+ self.mgr.remote('progress', 'fail', progress_id, str(e))
+ raise
+
+ if r is None:
+ r = False
+ return r
+
+ def _check_daemons(self) -> None:
+ self.log.debug('_check_daemons')
+ daemons = self.mgr.cache.get_daemons()
+ daemons_post: Dict[str, List[orchestrator.DaemonDescription]] = defaultdict(list)
+ for dd in daemons:
+ # orphan?
+ spec = self.mgr.spec_store.active_specs.get(dd.service_name(), None)
+ assert dd.hostname is not None
+ assert dd.daemon_type is not None
+ assert dd.daemon_id is not None
+ if not spec and dd.daemon_type not in ['mon', 'mgr', 'osd']:
+ # (mon and mgr specs should always exist; osds aren't matched
+ # to a service spec)
+ self.log.info('Removing orphan daemon %s...' % dd.name())
+ self._remove_daemon(dd.name(), dd.hostname)
+
+ # ignore unmanaged services
+ if spec and spec.unmanaged:
+ continue
+
+ # ignore daemons for deleted services
+ if dd.service_name() in self.mgr.spec_store.spec_deleted:
+ continue
+
+ # These daemon types require additional configs after creation
+ if dd.daemon_type in REQUIRES_POST_ACTIONS:
+ daemons_post[dd.daemon_type].append(dd)
+
+ if self.mgr.cephadm_services[daemon_type_to_service(dd.daemon_type)].get_active_daemon(
+ self.mgr.cache.get_daemons_by_service(dd.service_name())).daemon_id == dd.daemon_id:
+ dd.is_active = True
+ else:
+ dd.is_active = False
+
+ deps = self.mgr._calc_daemon_deps(spec, dd.daemon_type, dd.daemon_id)
+ last_deps, last_config = self.mgr.cache.get_daemon_last_config_deps(
+ dd.hostname, dd.name())
+ if last_deps is None:
+ last_deps = []
+ action = self.mgr.cache.get_scheduled_daemon_action(dd.hostname, dd.name())
+ if not last_config:
+ self.log.info('Reconfiguring %s (unknown last config time)...' % (
+ dd.name()))
+ action = 'reconfig'
+ elif last_deps != deps:
+ self.log.debug('%s deps %s -> %s' % (dd.name(), last_deps,
+ deps))
+ self.log.info('Reconfiguring %s (dependencies changed)...' % (
+ dd.name()))
+ action = 'reconfig'
+ elif spec is not None and hasattr(spec, 'extra_container_args') and dd.extra_container_args != spec.extra_container_args:
+ self.log.debug(
+ f'{dd.name()} container cli args {dd.extra_container_args} -> {spec.extra_container_args}')
+ self.log.info(f'Redeploying {dd.name()}, (container cli args changed) . . .')
+ dd.extra_container_args = spec.extra_container_args
+ action = 'redeploy'
+ elif self.mgr.last_monmap and \
+ self.mgr.last_monmap > last_config and \
+ dd.daemon_type in CEPH_TYPES:
+ self.log.info('Reconfiguring %s (monmap changed)...' % dd.name())
+ action = 'reconfig'
+ elif self.mgr.extra_ceph_conf_is_newer(last_config) and \
+ dd.daemon_type in CEPH_TYPES:
+ self.log.info('Reconfiguring %s (extra config changed)...' % dd.name())
+ action = 'reconfig'
+ if action:
+ if self.mgr.cache.get_scheduled_daemon_action(dd.hostname, dd.name()) == 'redeploy' \
+ and action == 'reconfig':
+ action = 'redeploy'
+ try:
+ daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(dd)
+ self.mgr._daemon_action(daemon_spec, action=action)
+ if self.mgr.cache.rm_scheduled_daemon_action(dd.hostname, dd.name()):
+ self.mgr.cache.save_host(dd.hostname)
+ except OrchestratorError as e:
+ self.mgr.events.from_orch_error(e)
+ if dd.daemon_type in daemons_post:
+ del daemons_post[dd.daemon_type]
+ # continue...
+ except Exception as e:
+ self.mgr.events.for_daemon_from_exception(dd.name(), e)
+ if dd.daemon_type in daemons_post:
+ del daemons_post[dd.daemon_type]
+ # continue...
+
+ # do daemon post actions
+ for daemon_type, daemon_descs in daemons_post.items():
+ run_post = False
+ for d in daemon_descs:
+ if d.name() in self.mgr.requires_post_actions:
+ self.mgr.requires_post_actions.remove(d.name())
+ run_post = True
+ if run_post:
+ self.mgr._get_cephadm_service(daemon_type_to_service(
+ daemon_type)).daemon_check_post(daemon_descs)
+
+ def _purge_deleted_services(self) -> None:
+ self.log.debug('_purge_deleted_services')
+ existing_services = self.mgr.spec_store.all_specs.items()
+ for service_name, spec in list(existing_services):
+ if service_name not in self.mgr.spec_store.spec_deleted:
+ continue
+ if self.mgr.cache.get_daemons_by_service(service_name):
+ continue
+ if spec.service_type in ['mon', 'mgr']:
+ continue
+
+ logger.info(f'Purge service {service_name}')
+
+ self.mgr.cephadm_services[spec.service_type].purge(service_name)
+ self.mgr.spec_store.finally_rm(service_name)
+
+ def convert_tags_to_repo_digest(self) -> None:
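+        # resolve any container image settings given as plain tags into repo digests,
+        # e.g. 'quay.io/ceph/ceph:v16.2' -> 'quay.io/ceph/ceph@sha256:...' (illustrative)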
+ if not self.mgr.use_repo_digest:
+ return
+ settings = self.mgr.upgrade.get_distinct_container_image_settings()
+ digests: Dict[str, ContainerInspectInfo] = {}
+ for container_image_ref in set(settings.values()):
+ if not is_repo_digest(container_image_ref):
+ image_info = self._get_container_image_info(container_image_ref)
+ if image_info.repo_digests:
+ # FIXME: we assume the first digest here is the best
+ assert is_repo_digest(image_info.repo_digests[0]), image_info
+ digests[container_image_ref] = image_info
+
+ for entity, container_image_ref in settings.items():
+ if not is_repo_digest(container_image_ref):
+ image_info = digests[container_image_ref]
+ if image_info.repo_digests:
+ # FIXME: we assume the first digest here is the best
+ self.mgr.set_container_image(entity, image_info.repo_digests[0])
+
+ def _calc_client_files(self) -> Dict[str, Dict[str, Tuple[int, int, int, bytes, str]]]:
+ # host -> path -> (mode, uid, gid, content, digest)
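+        # e.g. {'host1': {'/etc/ceph/ceph.conf': (0o644, 0, 0, b'<conf>', '<sha256 hex>')}}
+        # (hostname and values above are purely illustrative)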
+ client_files: Dict[str, Dict[str, Tuple[int, int, int, bytes, str]]] = {}
+
+ # ceph.conf
+ config = self.mgr.get_minimal_ceph_conf().encode('utf-8')
+ config_digest = ''.join('%02x' % c for c in hashlib.sha256(config).digest())
+
+ if self.mgr.manage_etc_ceph_ceph_conf:
+ try:
+ pspec = PlacementSpec.from_string(self.mgr.manage_etc_ceph_ceph_conf_hosts)
+ ha = HostAssignment(
+ spec=ServiceSpec('mon', placement=pspec),
+ hosts=self.mgr._schedulable_hosts(),
+ unreachable_hosts=self.mgr._unreachable_hosts(),
+ daemons=[],
+ networks=self.mgr.cache.networks,
+ )
+ all_slots, _, _ = ha.place()
+ for host in {s.hostname for s in all_slots}:
+ if host not in client_files:
+ client_files[host] = {}
+ client_files[host]['/etc/ceph/ceph.conf'] = (
+ 0o644, 0, 0, bytes(config), str(config_digest)
+ )
+ except Exception as e:
+ self.mgr.log.warning(
+ f'unable to calc conf hosts: {self.mgr.manage_etc_ceph_ceph_conf_hosts}: {e}')
+
+ # client keyrings
+ for ks in self.mgr.keys.keys.values():
+ try:
+ ret, keyring, err = self.mgr.mon_command({
+ 'prefix': 'auth get',
+ 'entity': ks.entity,
+ })
+ if ret:
+ self.log.warning(f'unable to fetch keyring for {ks.entity}')
+ continue
+ digest = ''.join('%02x' % c for c in hashlib.sha256(
+ keyring.encode('utf-8')).digest())
+ ha = HostAssignment(
+ spec=ServiceSpec('mon', placement=ks.placement),
+ hosts=self.mgr._schedulable_hosts(),
+ unreachable_hosts=self.mgr._unreachable_hosts(),
+ daemons=[],
+ networks=self.mgr.cache.networks,
+ )
+ all_slots, _, _ = ha.place()
+ for host in {s.hostname for s in all_slots}:
+ if host not in client_files:
+ client_files[host] = {}
+ client_files[host]['/etc/ceph/ceph.conf'] = (
+ 0o644, 0, 0, bytes(config), str(config_digest)
+ )
+ client_files[host][ks.path] = (
+ ks.mode, ks.uid, ks.gid, keyring.encode('utf-8'), digest
+ )
+ except Exception as e:
+ self.log.warning(
+ f'unable to calc client keyring {ks.entity} placement {ks.placement}: {e}')
+ return client_files
+
+ def _write_client_files(self,
+ client_files: Dict[str, Dict[str, Tuple[int, int, int, bytes, str]]],
+ host: str) -> None:
+ updated_files = False
+ old_files = self.mgr.cache.get_host_client_files(host).copy()
+ for path, m in client_files.get(host, {}).items():
+ mode, uid, gid, content, digest = m
+ if path in old_files:
+ match = old_files[path] == (digest, mode, uid, gid)
+ del old_files[path]
+ if match:
+ continue
+ self.log.info(f'Updating {host}:{path}')
+ self._write_remote_file(host, path, content, mode, uid, gid)
+ self.mgr.cache.update_client_file(host, path, digest, mode, uid, gid)
+ updated_files = True
+ for path in old_files.keys():
+ if path == '/etc/ceph/ceph.conf':
+ continue
+ self.log.info(f'Removing {host}:{path}')
+ with self._remote_connection(host) as tpl:
+ conn, connr = tpl
+ out, err, code = remoto.process.check(
+ conn,
+ ['rm', '-f', path])
+ updated_files = True
+ self.mgr.cache.removed_client_file(host, path)
+ if updated_files:
+ self.mgr.cache.save_host(host)
+
+ def _create_daemon(self,
+ daemon_spec: CephadmDaemonDeploySpec,
+ reconfig: bool = False,
+ osd_uuid_map: Optional[Dict[str, Any]] = None,
+ ) -> str:
+
+ with set_exception_subject('service', orchestrator.DaemonDescription(
+ daemon_type=daemon_spec.daemon_type,
+ daemon_id=daemon_spec.daemon_id,
+ hostname=daemon_spec.host,
+ ).service_id(), overwrite=True):
+
+ try:
+ image = ''
+ start_time = datetime_now()
+ ports: List[int] = daemon_spec.ports if daemon_spec.ports else []
+
+ if daemon_spec.daemon_type == 'container':
+ spec = cast(CustomContainerSpec,
+ self.mgr.spec_store[daemon_spec.service_name].spec)
+ image = spec.image
+ if spec.ports:
+ ports.extend(spec.ports)
+
+ if daemon_spec.daemon_type == 'cephadm-exporter':
+ if not reconfig:
+ assert daemon_spec.host
+ self._deploy_cephadm_binary(daemon_spec.host)
+
+                # TCP ports to open in the host firewall
+ if len(ports) > 0:
+ daemon_spec.extra_args.extend([
+ '--tcp-ports', ' '.join(map(str, ports))
+ ])
+
+                # osd deployments need an --osd-fsid arg (the osd's uuid)
+ if daemon_spec.daemon_type == 'osd':
+ if not osd_uuid_map:
+ osd_uuid_map = self.mgr.get_osd_uuid_map()
+ osd_uuid = osd_uuid_map.get(daemon_spec.daemon_id)
+ if not osd_uuid:
+ raise OrchestratorError('osd.%s not in osdmap' % daemon_spec.daemon_id)
+ daemon_spec.extra_args.extend(['--osd-fsid', osd_uuid])
+
+ if reconfig:
+ daemon_spec.extra_args.append('--reconfig')
+ if self.mgr.allow_ptrace:
+ daemon_spec.extra_args.append('--allow-ptrace')
+
+ try:
+ eca = daemon_spec.extra_container_args
+ if eca:
+ for a in eca:
+ daemon_spec.extra_args.append(f'--extra-container-args={a}')
+ except AttributeError:
+ eca = None
+
+ if self.mgr.cache.host_needs_registry_login(daemon_spec.host) and self.mgr.registry_url:
+ self._registry_login(daemon_spec.host,
+ json.loads(str(self.mgr.get_store('registry_credentials'))))
+
+ self.log.info('%s daemon %s on %s' % (
+ 'Reconfiguring' if reconfig else 'Deploying',
+ daemon_spec.name(), daemon_spec.host))
+
+ out, err, code = self._run_cephadm(
+ daemon_spec.host, daemon_spec.name(), 'deploy',
+ [
+ '--name', daemon_spec.name(),
+ '--meta-json', json.dumps({
+ 'service_name': daemon_spec.service_name,
+ 'ports': daemon_spec.ports,
+ 'ip': daemon_spec.ip,
+ 'deployed_by': self.mgr.get_active_mgr_digests(),
+ 'rank': daemon_spec.rank,
+ 'rank_generation': daemon_spec.rank_generation,
+ 'extra_container_args': eca
+ }),
+ '--config-json', '-',
+ ] + daemon_spec.extra_args,
+ stdin=json.dumps(daemon_spec.final_config),
+ image=image,
+ )
+
+ # refresh daemon state? (ceph daemon reconfig does not need it)
+ if not reconfig or daemon_spec.daemon_type not in CEPH_TYPES:
+ if not code and daemon_spec.host in self.mgr.cache.daemons:
+ # prime cached service state with what we (should have)
+ # just created
+ sd = daemon_spec.to_daemon_description(
+ DaemonDescriptionStatus.starting, 'starting')
+ self.mgr.cache.add_daemon(daemon_spec.host, sd)
+ if daemon_spec.daemon_type in REQUIRES_POST_ACTIONS:
+ self.mgr.requires_post_actions.add(daemon_spec.name())
+ self.mgr.cache.invalidate_host_daemons(daemon_spec.host)
+
+ self.mgr.cache.update_daemon_config_deps(
+ daemon_spec.host, daemon_spec.name(), daemon_spec.deps, start_time)
+ self.mgr.cache.save_host(daemon_spec.host)
+ msg = "{} {} on host '{}'".format(
+ 'Reconfigured' if reconfig else 'Deployed', daemon_spec.name(), daemon_spec.host)
+ if not code:
+ self.mgr.events.for_daemon(daemon_spec.name(), OrchestratorEvent.INFO, msg)
+ else:
+ what = 'reconfigure' if reconfig else 'deploy'
+ self.mgr.events.for_daemon(
+ daemon_spec.name(), OrchestratorEvent.ERROR, f'Failed to {what}: {err}')
+ return msg
+ except OrchestratorError:
+ redeploy = daemon_spec.name() in self.mgr.cache.get_daemon_names()
+ if not reconfig and not redeploy:
+ # we have to clean up the daemon. E.g. keyrings.
+                    service_type = daemon_type_to_service(daemon_spec.daemon_type)
+                    dd = daemon_spec.to_daemon_description(DaemonDescriptionStatus.error, 'failed')
+                    self.mgr.cephadm_services[service_type].post_remove(dd, is_failed_deploy=True)
+ raise
+
+ def _remove_daemon(self, name: str, host: str, no_post_remove: bool = False) -> str:
+ """
+ Remove a daemon
+ """
+ (daemon_type, daemon_id) = name.split('.', 1)
+ daemon = orchestrator.DaemonDescription(
+ daemon_type=daemon_type,
+ daemon_id=daemon_id,
+ hostname=host)
+
+ with set_exception_subject('service', daemon.service_id(), overwrite=True):
+
+ self.mgr.cephadm_services[daemon_type_to_service(daemon_type)].pre_remove(daemon)
+ # NOTE: we are passing the 'force' flag here, which means
+            # we can delete a mon instance's data.
+ dd = self.mgr.cache.get_daemon(daemon.daemon_name)
+ if dd.ports:
+ args = ['--name', name, '--force', '--tcp-ports', ' '.join(map(str, dd.ports))]
+ else:
+ args = ['--name', name, '--force']
+
+ self.log.info('Removing daemon %s from %s -- ports %s' % (name, host, dd.ports))
+ out, err, code = self._run_cephadm(
+ host, name, 'rm-daemon', args)
+ if not code:
+ # remove item from cache
+ self.mgr.cache.rm_daemon(host, name)
+ self.mgr.cache.invalidate_host_daemons(host)
+
+ if not no_post_remove:
+ self.mgr.cephadm_services[daemon_type_to_service(
+ daemon_type)].post_remove(daemon, is_failed_deploy=False)
+
+ return "Removed {} from host '{}'".format(name, host)
+
+ def _run_cephadm_json(self,
+ host: str,
+ entity: Union[CephadmNoImage, str],
+ command: str,
+ args: List[str],
+ no_fsid: Optional[bool] = False,
+ image: Optional[str] = "",
+ ) -> Any:
+ try:
+ out, err, code = self._run_cephadm(
+ host, entity, command, args, no_fsid=no_fsid, image=image)
+ if code:
+ raise OrchestratorError(f'host {host} `cephadm {command}` returned {code}: {err}')
+ except Exception as e:
+ raise OrchestratorError(f'host {host} `cephadm {command}` failed: {e}')
+ try:
+ return json.loads(''.join(out))
+ except (ValueError, KeyError):
+ msg = f'host {host} `cephadm {command}` failed: Cannot decode JSON'
+ self.log.exception(f'{msg}: {"".join(out)}')
+ raise OrchestratorError(msg)
+
+ def _run_cephadm(self,
+ host: str,
+ entity: Union[CephadmNoImage, str],
+ command: str,
+ args: List[str],
+ addr: Optional[str] = "",
+ stdin: Optional[str] = "",
+ no_fsid: Optional[bool] = False,
+ error_ok: Optional[bool] = False,
+ image: Optional[str] = "",
+ env_vars: Optional[List[str]] = None,
+ ) -> Tuple[List[str], List[str], int]:
+ """
+ Run cephadm on the remote host with the given command + args
+
+ Important: You probably don't want to run _run_cephadm from CLI handlers
+
+ :env_vars: in format -> [KEY=VALUE, ..]
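+        e.g. env_vars=['KEY=VALUE'] is passed through to cephadm as '--env KEY=VALUE'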
+ """
+ self.log.debug(f"_run_cephadm : command = {command}")
+ self.log.debug(f"_run_cephadm : args = {args}")
+
+ bypass_image = ('cephadm-exporter',)
+
+ with self._remote_connection(host, addr) as tpl:
+ conn, connr = tpl
+ assert image or entity
+            # Skip the image check for daemons that are not deployed as ceph containers
+ if not str(entity).startswith(bypass_image):
+ if not image and entity is not cephadmNoImage:
+ image = self.mgr._get_container_image(entity)
+
+ final_args = []
+
+ # global args
+ if env_vars:
+ for env_var_pair in env_vars:
+ final_args.extend(['--env', env_var_pair])
+
+ if image:
+ final_args.extend(['--image', image])
+
+ if not self.mgr.container_init:
+ final_args += ['--no-container-init']
+
+ # subcommand
+ final_args.append(command)
+
+ # subcommand args
+ if not no_fsid:
+ final_args += ['--fsid', self.mgr._cluster_fsid]
+
+ final_args += args
+
+ # exec
+ self.log.debug('args: %s' % (' '.join(final_args)))
+ if self.mgr.mode == 'root':
+ if stdin:
+ self.log.debug('stdin: %s' % stdin)
+
+ try:
+ # if host has gone offline this is likely where we'll fail first
+ python = connr.choose_python()
+ except RuntimeError as e:
+ self.mgr.offline_hosts.add(host)
+ self.mgr._reset_con(host)
+ if error_ok:
+ return [], [str(e)], 1
+ raise
+ if not python:
+ raise RuntimeError(
+ 'unable to find python on %s (tried %s in %s)' % (
+ host, remotes.PYTHONS, remotes.PATH))
+ try:
+ out, err, code = remoto.process.check(
+ conn,
+ [python, self.mgr.cephadm_binary_path] + final_args,
+ stdin=stdin.encode('utf-8') if stdin is not None else None)
+ if code == 2:
+ out_ls, err_ls, code_ls = remoto.process.check(
+ conn, ['ls', self.mgr.cephadm_binary_path])
+ if code_ls == 2:
+ self._deploy_cephadm_binary_conn(conn, host)
+ out, err, code = remoto.process.check(
+ conn,
+ [python, self.mgr.cephadm_binary_path] + final_args,
+ stdin=stdin.encode('utf-8') if stdin is not None else None)
+
+ except RuntimeError as e:
+ self.mgr._reset_con(host)
+ if error_ok:
+ return [], [str(e)], 1
+ raise
+
+ elif self.mgr.mode == 'cephadm-package':
+ try:
+ out, err, code = remoto.process.check(
+ conn,
+ ['sudo', '/usr/bin/cephadm'] + final_args,
+ stdin=stdin)
+ except RuntimeError as e:
+ self.mgr._reset_con(host)
+ if error_ok:
+ return [], [str(e)], 1
+ raise
+ else:
+ assert False, 'unsupported mode'
+
+ self.log.debug('code: %d' % code)
+ if out:
+ self.log.debug('out: %s' % '\n'.join(out))
+ if err:
+ self.log.debug('err: %s' % '\n'.join(err))
+ if code and not error_ok:
+ raise OrchestratorError(
+ 'cephadm exited with an error code: %d, stderr:%s' % (
+ code, '\n'.join(err)))
+ return out, err, code
+
+ def _get_container_image_info(self, image_name: str) -> ContainerInspectInfo:
+ # pick a random host...
+ host = None
+ for host_name in self.mgr.inventory.keys():
+ host = host_name
+ break
+ if not host:
+ raise OrchestratorError('no hosts defined')
+ if self.mgr.cache.host_needs_registry_login(host) and self.mgr.registry_url:
+ self._registry_login(host,
+ json.loads(str(self.mgr.get_store('registry_credentials'))))
+
+ pullargs: List[str] = []
+ if self.mgr.registry_insecure:
+ pullargs.append("--insecure")
+
+ j = self._run_cephadm_json(host, '', 'pull', pullargs, image=image_name, no_fsid=True)
+
+ r = ContainerInspectInfo(
+ j['image_id'],
+ j.get('ceph_version'),
+ j.get('repo_digests')
+ )
+ self.log.debug(f'image {image_name} -> {r}')
+ return r
+
+    # function responsible for logging a single host into a custom registry
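+    # registry_json is expected to hold at least 'url' and 'username' (plus the
+    # password used for the login); the exact key set here is an assumption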
+ def _registry_login(self, host: str, registry_json: Dict[str, str]) -> Optional[str]:
+ self.log.debug(
+ f"Attempting to log host {host} into custom registry @ {registry_json['url']}")
+ # want to pass info over stdin rather than through normal list of args
+ out, err, code = self._run_cephadm(
+ host, 'mon', 'registry-login',
+ ['--registry-json', '-'], stdin=json.dumps(registry_json), error_ok=True)
+ if code:
+ return f"Host {host} failed to login to {registry_json['url']} as {registry_json['username']} with given password"
+ return None
+
+ def _deploy_cephadm_binary(self, host: str) -> None:
+ # Use tee (from coreutils) to create a copy of cephadm on the target machine
+ self.log.info(f"Deploying cephadm binary to {host}")
+ with self._remote_connection(host) as tpl:
+ conn, _connr = tpl
+ return self._deploy_cephadm_binary_conn(conn, host)
+
+ def _deploy_cephadm_binary_conn(self, conn: "BaseConnection", host: str) -> None:
+ _out, _err, code = remoto.process.check(
+ conn,
+ ['mkdir', '-p', f'/var/lib/ceph/{self.mgr._cluster_fsid}'])
+ if code:
+ msg = f"Unable to deploy the cephadm binary to {host}: {_err}"
+ self.log.warning(msg)
+ raise OrchestratorError(msg)
+ _out, _err, code = remoto.process.check(
+ conn,
+ ['tee', '-', self.mgr.cephadm_binary_path],
+ stdin=self.mgr._cephadm.encode('utf-8'))
+ if code:
+ msg = f"Unable to deploy the cephadm binary to {host}: {_err}"
+ self.log.warning(msg)
+ raise OrchestratorError(msg)
+
+ def _write_remote_file(self,
+ host: str,
+ path: str,
+ content: bytes,
+ mode: int,
+ uid: int,
+ gid: int) -> None:
+ with self._remote_connection(host) as tpl:
+ conn, connr = tpl
+ try:
+ errmsg = connr.write_file(path, content, mode, uid, gid)
+ if errmsg is not None:
+ raise OrchestratorError(errmsg)
+ except Exception as e:
+ msg = f"Unable to write {host}:{path}: {e}"
+ self.log.warning(msg)
+ raise OrchestratorError(msg)
+
+ @contextmanager
+ def _remote_connection(self,
+ host: str,
+ addr: Optional[str] = None,
+ ) -> Iterator[Tuple["BaseConnection", Any]]:
+ if not addr and host in self.mgr.inventory:
+ addr = self.mgr.inventory.get_addr(host)
+
+ self.mgr.offline_hosts_remove(host)
+
+ try:
+ try:
+ if not addr:
+ raise OrchestratorError("host address is empty")
+ conn, connr = self.mgr._get_connection(addr)
+ except OSError as e:
+ self.mgr._reset_con(host)
+ msg = f"Can't communicate with remote host `{addr}`, possibly because python3 is not installed there: {str(e)}"
+ raise execnet.gateway_bootstrap.HostNotFound(msg)
+
+ yield (conn, connr)
+
+ except execnet.gateway_bootstrap.HostNotFound as e:
+ # this is a misleading exception as it seems to be thrown for
+ # any sort of connection failure, even those having nothing to
+ # do with "host not found" (e.g., ssh key permission denied).
+ self.mgr.offline_hosts.add(host)
+ self.mgr._reset_con(host)
+
+ user = self.mgr.ssh_user if self.mgr.mode == 'root' else 'cephadm'
+ if str(e).startswith("Can't communicate"):
+ msg = str(e)
+ else:
+ msg = f'''Failed to connect to {host} ({addr}).
+Please make sure that the host is reachable and accepts connections using the cephadm SSH key
+
+To add the cephadm SSH key to the host:
+> ceph cephadm get-pub-key > ~/ceph.pub
+> ssh-copy-id -f -i ~/ceph.pub {user}@{addr}
+
+To check that the host is reachable open a new shell with the --no-hosts flag:
+> cephadm shell --no-hosts
+
+Then run the following:
+> ceph cephadm get-ssh-config > ssh_config
+> ceph config-key get mgr/cephadm/ssh_identity_key > ~/cephadm_private_key
+> chmod 0600 ~/cephadm_private_key
+> ssh -F ssh_config -i ~/cephadm_private_key {user}@{addr}'''
+ raise OrchestratorError(msg) from e
+ except Exception as ex:
+ self.log.exception(ex)
+ raise
diff --git a/src/pybind/mgr/cephadm/services/__init__.py b/src/pybind/mgr/cephadm/services/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/pybind/mgr/cephadm/services/__init__.py
diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py
new file mode 100644
index 000000000..92d293fcd
--- /dev/null
+++ b/src/pybind/mgr/cephadm/services/cephadmservice.py
@@ -0,0 +1,1043 @@
+import errno
+import json
+import logging
+import re
+import socket
+import time
+from abc import ABCMeta, abstractmethod
+from typing import TYPE_CHECKING, List, Callable, TypeVar, \
+ Optional, Dict, Any, Tuple, NewType, cast
+
+from mgr_module import HandleCommandResult, MonCommandFailed
+
+from ceph.deployment.service_spec import ServiceSpec, RGWSpec
+from ceph.deployment.utils import is_ipv6, unwrap_ipv6
+from mgr_util import build_url
+from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus
+from orchestrator._interface import daemon_type_to_service
+from cephadm import utils
+
+if TYPE_CHECKING:
+ from cephadm.module import CephadmOrchestrator
+
+logger = logging.getLogger(__name__)
+
+ServiceSpecs = TypeVar('ServiceSpecs', bound=ServiceSpec)
+AuthEntity = NewType('AuthEntity', str)
+
+
+class CephadmDaemonDeploySpec:
+ # typing.NamedTuple + Generic is broken in py36
+ def __init__(self, host: str, daemon_id: str,
+ service_name: str,
+ network: Optional[str] = None,
+ keyring: Optional[str] = None,
+ extra_args: Optional[List[str]] = None,
+ ceph_conf: str = '',
+ extra_files: Optional[Dict[str, Any]] = None,
+ daemon_type: Optional[str] = None,
+ ip: Optional[str] = None,
+ ports: Optional[List[int]] = None,
+ rank: Optional[int] = None,
+ rank_generation: Optional[int] = None,
+ extra_container_args: Optional[List[str]] = None):
+ """
+        A data structure to encapsulate a `cephadm deploy ...` call.
+ """
+ self.host: str = host
+ self.daemon_id = daemon_id
+ self.service_name = service_name
+ daemon_type = daemon_type or (service_name.split('.')[0])
+ assert daemon_type is not None
+ self.daemon_type: str = daemon_type
+
+ # mons
+ self.network = network
+
+ # for run_cephadm.
+ self.keyring: Optional[str] = keyring
+
+ # For run_cephadm. Would be great to have more expressive names.
+ self.extra_args: List[str] = extra_args or []
+
+ self.ceph_conf = ceph_conf
+ self.extra_files = extra_files or {}
+
+ # TCP ports used by the daemon
+ self.ports: List[int] = ports or []
+ self.ip: Optional[str] = ip
+
+ # values to be populated during generate_config calls
+ # and then used in _run_cephadm
+ self.final_config: Dict[str, Any] = {}
+ self.deps: List[str] = []
+
+ self.rank: Optional[int] = rank
+ self.rank_generation: Optional[int] = rank_generation
+
+ self.extra_container_args = extra_container_args
+
+ def name(self) -> str:
+ return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+ def config_get_files(self) -> Dict[str, Any]:
+ files = self.extra_files
+ if self.ceph_conf:
+ files['config'] = self.ceph_conf
+
+ return files
+
+ @staticmethod
+ def from_daemon_description(dd: DaemonDescription) -> 'CephadmDaemonDeploySpec':
+ assert dd.hostname
+ assert dd.daemon_id
+ assert dd.daemon_type
+ return CephadmDaemonDeploySpec(
+ host=dd.hostname,
+ daemon_id=dd.daemon_id,
+ daemon_type=dd.daemon_type,
+ service_name=dd.service_name(),
+ ip=dd.ip,
+ ports=dd.ports,
+ rank=dd.rank,
+ rank_generation=dd.rank_generation,
+ extra_container_args=dd.extra_container_args,
+ )
+
+ def to_daemon_description(self, status: DaemonDescriptionStatus, status_desc: str) -> DaemonDescription:
+ return DaemonDescription(
+ daemon_type=self.daemon_type,
+ daemon_id=self.daemon_id,
+ service_name=self.service_name,
+ hostname=self.host,
+ status=status,
+ status_desc=status_desc,
+ ip=self.ip,
+ ports=self.ports,
+ rank=self.rank,
+ rank_generation=self.rank_generation,
+ extra_container_args=self.extra_container_args,
+ )
+
+
+class CephadmService(metaclass=ABCMeta):
+ """
+ Base class for service types. Often providing a create() and config() fn.
+ """
+
+ @property
+ @abstractmethod
+ def TYPE(self) -> str:
+ pass
+
+ def __init__(self, mgr: "CephadmOrchestrator"):
+ self.mgr: "CephadmOrchestrator" = mgr
+
+ def allow_colo(self) -> bool:
+ """
+ Return True if multiple daemons of the same type can colocate on
+ the same host.
+ """
+ return False
+
+ def primary_daemon_type(self) -> str:
+ """
+ This is the type of the primary (usually only) daemon to be deployed.
+ """
+ return self.TYPE
+
+ def per_host_daemon_type(self) -> Optional[str]:
+ """
+ If defined, this type of daemon will be deployed once for each host
+ containing one or more daemons of the primary type.
+ """
+ return None
+
+ def ranked(self) -> bool:
+ """
+ If True, we will assign a stable rank (0, 1, ...) and monotonically increasing
+ generation (0, 1, ...) to each daemon we create/deploy.
+ """
+ return False
+
+ def fence_old_ranks(self,
+ spec: ServiceSpec,
+ rank_map: Dict[int, Dict[int, Optional[str]]],
+ num_ranks: int) -> None:
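+        # only meaningful for ranked services; subclasses that return True from
+        # ranked() are expected to override this (hence the assert below)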
+ assert False
+
+ def make_daemon_spec(
+ self,
+ host: str,
+ daemon_id: str,
+ network: str,
+ spec: ServiceSpecs,
+ daemon_type: Optional[str] = None,
+ ports: Optional[List[int]] = None,
+ ip: Optional[str] = None,
+ rank: Optional[int] = None,
+ rank_generation: Optional[int] = None,
+ ) -> CephadmDaemonDeploySpec:
+ try:
+ eca = spec.extra_container_args
+ except AttributeError:
+ eca = None
+ return CephadmDaemonDeploySpec(
+ host=host,
+ daemon_id=daemon_id,
+ service_name=spec.service_name(),
+ network=network,
+ daemon_type=daemon_type,
+ ports=ports,
+ ip=ip,
+ rank=rank,
+ rank_generation=rank_generation,
+ extra_container_args=eca,
+ )
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+ raise NotImplementedError()
+
+ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
+ raise NotImplementedError()
+
+ def config(self, spec: ServiceSpec) -> None:
+ """
+ Configure the cluster for this service. Only called *once* per
+ service apply. Not for every daemon.
+ """
+ pass
+
+ def daemon_check_post(self, daemon_descrs: List[DaemonDescription]) -> None:
+ """The post actions needed to be done after daemons are checked"""
+ if self.mgr.config_dashboard:
+ if 'dashboard' in self.mgr.get('mgr_map')['modules']:
+ self.config_dashboard(daemon_descrs)
+ else:
+ logger.debug('Dashboard is not enabled. Skip configuration.')
+
+ def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
+ """Config dashboard settings."""
+ raise NotImplementedError()
+
+ def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
+        # if this is called for a service type where it hasn't explicitly been
+        # defined, return an empty DaemonDescription
+ return DaemonDescription()
+
+ def get_keyring_with_caps(self, entity: AuthEntity, caps: List[str]) -> str:
+ ret, keyring, err = self.mgr.mon_command({
+ 'prefix': 'auth get-or-create',
+ 'entity': entity,
+ 'caps': caps,
+ })
+ if err:
+ ret, out, err = self.mgr.mon_command({
+ 'prefix': 'auth caps',
+ 'entity': entity,
+ 'caps': caps,
+ })
+ if err:
+ self.mgr.log.warning(f"Unable to update caps for {entity}")
+ return keyring
+
+ def _inventory_get_fqdn(self, hostname: str) -> str:
+ """Get a host's FQDN with its hostname.
+
+ If the FQDN can't be resolved, the address from the inventory will
+ be returned instead.
+ """
+ addr = self.mgr.inventory.get_addr(hostname)
+ return socket.getfqdn(addr)
+
+ def _set_service_url_on_dashboard(self,
+ service_name: str,
+ get_mon_cmd: str,
+ set_mon_cmd: str,
+ service_url: str) -> None:
+ """A helper to get and set service_url via Dashboard's MON command.
+
+ If result of get_mon_cmd differs from service_url, set_mon_cmd will
+ be sent to set the service_url.
+ """
+ def get_set_cmd_dicts(out: str) -> List[dict]:
+ cmd_dict = {
+ 'prefix': set_mon_cmd,
+ 'value': service_url
+ }
+ return [cmd_dict] if service_url != out else []
+
+ self._check_and_set_dashboard(
+ service_name=service_name,
+ get_cmd=get_mon_cmd,
+ get_set_cmd_dicts=get_set_cmd_dicts
+ )
+
+ def _check_and_set_dashboard(self,
+ service_name: str,
+ get_cmd: str,
+ get_set_cmd_dicts: Callable[[str], List[dict]]) -> None:
+ """A helper to set configs in the Dashboard.
+
+ The method is useful for the pattern:
+ - Getting a config from Dashboard by using a Dashboard command. e.g. current iSCSI
+ gateways.
+ - Parse or deserialize previous output. e.g. Dashboard command returns a JSON string.
+        - Determine if the config needs to be updated. NOTE: This step is important because if a
+          Dashboard command modified the Ceph config, cephadm's config_notify() is called, which
+          kicks the serve() loop, so the logic using this method is likely to be called again.
+ A config should be updated only when needed.
+ - Update a config in Dashboard by using a Dashboard command.
+
+ :param service_name: the service name to be used for logging
+ :type service_name: str
+ :param get_cmd: Dashboard command prefix to get config. e.g. dashboard get-grafana-api-url
+ :type get_cmd: str
+ :param get_set_cmd_dicts: function to create a list, and each item is a command dictionary.
+ e.g.
+ [
+ {
+ 'prefix': 'dashboard iscsi-gateway-add',
+ 'service_url': 'http://admin:admin@aaa:5000',
+ 'name': 'aaa'
+ },
+ {
+ 'prefix': 'dashboard iscsi-gateway-add',
+ 'service_url': 'http://admin:admin@bbb:5000',
+ 'name': 'bbb'
+ }
+ ]
+            The function should return an empty list if no command needs to be sent.
+ :type get_set_cmd_dicts: Callable[[str], List[dict]]
+ """
+
+ try:
+ _, out, _ = self.mgr.check_mon_command({
+ 'prefix': get_cmd
+ })
+ except MonCommandFailed as e:
+ logger.warning('Failed to get Dashboard config for %s: %s', service_name, e)
+ return
+ cmd_dicts = get_set_cmd_dicts(out.strip())
+ for cmd_dict in list(cmd_dicts):
+ try:
+ inbuf = cmd_dict.pop('inbuf', None)
+ _, out, _ = self.mgr.check_mon_command(cmd_dict, inbuf)
+ except MonCommandFailed as e:
+ logger.warning('Failed to set Dashboard config for %s: %s', service_name, e)
+
+ def ok_to_stop_osd(
+ self,
+ osds: List[str],
+ known: Optional[List[str]] = None, # output argument
+ force: bool = False) -> HandleCommandResult:
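+        # wraps the 'osd ok-to-stop' mon command (with 'max': 16); when a 'known'
+        # list is supplied, it is extended with the osds reported as ok to stop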
+ r = HandleCommandResult(*self.mgr.mon_command({
+ 'prefix': "osd ok-to-stop",
+ 'ids': osds,
+ 'max': 16,
+ }))
+ j = None
+ try:
+ j = json.loads(r.stdout)
+ except json.decoder.JSONDecodeError:
+ self.mgr.log.warning("osd ok-to-stop didn't return structured result")
+ raise
+ if r.retval:
+ return r
+ if known is not None and j and j.get('ok_to_stop'):
+ self.mgr.log.debug(f"got {j}")
+ known.extend([f'osd.{x}' for x in j.get('osds', [])])
+ return HandleCommandResult(
+ 0,
+ f'{",".join(["osd.%s" % o for o in osds])} {"is" if len(osds) == 1 else "are"} safe to restart',
+ ''
+ )
+
+ def ok_to_stop(
+ self,
+ daemon_ids: List[str],
+ force: bool = False,
+ known: Optional[List[str]] = None # output argument
+ ) -> HandleCommandResult:
+ names = [f'{self.TYPE}.{d_id}' for d_id in daemon_ids]
+ out = f'It appears safe to stop {",".join(names)}'
+ err = f'It is NOT safe to stop {",".join(names)} at this time'
+
+ if self.TYPE not in ['mon', 'osd', 'mds']:
+ logger.debug(out)
+ return HandleCommandResult(0, out)
+
+ if self.TYPE == 'osd':
+ return self.ok_to_stop_osd(daemon_ids, known, force)
+
+ r = HandleCommandResult(*self.mgr.mon_command({
+ 'prefix': f'{self.TYPE} ok-to-stop',
+ 'ids': daemon_ids,
+ }))
+
+ if r.retval:
+ err = f'{err}: {r.stderr}' if r.stderr else err
+ logger.debug(err)
+ return HandleCommandResult(r.retval, r.stdout, err)
+
+ out = f'{out}: {r.stdout}' if r.stdout else out
+ logger.debug(out)
+ return HandleCommandResult(r.retval, out, r.stderr)
+
+ def _enough_daemons_to_stop(self, daemon_type: str, daemon_ids: List[str], service: str, low_limit: int, alert: bool = False) -> Tuple[bool, str]:
+        # Provides a warning about whether it is possible to stop <n> daemons in a service
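+        # e.g. with low_limit=1, stopping 1 of 2 running daemons returns (False, ...),
+        # while stopping both returns (True, <warning or alert text>)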
+ names = [f'{daemon_type}.{d_id}' for d_id in daemon_ids]
+ number_of_running_daemons = len(
+ [daemon
+ for daemon in self.mgr.cache.get_daemons_by_type(daemon_type)
+ if daemon.status == DaemonDescriptionStatus.running])
+ if (number_of_running_daemons - len(daemon_ids)) >= low_limit:
+ return False, f'It is presumed safe to stop {names}'
+
+ num_daemons_left = number_of_running_daemons - len(daemon_ids)
+
+ def plural(count: int) -> str:
+ return 'daemon' if count == 1 else 'daemons'
+
+ left_count = "no" if num_daemons_left == 0 else num_daemons_left
+
+ if alert:
+ out = (f'ALERT: Cannot stop {names} in {service} service. '
+ f'Not enough remaining {service} daemons. '
+ f'Please deploy at least {low_limit + 1} {service} daemons before stopping {names}. ')
+ else:
+ out = (f'WARNING: Stopping {len(daemon_ids)} out of {number_of_running_daemons} daemons in {service} service. '
+ f'Service will not be operational with {left_count} {plural(num_daemons_left)} left. '
+ f'At least {low_limit} {plural(low_limit)} must be running to guarantee service. ')
+ return True, out
+
+ def pre_remove(self, daemon: DaemonDescription) -> None:
+ """
+ Called before the daemon is removed.
+ """
+ assert daemon.daemon_type is not None
+ assert self.TYPE == daemon_type_to_service(daemon.daemon_type)
+ logger.debug(f'Pre remove daemon {self.TYPE}.{daemon.daemon_id}')
+
+ def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None:
+ """
+ Called after the daemon is removed.
+ """
+ assert daemon.daemon_type is not None
+ assert self.TYPE == daemon_type_to_service(daemon.daemon_type)
+ logger.debug(f'Post remove daemon {self.TYPE}.{daemon.daemon_id}')
+
+ def purge(self, service_name: str) -> None:
+ """Called to carry out any purge tasks following service removal"""
+ logger.debug(f'Purge called for {self.TYPE} - no action taken')
+
+
+class CephService(CephadmService):
+ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
+ # Ceph.daemons (mon, mgr, mds, osd, etc)
+ cephadm_config = self.get_config_and_keyring(
+ daemon_spec.daemon_type,
+ daemon_spec.daemon_id,
+ host=daemon_spec.host,
+ keyring=daemon_spec.keyring,
+ extra_ceph_config=daemon_spec.ceph_conf)
+
+ if daemon_spec.config_get_files():
+ cephadm_config.update({'files': daemon_spec.config_get_files()})
+
+ return cephadm_config, []
+
+ def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None:
+ super().post_remove(daemon, is_failed_deploy=is_failed_deploy)
+ self.remove_keyring(daemon)
+
+ def get_auth_entity(self, daemon_id: str, host: str = "") -> AuthEntity:
+ """
+ Map the daemon id to a cephx keyring entity name
+ """
+        # despite this method mapping daemon ids to entity names, self.TYPE within
+ # the CephService class refers to service types, not daemon types
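+        # e.g. (illustrative) rgw daemon 'foo.a' -> 'client.rgw.foo.a', a crash agent
+        # on host 'host1' -> 'client.crash.host1', mgr daemon 'x' -> 'mgr.x'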
+ if self.TYPE in ['rgw', 'rbd-mirror', 'cephfs-mirror', 'nfs', "iscsi", 'ingress']:
+ return AuthEntity(f'client.{self.TYPE}.{daemon_id}')
+ elif self.TYPE == 'crash':
+ if host == "":
+ raise OrchestratorError("Host not provided to generate <crash> auth entity name")
+ return AuthEntity(f'client.{self.TYPE}.{host}')
+ elif self.TYPE == 'mon':
+ return AuthEntity('mon.')
+ elif self.TYPE in ['mgr', 'osd', 'mds']:
+ return AuthEntity(f'{self.TYPE}.{daemon_id}')
+ else:
+ raise OrchestratorError("unknown daemon type")
+
+ def get_config_and_keyring(self,
+ daemon_type: str,
+ daemon_id: str,
+ host: str,
+ keyring: Optional[str] = None,
+ extra_ceph_config: Optional[str] = None
+ ) -> Dict[str, Any]:
+ # keyring
+ if not keyring:
+ entity: AuthEntity = self.get_auth_entity(daemon_id, host=host)
+ ret, keyring, err = self.mgr.check_mon_command({
+ 'prefix': 'auth get',
+ 'entity': entity,
+ })
+
+ config = self.mgr.get_minimal_ceph_conf()
+
+ if extra_ceph_config:
+ config += extra_ceph_config
+
+ return {
+ 'config': config,
+ 'keyring': keyring,
+ }
+
+ def remove_keyring(self, daemon: DaemonDescription) -> None:
+ assert daemon.daemon_id is not None
+ assert daemon.hostname is not None
+ daemon_id: str = daemon.daemon_id
+ host: str = daemon.hostname
+
+ assert daemon.daemon_type != 'mon'
+
+ entity = self.get_auth_entity(daemon_id, host=host)
+
+ logger.info(f'Removing key for {entity}')
+ ret, out, err = self.mgr.mon_command({
+ 'prefix': 'auth rm',
+ 'entity': entity,
+ })
+
+
+class MonService(CephService):
+ TYPE = 'mon'
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+ """
+ Create a new monitor on the given host.
+ """
+ assert self.TYPE == daemon_spec.daemon_type
+ name, _, network = daemon_spec.daemon_id, daemon_spec.host, daemon_spec.network
+
+ # get mon. key
+ ret, keyring, err = self.mgr.check_mon_command({
+ 'prefix': 'auth get',
+ 'entity': self.get_auth_entity(name),
+ })
+
+ extra_config = '[mon.%s]\n' % name
+ if network:
+ # infer whether this is a CIDR network, addrvec, or plain IP
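+            # e.g. '10.1.2.0/24' (CIDR), '[v2:10.1.2.3:3300,v1:10.1.2.3:6789]' (addrvec),
+            # '2001:db8::3' (IPv6) or '10.1.2.3' (plain IP) -- values are illustrative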
+ if '/' in network:
+ extra_config += 'public network = %s\n' % network
+ elif network.startswith('[v') and network.endswith(']'):
+ extra_config += 'public addrv = %s\n' % network
+ elif is_ipv6(network):
+ extra_config += 'public addr = %s\n' % unwrap_ipv6(network)
+ elif ':' not in network:
+ extra_config += 'public addr = %s\n' % network
+ else:
+ raise OrchestratorError(
+ 'Must specify a CIDR network, ceph addrvec, or plain IP: \'%s\'' % network)
+ else:
+ # try to get the public_network from the config
+ ret, network, err = self.mgr.check_mon_command({
+ 'prefix': 'config get',
+ 'who': 'mon',
+ 'key': 'public_network',
+ })
+ network = network.strip() if network else network
+ if not network:
+ raise OrchestratorError(
+ 'Must set public_network config option or specify a CIDR network, ceph addrvec, or plain IP')
+ if '/' not in network:
+ raise OrchestratorError(
+ 'public_network is set but does not look like a CIDR network: \'%s\'' % network)
+ extra_config += 'public network = %s\n' % network
+
+ daemon_spec.ceph_conf = extra_config
+ daemon_spec.keyring = keyring
+
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+
+ return daemon_spec
+
+ def _check_safe_to_destroy(self, mon_id: str) -> None:
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'quorum_status',
+ })
+ try:
+ j = json.loads(out)
+ except Exception:
+ raise OrchestratorError('failed to parse quorum status')
+
+ mons = [m['name'] for m in j['monmap']['mons']]
+ if mon_id not in mons:
+ logger.info('Safe to remove mon.%s: not in monmap (%s)' % (
+ mon_id, mons))
+ return
+ new_mons = [m for m in mons if m != mon_id]
+ new_quorum = [m for m in j['quorum_names'] if m != mon_id]
+ if len(new_quorum) > len(new_mons) / 2:
+ logger.info('Safe to remove mon.%s: new quorum should be %s (from %s)' %
+ (mon_id, new_quorum, new_mons))
+ return
+ raise OrchestratorError(
+ 'Removing %s would break mon quorum (new quorum %s, new mons %s)' % (mon_id, new_quorum, new_mons))
+
+ def pre_remove(self, daemon: DaemonDescription) -> None:
+ super().pre_remove(daemon)
+
+ assert daemon.daemon_id is not None
+ daemon_id: str = daemon.daemon_id
+ self._check_safe_to_destroy(daemon_id)
+
+ # remove mon from quorum before we destroy the daemon
+ logger.info('Removing monitor %s from monmap...' % daemon_id)
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'mon rm',
+ 'name': daemon_id,
+ })
+
+ def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None:
+ # Do not remove the mon keyring.
+ # super().post_remove(daemon)
+ pass
+
+
+class MgrService(CephService):
+ TYPE = 'mgr'
+
+ def allow_colo(self) -> bool:
+ if self.mgr.get_ceph_option('mgr_standby_modules'):
+ # traditional mgr mode: standby daemons' modules listen on
+ # ports and redirect to the primary. we must not schedule
+ # multiple mgrs on the same host or else ports will
+ # conflict.
+ return False
+ else:
+ # standby daemons do nothing, and therefore port conflicts
+ # are not a concern.
+ return True
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+ """
+ Create a new manager instance on a host.
+ """
+ assert self.TYPE == daemon_spec.daemon_type
+ mgr_id, _ = daemon_spec.daemon_id, daemon_spec.host
+
+ # get mgr. key
+ keyring = self.get_keyring_with_caps(self.get_auth_entity(mgr_id),
+ ['mon', 'profile mgr',
+ 'osd', 'allow *',
+ 'mds', 'allow *'])
+
+ # Retrieve ports used by manager modules
+        # In the case of the dashboard port, with several manager daemons
+        # running on different hosts, there is the possibility that the user
+        # has decided to use a different dashboard port on each server.
+        # If this is the case, the only dashboard port opened will be the one
+        # used as the default.
+ ports = []
+ ret, mgr_services, err = self.mgr.check_mon_command({
+ 'prefix': 'mgr services',
+ })
+ if mgr_services:
+ mgr_endpoints = json.loads(mgr_services)
+ for end_point in mgr_endpoints.values():
+ port = re.search(r'\:\d+\/', end_point)
+ if port:
+ ports.append(int(port[0][1:-1]))
+
+ if ports:
+ daemon_spec.ports = ports
+
+ daemon_spec.keyring = keyring
+
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+
+ return daemon_spec
+
+ def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
+ for daemon in daemon_descrs:
+ assert daemon.daemon_type is not None
+ assert daemon.daemon_id is not None
+ if self.mgr.daemon_is_self(daemon.daemon_type, daemon.daemon_id):
+ return daemon
+ # if no active mgr found, return empty Daemon Desc
+ return DaemonDescription()
+
+ def fail_over(self) -> None:
+ # this has been seen to sometimes transiently fail even when there are multiple
+ # mgr daemons. As long as there are multiple known mgr daemons, we should retry.
+ class NoStandbyError(OrchestratorError):
+ pass
+ no_standby_exc = NoStandbyError('Need standby mgr daemon', event_kind_subject=(
+ 'daemon', 'mgr' + self.mgr.get_mgr_id()))
+ for sleep_secs in [2, 8, 15]:
+ try:
+ if not self.mgr_map_has_standby():
+ raise no_standby_exc
+ self.mgr.events.for_daemon('mgr' + self.mgr.get_mgr_id(),
+ 'INFO', 'Failing over to other MGR')
+ logger.info('Failing over to other MGR')
+
+ # fail over
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'mgr fail',
+ 'who': self.mgr.get_mgr_id(),
+ })
+ return
+ except NoStandbyError:
+ logger.info(
+ f'Failed to find standby mgr for failover. Retrying in {sleep_secs} seconds')
+ time.sleep(sleep_secs)
+ raise no_standby_exc
+
+ def mgr_map_has_standby(self) -> bool:
+ """
+ This is a bit safer than asking our inventory. If the mgr joined the mgr map,
+ we know it joined the cluster
+ """
+ mgr_map = self.mgr.get('mgr_map')
+ num = len(mgr_map.get('standbys'))
+ return bool(num)
+
+ def ok_to_stop(
+ self,
+ daemon_ids: List[str],
+ force: bool = False,
+ known: Optional[List[str]] = None # output argument
+ ) -> HandleCommandResult:
+ # ok to stop if there is more than 1 mgr and not trying to stop the active mgr
+
+ warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Mgr', 1, True)
+ if warn:
+ return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+ mgr_daemons = self.mgr.cache.get_daemons_by_type(self.TYPE)
+ active = self.get_active_daemon(mgr_daemons).daemon_id
+ if active in daemon_ids:
+            warn_message = 'ALERT: Cannot stop the active Mgr daemon. Please switch the active Mgr with \'ceph mgr fail %s\'' % active
+ return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+ return HandleCommandResult(0, warn_message, '')
+
+
+class MdsService(CephService):
+ TYPE = 'mds'
+
+ def allow_colo(self) -> bool:
+ return True
+
+ def config(self, spec: ServiceSpec) -> None:
+ assert self.TYPE == spec.service_type
+ assert spec.service_id
+
+ # ensure mds_join_fs is set for these daemons
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'config set',
+ 'who': 'mds.' + spec.service_id,
+ 'name': 'mds_join_fs',
+ 'value': spec.service_id,
+ })
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+ assert self.TYPE == daemon_spec.daemon_type
+ mds_id, _ = daemon_spec.daemon_id, daemon_spec.host
+
+ # get mds. key
+ keyring = self.get_keyring_with_caps(self.get_auth_entity(mds_id),
+ ['mon', 'profile mds',
+ 'osd', 'allow rw tag cephfs *=*',
+ 'mds', 'allow'])
+ daemon_spec.keyring = keyring
+
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+
+ return daemon_spec
+
+ def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
+ active_mds_strs = list()
+ for fs in self.mgr.get('fs_map')['filesystems']:
+ mds_map = fs['mdsmap']
+ if mds_map is not None:
+ for mds_id, mds_status in mds_map['info'].items():
+ if mds_status['state'] == 'up:active':
+ active_mds_strs.append(mds_status['name'])
+ if len(active_mds_strs) != 0:
+ for daemon in daemon_descrs:
+ if daemon.daemon_id in active_mds_strs:
+ return daemon
+ # if no mds found, return empty Daemon Desc
+ return DaemonDescription()
+
+ def purge(self, service_name: str) -> None:
+ self.mgr.check_mon_command({
+ 'prefix': 'config rm',
+ 'who': service_name,
+ 'name': 'mds_join_fs',
+ })
+
+
+class RgwService(CephService):
+ TYPE = 'rgw'
+
+ def allow_colo(self) -> bool:
+ return True
+
+ def config(self, spec: RGWSpec) -> None: # type: ignore
+ assert self.TYPE == spec.service_type
+
+ # set rgw_realm and rgw_zone, if present
+ if spec.rgw_realm:
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'config set',
+ 'who': f"{utils.name_to_config_section('rgw')}.{spec.service_id}",
+ 'name': 'rgw_realm',
+ 'value': spec.rgw_realm,
+ })
+ if spec.rgw_zone:
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'config set',
+ 'who': f"{utils.name_to_config_section('rgw')}.{spec.service_id}",
+ 'name': 'rgw_zone',
+ 'value': spec.rgw_zone,
+ })
+
+ if spec.rgw_frontend_ssl_certificate:
+ if isinstance(spec.rgw_frontend_ssl_certificate, list):
+ cert_data = '\n'.join(spec.rgw_frontend_ssl_certificate)
+ elif isinstance(spec.rgw_frontend_ssl_certificate, str):
+ cert_data = spec.rgw_frontend_ssl_certificate
+ else:
+ raise OrchestratorError(
+ 'Invalid rgw_frontend_ssl_certificate: %s'
+ % spec.rgw_frontend_ssl_certificate)
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'config-key set',
+ 'key': f'rgw/cert/{spec.service_name()}',
+ 'val': cert_data,
+ })
+
+ # TODO: fail, if we don't have a spec
+ logger.info('Saving service %s spec with placement %s' % (
+ spec.service_name(), spec.placement.pretty_str()))
+ self.mgr.spec_store.save(spec)
+ self.mgr.trigger_connect_dashboard_rgw()
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+ assert self.TYPE == daemon_spec.daemon_type
+ rgw_id, _ = daemon_spec.daemon_id, daemon_spec.host
+ spec = cast(RGWSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+
+ keyring = self.get_keyring(rgw_id)
+
+ if daemon_spec.ports:
+ port = daemon_spec.ports[0]
+ else:
+            # this is a redeploy of an older instance that doesn't have an explicitly
+ # assigned port, in which case we can assume there is only 1 per host
+ # and it matches the spec.
+ port = spec.get_port()
+
+ # configure frontend
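+        # the resulting rgw_frontends value looks something like 'beast port=80' or
+        # 'beast ssl_port=443 ssl_certificate=config://rgw/cert/<service_name>' (illustrative)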
+ args = []
+ ftype = spec.rgw_frontend_type or "beast"
+ if ftype == 'beast':
+ if spec.ssl:
+ if daemon_spec.ip:
+ args.append(
+ f"ssl_endpoint={build_url(host=daemon_spec.ip, port=port).lstrip('/')}")
+ else:
+ args.append(f"ssl_port={port}")
+ args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}")
+ else:
+ if daemon_spec.ip:
+ args.append(f"endpoint={build_url(host=daemon_spec.ip, port=port).lstrip('/')}")
+ else:
+ args.append(f"port={port}")
+ elif ftype == 'civetweb':
+ if spec.ssl:
+ if daemon_spec.ip:
+ # note the 's' suffix on port
+ args.append(f"port={build_url(host=daemon_spec.ip, port=port).lstrip('/')}s")
+ else:
+ args.append(f"port={port}s") # note the 's' suffix on port
+ args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}")
+ else:
+ if daemon_spec.ip:
+ args.append(f"port={build_url(host=daemon_spec.ip, port=port).lstrip('/')}")
+ else:
+ args.append(f"port={port}")
+ frontend = f'{ftype} {" ".join(args)}'
+
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'config set',
+ 'who': utils.name_to_config_section(daemon_spec.name()),
+ 'name': 'rgw_frontends',
+ 'value': frontend
+ })
+
+ daemon_spec.keyring = keyring
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+
+ return daemon_spec
+
+ def get_keyring(self, rgw_id: str) -> str:
+ keyring = self.get_keyring_with_caps(self.get_auth_entity(rgw_id),
+ ['mon', 'allow *',
+ 'mgr', 'allow rw',
+ 'osd', 'allow rwx tag rgw *=*'])
+ return keyring
+
+ def purge(self, service_name: str) -> None:
+ self.mgr.check_mon_command({
+ 'prefix': 'config rm',
+ 'who': utils.name_to_config_section(service_name),
+ 'name': 'rgw_realm',
+ })
+ self.mgr.check_mon_command({
+ 'prefix': 'config rm',
+ 'who': utils.name_to_config_section(service_name),
+ 'name': 'rgw_zone',
+ })
+ self.mgr.check_mon_command({
+ 'prefix': 'config-key rm',
+ 'key': f'rgw/cert/{service_name}',
+ })
+ self.mgr.trigger_connect_dashboard_rgw()
+
+ def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None:
+ super().post_remove(daemon, is_failed_deploy=is_failed_deploy)
+ self.mgr.check_mon_command({
+ 'prefix': 'config rm',
+ 'who': utils.name_to_config_section(daemon.name()),
+ 'name': 'rgw_frontends',
+ })
+
+ def ok_to_stop(
+ self,
+ daemon_ids: List[str],
+ force: bool = False,
+ known: Optional[List[str]] = None # output argument
+ ) -> HandleCommandResult:
+        # if a load balancer (ingress) is present, block if only 1 daemon is up, otherwise ok;
+        # if no load balancer, warn if > 1 daemon, block if only 1 daemon
+ def ingress_present() -> bool:
+ running_ingress_daemons = [
+ daemon for daemon in self.mgr.cache.get_daemons_by_type('ingress') if daemon.status == 1]
+ running_haproxy_daemons = [
+ daemon for daemon in running_ingress_daemons if daemon.daemon_type == 'haproxy']
+ running_keepalived_daemons = [
+ daemon for daemon in running_ingress_daemons if daemon.daemon_type == 'keepalived']
+ # check that there is at least one haproxy and keepalived daemon running
+ if running_haproxy_daemons and running_keepalived_daemons:
+ return True
+ return False
+
+ # if only 1 rgw, alert user (this is not passable with --force)
+ warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'RGW', 1, True)
+ if warn:
+ return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+ # if reached here, there is > 1 rgw daemon.
+ # Say okay if load balancer present or force flag set
+ if ingress_present() or force:
+ return HandleCommandResult(0, warn_message, '')
+
+ # if reached here, > 1 RGW daemon, no load balancer and no force flag.
+ # Provide warning
+ warn_message = "WARNING: Removing RGW daemons can cause clients to lose connectivity. "
+ return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+ def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
+ self.mgr.trigger_connect_dashboard_rgw()
+
+
+class RbdMirrorService(CephService):
+ TYPE = 'rbd-mirror'
+
+ def allow_colo(self) -> bool:
+ return True
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+ assert self.TYPE == daemon_spec.daemon_type
+ daemon_id, _ = daemon_spec.daemon_id, daemon_spec.host
+
+ keyring = self.get_keyring_with_caps(self.get_auth_entity(daemon_id),
+ ['mon', 'profile rbd-mirror',
+ 'osd', 'profile rbd'])
+
+ daemon_spec.keyring = keyring
+
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+
+ return daemon_spec
+
+ def ok_to_stop(
+ self,
+ daemon_ids: List[str],
+ force: bool = False,
+ known: Optional[List[str]] = None # output argument
+ ) -> HandleCommandResult:
+ # if only 1 rbd-mirror, alert user (this cannot be overridden with --force)
+ warn, warn_message = self._enough_daemons_to_stop(
+ self.TYPE, daemon_ids, 'Rbdmirror', 1, True)
+ if warn:
+ return HandleCommandResult(-errno.EBUSY, '', warn_message)
+ return HandleCommandResult(0, warn_message, '')
+
+
+class CrashService(CephService):
+ TYPE = 'crash'
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+ assert self.TYPE == daemon_spec.daemon_type
+ daemon_id, host = daemon_spec.daemon_id, daemon_spec.host
+
+ keyring = self.get_keyring_with_caps(self.get_auth_entity(daemon_id, host=host),
+ ['mon', 'profile crash',
+ 'mgr', 'profile crash'])
+
+ daemon_spec.keyring = keyring
+
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+
+ return daemon_spec
+
+
+class CephfsMirrorService(CephService):
+ TYPE = 'cephfs-mirror'
+
+ def config(self, spec: ServiceSpec) -> None:
+ # make sure mirroring module is enabled
+ mgr_map = self.mgr.get('mgr_map')
+ mod_name = 'mirroring'
+ if mod_name not in mgr_map.get('services', {}):
+ self.mgr.check_mon_command({
+ 'prefix': 'mgr module enable',
+ 'module': mod_name
+ })
+ # we shouldn't get here (mon will tell the mgr to respawn), but no
+ # harm done if we do.
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+ assert self.TYPE == daemon_spec.daemon_type
+
+ ret, keyring, err = self.mgr.check_mon_command({
+ 'prefix': 'auth get-or-create',
+ 'entity': self.get_auth_entity(daemon_spec.daemon_id),
+ 'caps': ['mon', 'profile cephfs-mirror',
+ 'mds', 'allow r',
+ 'osd', 'allow rw tag cephfs metadata=*, allow r tag cephfs data=*',
+ 'mgr', 'allow r'],
+ })
+
+ daemon_spec.keyring = keyring
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+ return daemon_spec
diff --git a/src/pybind/mgr/cephadm/services/container.py b/src/pybind/mgr/cephadm/services/container.py
new file mode 100644
index 000000000..b9cdfad5e
--- /dev/null
+++ b/src/pybind/mgr/cephadm/services/container.py
@@ -0,0 +1,29 @@
+import logging
+from typing import List, Any, Tuple, Dict, cast
+
+from ceph.deployment.service_spec import CustomContainerSpec
+
+from .cephadmservice import CephadmService, CephadmDaemonDeploySpec
+
+logger = logging.getLogger(__name__)
+
+
+class CustomContainerService(CephadmService):
+ TYPE = 'container'
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) \
+ -> CephadmDaemonDeploySpec:
+ assert self.TYPE == daemon_spec.daemon_type
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+ return daemon_spec
+
+ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) \
+ -> Tuple[Dict[str, Any], List[str]]:
+ assert self.TYPE == daemon_spec.daemon_type
+ deps: List[str] = []
+ spec = cast(CustomContainerSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+ config: Dict[str, Any] = spec.config_json()
+ logger.debug(
+ 'Generated configuration for \'%s\' service: config-json=%s, dependencies=%s' %
+ (self.TYPE, config, deps))
+ return config, deps
diff --git a/src/pybind/mgr/cephadm/services/exporter.py b/src/pybind/mgr/cephadm/services/exporter.py
new file mode 100644
index 000000000..b9c7d85e6
--- /dev/null
+++ b/src/pybind/mgr/cephadm/services/exporter.py
@@ -0,0 +1,147 @@
+import json
+import logging
+from typing import TYPE_CHECKING, List, Dict, Any, Tuple
+
+from orchestrator import OrchestratorError
+from mgr_util import ServerConfigException, verify_tls
+
+from .cephadmservice import CephadmService, CephadmDaemonDeploySpec
+
+if TYPE_CHECKING:
+ from cephadm.module import CephadmOrchestrator
+
+logger = logging.getLogger(__name__)
+
+
+class CephadmExporterConfig:
+ required_keys = ['crt', 'key', 'token', 'port']
+ DEFAULT_PORT = '9443'
+
+ def __init__(self, mgr: "CephadmOrchestrator", crt: str = "", key: str = "",
+ token: str = "", port: str = "") -> None:
+ self.mgr = mgr
+ self.crt = crt
+ self.key = key
+ self.token = token
+ self.port = port
+
+ @property
+ def ready(self) -> bool:
+ return all([self.crt, self.key, self.token, self.port])
+
+ def load_from_store(self) -> None:
+ cfg = self.mgr._get_exporter_config()
+
+ assert isinstance(cfg, dict)
+ self.crt = cfg.get('crt', "")
+ self.key = cfg.get('key', "")
+ self.token = cfg.get('token', "")
+ self.port = cfg.get('port', "")
+
+ def load_from_json(self, json_str: str) -> Tuple[int, str]:
+ try:
+ cfg = json.loads(json_str)
+ except ValueError:
+ return 1, "Invalid JSON provided - unable to load"
+
+ if not all([k in cfg for k in CephadmExporterConfig.required_keys]):
+ return 1, "JSON file must contain crt, key, token and port"
+
+ self.crt = cfg.get('crt')
+ self.key = cfg.get('key')
+ self.token = cfg.get('token')
+ self.port = cfg.get('port')
+
+ return 0, ""
+
+ def validate_config(self) -> Tuple[int, str]:
+ if not self.ready:
+ return 1, "Incomplete configuration. cephadm-exporter needs crt, key, token and port to be set"
+
+ for check in [self._validate_tls, self._validate_token, self._validate_port]:
+ rc, reason = check()
+ if rc:
+ return 1, reason
+
+ return 0, ""
+
+ def _validate_tls(self) -> Tuple[int, str]:
+
+ try:
+ verify_tls(self.crt, self.key)
+ except ServerConfigException as e:
+ return 1, str(e)
+
+ return 0, ""
+
+ def _validate_token(self) -> Tuple[int, str]:
+ if not isinstance(self.token, str):
+ return 1, "token must be a string"
+ if len(self.token) < 8:
+ return 1, "Token must be a string of at least 8 chars in length"
+
+ return 0, ""
+
+ def _validate_port(self) -> Tuple[int, str]:
+ try:
+ p = int(str(self.port))
+ if p <= 1024:
+ raise ValueError
+ except ValueError:
+ return 1, "Port must be a integer (>1024)"
+
+ return 0, ""
+
+
+class CephadmExporter(CephadmService):
+ TYPE = 'cephadm-exporter'
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+ assert self.TYPE == daemon_spec.daemon_type
+
+ cfg = CephadmExporterConfig(self.mgr)
+ cfg.load_from_store()
+
+ if cfg.ready:
+ rc, reason = cfg.validate_config()
+ if rc:
+ raise OrchestratorError(reason)
+ else:
+ logger.info(
+ "Incomplete/Missing configuration, applying defaults")
+ self.mgr._set_exporter_defaults()
+ cfg.load_from_store()
+
+ if not daemon_spec.ports:
+ daemon_spec.ports = [int(cfg.port)]
+
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+
+ return daemon_spec
+
+ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
+ assert self.TYPE == daemon_spec.daemon_type
+ deps: List[str] = []
+
+ cfg = CephadmExporterConfig(self.mgr)
+ cfg.load_from_store()
+
+ if cfg.ready:
+ rc, reason = cfg.validate_config()
+ if rc:
+ raise OrchestratorError(reason)
+ else:
+ logger.info("Using default configuration for cephadm-exporter")
+ self.mgr._set_exporter_defaults()
+ cfg.load_from_store()
+
+ config = {
+ "crt": cfg.crt,
+ "key": cfg.key,
+ "token": cfg.token
+ }
+ return config, deps
+
+ def purge(self, service_name: str) -> None:
+ logger.info("Purging cephadm-exporter settings from mon K/V store")
+ self.mgr._clear_exporter_config_settings()
diff --git a/src/pybind/mgr/cephadm/services/ingress.py b/src/pybind/mgr/cephadm/services/ingress.py
new file mode 100644
index 000000000..99fde1c43
--- /dev/null
+++ b/src/pybind/mgr/cephadm/services/ingress.py
@@ -0,0 +1,296 @@
+import ipaddress
+import logging
+import random
+import string
+from typing import List, Dict, Any, Tuple, cast, Optional
+
+from ceph.deployment.service_spec import IngressSpec
+from mgr_util import build_url
+from cephadm.utils import resolve_ip
+from orchestrator import OrchestratorError
+from cephadm.services.cephadmservice import CephadmDaemonDeploySpec, CephService
+
+logger = logging.getLogger(__name__)
+
+
+class IngressService(CephService):
+ TYPE = 'ingress'
+
+ def primary_daemon_type(self) -> str:
+ return 'haproxy'
+
+ def per_host_daemon_type(self) -> Optional[str]:
+ return 'keepalived'
+
+ def prepare_create(
+ self,
+ daemon_spec: CephadmDaemonDeploySpec,
+ ) -> CephadmDaemonDeploySpec:
+ if daemon_spec.daemon_type == 'haproxy':
+ return self.haproxy_prepare_create(daemon_spec)
+ if daemon_spec.daemon_type == 'keepalived':
+ return self.keepalived_prepare_create(daemon_spec)
+ assert False, "unexpected daemon type"
+
+ def generate_config(
+ self,
+ daemon_spec: CephadmDaemonDeploySpec
+ ) -> Tuple[Dict[str, Any], List[str]]:
+ if daemon_spec.daemon_type == 'haproxy':
+ return self.haproxy_generate_config(daemon_spec)
+ else:
+ return self.keepalived_generate_config(daemon_spec)
+ assert False, "unexpected daemon type"
+
+ def haproxy_prepare_create(
+ self,
+ daemon_spec: CephadmDaemonDeploySpec,
+ ) -> CephadmDaemonDeploySpec:
+ assert daemon_spec.daemon_type == 'haproxy'
+
+ daemon_id = daemon_spec.daemon_id
+ host = daemon_spec.host
+ spec = cast(IngressSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+
+ logger.debug('prepare_create haproxy.%s on host %s with spec %s' % (
+ daemon_id, host, spec))
+
+ daemon_spec.final_config, daemon_spec.deps = self.haproxy_generate_config(daemon_spec)
+
+ return daemon_spec
+
+ def haproxy_generate_config(
+ self,
+ daemon_spec: CephadmDaemonDeploySpec,
+ ) -> Tuple[Dict[str, Any], List[str]]:
+ spec = cast(IngressSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+ assert spec.backend_service
+ if spec.backend_service not in self.mgr.spec_store:
+ raise RuntimeError(
+ f'{spec.service_name()} backend service {spec.backend_service} does not exist')
+ backend_spec = self.mgr.spec_store[spec.backend_service].spec
+ daemons = self.mgr.cache.get_daemons_by_service(spec.backend_service)
+ deps = [d.name() for d in daemons]
+
+ # generate password?
+ pw_key = f'{spec.service_name()}/monitor_password'
+ password = self.mgr.get_store(pw_key)
+ if password is None:
+ if not spec.monitor_password:
+ password = ''.join(random.choice(string.ascii_lowercase) for _ in range(20))
+ self.mgr.set_store(pw_key, password)
+ else:
+ if spec.monitor_password:
+ self.mgr.set_store(pw_key, None)
+ if spec.monitor_password:
+ password = spec.monitor_password
+
+ if backend_spec.service_type == 'nfs':
+ mode = 'tcp'
+ by_rank = {d.rank: d for d in daemons if d.rank is not None}
+ servers = []
+
+ # try to establish how many ranks we *should* have
+ num_ranks = backend_spec.placement.count
+ if not num_ranks:
+ num_ranks = 1 + max(by_rank.keys())
+
+ for rank in range(num_ranks):
+ if rank in by_rank:
+ d = by_rank[rank]
+ assert d.ports
+ servers.append({
+ 'name': f"{spec.backend_service}.{rank}",
+ 'ip': d.ip or resolve_ip(self.mgr.inventory.get_addr(str(d.hostname))),
+ 'port': d.ports[0],
+ })
+ else:
+ # offline/missing server; leave rank in place
+ servers.append({
+ 'name': f"{spec.backend_service}.{rank}",
+ 'ip': '0.0.0.0',
+ 'port': 0,
+ })
+ else:
+ mode = 'http'
+ servers = [
+ {
+ 'name': d.name(),
+ 'ip': d.ip or resolve_ip(self.mgr.inventory.get_addr(str(d.hostname))),
+ 'port': d.ports[0],
+ } for d in daemons if d.ports
+ ]
+
+ haproxy_conf = self.mgr.template.render(
+ 'services/ingress/haproxy.cfg.j2',
+ {
+ 'spec': spec,
+ 'mode': mode,
+ 'servers': servers,
+ 'user': spec.monitor_user or 'admin',
+ 'password': password,
+ 'ip': "*" if spec.virtual_ips_list else str(spec.virtual_ip).split('/')[0] or daemon_spec.ip or '*',
+ 'frontend_port': daemon_spec.ports[0] if daemon_spec.ports else spec.frontend_port,
+ 'monitor_port': daemon_spec.ports[1] if daemon_spec.ports else spec.monitor_port,
+ }
+ )
+ config_files = {
+ 'files': {
+ "haproxy.cfg": haproxy_conf,
+ }
+ }
+ if spec.ssl_cert:
+ ssl_cert = spec.ssl_cert
+ if isinstance(ssl_cert, list):
+ ssl_cert = '\n'.join(ssl_cert)
+ config_files['files']['haproxy.pem'] = ssl_cert
+
+ return config_files, sorted(deps)
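+ # The returned payload has roughly this shape (daemon names below are made up):
+ # ({'files': {'haproxy.cfg': '<rendered cfg>', 'haproxy.pem': '<optional concatenated cert>'}},
+ #  ['rgw.foo.host1.abcdef', 'rgw.foo.host2.ghijkl'])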
+
+ def keepalived_prepare_create(
+ self,
+ daemon_spec: CephadmDaemonDeploySpec,
+ ) -> CephadmDaemonDeploySpec:
+ assert daemon_spec.daemon_type == 'keepalived'
+
+ daemon_id = daemon_spec.daemon_id
+ host = daemon_spec.host
+ spec = cast(IngressSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+
+ logger.debug('prepare_create keepalived.%s on host %s with spec %s' % (
+ daemon_id, host, spec))
+
+ daemon_spec.final_config, daemon_spec.deps = self.keepalived_generate_config(daemon_spec)
+
+ return daemon_spec
+
+ def keepalived_generate_config(
+ self,
+ daemon_spec: CephadmDaemonDeploySpec,
+ ) -> Tuple[Dict[str, Any], List[str]]:
+ spec = cast(IngressSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+ assert spec.backend_service
+
+ # generate password?
+ pw_key = f'{spec.service_name()}/keepalived_password'
+ password = self.mgr.get_store(pw_key)
+ if password is None:
+ if not spec.keepalived_password:
+ password = ''.join(random.choice(string.ascii_lowercase) for _ in range(20))
+ self.mgr.set_store(pw_key, password)
+ else:
+ if spec.keepalived_password:
+ self.mgr.set_store(pw_key, None)
+ if spec.keepalived_password:
+ password = spec.keepalived_password
+
+ daemons = self.mgr.cache.get_daemons_by_service(spec.service_name())
+
+ if not daemons:
+ raise OrchestratorError(
+ f'Failed to generate keepalived.conf: No daemons deployed for {spec.service_name()}')
+
+ deps = sorted([d.name() for d in daemons if d.daemon_type == 'haproxy'])
+
+ host = daemon_spec.host
+ hosts = sorted(list(set([host] + [str(d.hostname) for d in daemons])))
+
+ # interface
+ bare_ips = []
+ if spec.virtual_ip:
+ bare_ips.append(str(spec.virtual_ip).split('/')[0])
+ elif spec.virtual_ips_list:
+ bare_ips = [str(vip).split('/')[0] for vip in spec.virtual_ips_list]
+ interface = None
+ for bare_ip in bare_ips:
+ for subnet, ifaces in self.mgr.cache.networks.get(host, {}).items():
+ if ifaces and ipaddress.ip_address(bare_ip) in ipaddress.ip_network(subnet):
+ interface = list(ifaces.keys())[0]
+ logger.info(
+ f'{bare_ip} is in {subnet} on {host} interface {interface}'
+ )
+ break
+ else: # no break: no subnet on this host matched; try the next bare IP
+ continue
+ break
+ # try to find interface by matching spec.virtual_interface_networks
+ if not interface and spec.virtual_interface_networks:
+ for subnet, ifaces in self.mgr.cache.networks.get(host, {}).items():
+ if subnet in spec.virtual_interface_networks:
+ interface = list(ifaces.keys())[0]
+ logger.info(
+ f'{spec.virtual_ip} will be configured on {host} interface '
+ f'{interface} (which has guiding subnet {subnet})'
+ )
+ break
+ if not interface:
+ raise OrchestratorError(
+ f"Unable to identify interface for {spec.virtual_ip} on {host}"
+ )
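+ # Worked example (hypothetical values): for virtual_ip 10.0.0.100/24 on a
+ # host whose cached networks contain {'10.0.0.0/24': {'eth0': [...]}}, the
+ # bare IP 10.0.0.100 falls within 10.0.0.0/24, so 'eth0' is selected as the
+ # keepalived interface.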
+
+ # script to monitor health
+ script = '/usr/bin/false'
+ for d in daemons:
+ if d.hostname == host:
+ if d.daemon_type == 'haproxy':
+ assert d.ports
+ port = d.ports[1] # monitoring port
+ script = f'/usr/bin/curl {build_url(scheme="http", host=d.ip or "localhost", port=port)}/health'
+ assert script
+
+ states = []
+ priorities = []
+ virtual_ips = []
+
+ # Set state and priority: one MASTER per VIP; with a single VIP, the first host in the sorted list is the MASTER.
+ if spec.virtual_ip:
+ virtual_ips.append(spec.virtual_ip)
+ if hosts[0] == host:
+ states.append('MASTER')
+ priorities.append(100)
+ else:
+ states.append('BACKUP')
+ priorities.append(90)
+
+ elif spec.virtual_ips_list:
+ virtual_ips = spec.virtual_ips_list
+ if len(virtual_ips) > len(hosts):
+ raise OrchestratorError(
+ "Number of virtual IPs for ingress is greater than number of available hosts"
+ )
+ for x in range(len(virtual_ips)):
+ if hosts[x] == host:
+ states.append('MASTER')
+ priorities.append(100)
+ else:
+ states.append('BACKUP')
+ priorities.append(90)
+
+ # remove the host the daemon is being deployed on from the hosts list
+ # (used for other_ips in the conf file) and convert the remaining hosts to IPs
+ if host in hosts:
+ hosts.remove(host)
+ other_ips = [resolve_ip(self.mgr.inventory.get_addr(h)) for h in hosts]
+
+ keepalived_conf = self.mgr.template.render(
+ 'services/ingress/keepalived.conf.j2',
+ {
+ 'spec': spec,
+ 'script': script,
+ 'password': password,
+ 'interface': interface,
+ 'virtual_ips': virtual_ips,
+ 'states': states,
+ 'priorities': priorities,
+ 'other_ips': other_ips,
+ 'host_ip': resolve_ip(self.mgr.inventory.get_addr(host)),
+ }
+ )
+
+ config_file = {
+ 'files': {
+ "keepalived.conf": keepalived_conf,
+ }
+ }
+
+ return config_file, deps
diff --git a/src/pybind/mgr/cephadm/services/iscsi.py b/src/pybind/mgr/cephadm/services/iscsi.py
new file mode 100644
index 000000000..c42eff683
--- /dev/null
+++ b/src/pybind/mgr/cephadm/services/iscsi.py
@@ -0,0 +1,210 @@
+import errno
+import json
+import logging
+import subprocess
+from typing import List, cast, Optional
+from ipaddress import ip_address, IPv6Address
+
+from mgr_module import HandleCommandResult
+from ceph.deployment.service_spec import IscsiServiceSpec
+
+from orchestrator import DaemonDescription, DaemonDescriptionStatus
+from .cephadmservice import CephadmDaemonDeploySpec, CephService
+from .. import utils
+
+logger = logging.getLogger(__name__)
+
+
+class IscsiService(CephService):
+ TYPE = 'iscsi'
+
+ def config(self, spec: IscsiServiceSpec) -> None: # type: ignore
+ assert self.TYPE == spec.service_type
+ assert spec.pool
+ self.mgr._check_pool_exists(spec.pool, spec.service_name())
+
+ def get_trusted_ips(self, spec: IscsiServiceSpec) -> str:
+ # add active mgr ip address to trusted list so dashboard can access
+ trusted_ip_list = spec.trusted_ip_list if spec.trusted_ip_list else ''
+ if trusted_ip_list:
+ trusted_ip_list += ','
+ trusted_ip_list += self.mgr.get_mgr_ip()
+ return trusted_ip_list
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+ assert self.TYPE == daemon_spec.daemon_type
+
+ spec = cast(IscsiServiceSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+ igw_id = daemon_spec.daemon_id
+
+ keyring = self.get_keyring_with_caps(self.get_auth_entity(igw_id),
+ ['mon', 'profile rbd, '
+ 'allow command "osd blocklist", '
+ 'allow command "config-key get" with "key" prefix "iscsi/"',
+ 'mgr', 'allow command "service status"',
+ 'osd', 'allow rwx'])
+
+ if spec.ssl_cert:
+ if isinstance(spec.ssl_cert, list):
+ cert_data = '\n'.join(spec.ssl_cert)
+ else:
+ cert_data = spec.ssl_cert
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'config-key set',
+ 'key': f'iscsi/{utils.name_to_config_section("iscsi")}.{igw_id}/iscsi-gateway.crt',
+ 'val': cert_data,
+ })
+
+ if spec.ssl_key:
+ if isinstance(spec.ssl_key, list):
+ key_data = '\n'.join(spec.ssl_key)
+ else:
+ key_data = spec.ssl_key
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'config-key set',
+ 'key': f'iscsi/{utils.name_to_config_section("iscsi")}.{igw_id}/iscsi-gateway.key',
+ 'val': key_data,
+ })
+
+ trusted_ip_list = self.get_trusted_ips(spec)
+
+ context = {
+ 'client_name': '{}.{}'.format(utils.name_to_config_section('iscsi'), igw_id),
+ 'trusted_ip_list': trusted_ip_list,
+ 'spec': spec
+ }
+ igw_conf = self.mgr.template.render('services/iscsi/iscsi-gateway.cfg.j2', context)
+
+ daemon_spec.keyring = keyring
+ daemon_spec.extra_files = {'iscsi-gateway.cfg': igw_conf}
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+ daemon_spec.deps = [trusted_ip_list]
+ return daemon_spec
+
+ def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
+ def get_set_cmd_dicts(out: str) -> List[dict]:
+ gateways = json.loads(out)['gateways']
+ cmd_dicts = []
+ # TODO: fail if we don't have a spec
+ spec = cast(IscsiServiceSpec,
+ self.mgr.spec_store.all_specs.get(daemon_descrs[0].service_name(), None))
+ if spec.api_secure and spec.ssl_cert and spec.ssl_key:
+ cmd_dicts.append({
+ 'prefix': 'dashboard set-iscsi-api-ssl-verification',
+ 'value': "false"
+ })
+ else:
+ cmd_dicts.append({
+ 'prefix': 'dashboard set-iscsi-api-ssl-verification',
+ 'value': "true"
+ })
+ for dd in daemon_descrs:
+ assert dd.hostname is not None
+ # todo: this can fail:
+ spec = cast(IscsiServiceSpec,
+ self.mgr.spec_store.all_specs.get(dd.service_name(), None))
+ if not spec:
+ logger.warning('No ServiceSpec found for %s', dd)
+ continue
+ ip = utils.resolve_ip(self.mgr.inventory.get_addr(dd.hostname))
+ # IPv6 URL encoding requires square brackets enclosing the ip
+ if type(ip_address(ip)) is IPv6Address:
+ ip = f'[{ip}]'
+ protocol = "http"
+ if spec.api_secure and spec.ssl_cert and spec.ssl_key:
+ protocol = "https"
+ service_url = '{}://{}:{}@{}:{}'.format(
+ protocol, spec.api_user or 'admin', spec.api_password or 'admin', ip, spec.api_port or '5000')
+ gw = gateways.get(dd.hostname)
+ if not gw or gw['service_url'] != service_url:
+ safe_service_url = '{}://{}:{}@{}:{}'.format(
+ protocol, '<api-user>', '<api-password>', ip, spec.api_port or '5000')
+ logger.info('Adding iSCSI gateway %s to Dashboard', safe_service_url)
+ cmd_dicts.append({
+ 'prefix': 'dashboard iscsi-gateway-add',
+ 'inbuf': service_url,
+ 'name': dd.hostname
+ })
+ return cmd_dicts
+
+ self._check_and_set_dashboard(
+ service_name='iSCSI',
+ get_cmd='dashboard iscsi-gateway-list',
+ get_set_cmd_dicts=get_set_cmd_dicts
+ )
+
+ def ok_to_stop(self,
+ daemon_ids: List[str],
+ force: bool = False,
+ known: Optional[List[str]] = None) -> HandleCommandResult:
+ # if only 1 iscsi, alert user (this cannot be overridden with --force)
+ warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Iscsi', 1, True)
+ if warn:
+ return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+ # if reached here, there is > 1 iscsi daemon. make sure none are down
+ warn_message = (
+ 'ALERT: 1 iscsi daemon is already down. Please bring it back up before stopping this one')
+ iscsi_daemons = self.mgr.cache.get_daemons_by_type(self.TYPE)
+ for i in iscsi_daemons:
+ if i.status != DaemonDescriptionStatus.running:
+ return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+ names = [f'{self.TYPE}.{d_id}' for d_id in daemon_ids]
+ warn_message = f'It is presumed safe to stop {names}'
+ return HandleCommandResult(0, warn_message, '')
+
+ def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None:
+ """
+ Called after the daemon is removed.
+ """
+ logger.debug(f'Post remove daemon {self.TYPE}.{daemon.daemon_id}')
+
+ # remove config for dashboard iscsi gateways
+ ret, out, err = self.mgr.mon_command({
+ 'prefix': 'dashboard iscsi-gateway-rm',
+ 'name': daemon.hostname,
+ })
+ if not ret:
+ logger.info(f'{daemon.hostname} removed from iscsi gateways dashboard config')
+
+ # needed to know if we have ssl stuff for iscsi in ceph config
+ iscsi_config_dict = {}
+ ret, iscsi_config, err = self.mgr.mon_command({
+ 'prefix': 'config-key dump',
+ 'key': 'iscsi',
+ })
+ if iscsi_config:
+ iscsi_config_dict = json.loads(iscsi_config)
+
+ # remove iscsi cert and key from ceph config
+ for iscsi_key, value in iscsi_config_dict.items():
+ if f'iscsi/client.{daemon.name()}/' in iscsi_key:
+ ret, out, err = self.mgr.mon_command({
+ 'prefix': 'config-key rm',
+ 'key': iscsi_key,
+ })
+ logger.info(f'{iscsi_key} removed from ceph config')
+
+ def purge(self, service_name: str) -> None:
+ """Removes configuration
+ """
+ spec = cast(IscsiServiceSpec, self.mgr.spec_store[service_name].spec)
+ try:
+ # remove service configuration from the pool
+ try:
+ subprocess.run(['rados',
+ '-k', str(self.mgr.get_ceph_option('keyring')),
+ '-n', f'mgr.{self.mgr.get_mgr_id()}',
+ '-p', cast(str, spec.pool),
+ 'rm',
+ 'gateway.conf'],
+ timeout=5)
+ logger.info(f'<gateway.conf> removed from {spec.pool}')
+ except subprocess.CalledProcessError as ex:
+ logger.error(f'Error executing <<{ex.cmd}>>: {ex.output}')
+ except subprocess.TimeoutExpired:
+ logger.error(f'timeout (5s) trying to remove <gateway.conf> from {spec.pool}')
+
+ except Exception:
+ logger.exception(f'failed to purge {service_name}')
diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py
new file mode 100644
index 000000000..8de7195a3
--- /dev/null
+++ b/src/pybind/mgr/cephadm/services/monitoring.py
@@ -0,0 +1,502 @@
+import errno
+import ipaddress
+import logging
+import os
+import socket
+from typing import List, Any, Tuple, Dict, Optional, cast
+from urllib.parse import urlparse
+
+from mgr_module import HandleCommandResult
+
+from orchestrator import DaemonDescription
+from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \
+ SNMPGatewaySpec, PrometheusSpec
+from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec
+from cephadm.services.ingress import IngressSpec
+from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url
+
+logger = logging.getLogger(__name__)
+
+
+class GrafanaService(CephadmService):
+ TYPE = 'grafana'
+ DEFAULT_SERVICE_PORT = 3000
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+ assert self.TYPE == daemon_spec.daemon_type
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+ return daemon_spec
+
+ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
+ assert self.TYPE == daemon_spec.daemon_type
+ deps = [] # type: List[str]
+
+ prom_services = [] # type: List[str]
+ for dd in self.mgr.cache.get_daemons_by_service('prometheus'):
+ assert dd.hostname is not None
+ addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
+ port = dd.ports[0] if dd.ports else 9095
+ prom_services.append(build_url(scheme='http', host=addr, port=port))
+
+ deps.append(dd.name())
+ grafana_data_sources = self.mgr.template.render(
+ 'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services})
+
+ cert_path = f'{daemon_spec.host}/grafana_crt'
+ key_path = f'{daemon_spec.host}/grafana_key'
+ cert = self.mgr.get_store(cert_path)
+ pkey = self.mgr.get_store(key_path)
+ if cert and pkey:
+ try:
+ verify_tls(cert, pkey)
+ except ServerConfigException as e:
+ logger.warning('Provided grafana TLS certificates invalid: %s', str(e))
+ cert, pkey = None, None
+ if not (cert and pkey):
+ cert, pkey = create_self_signed_cert('Ceph', daemon_spec.host)
+ self.mgr.set_store(cert_path, cert)
+ self.mgr.set_store(key_path, pkey)
+ if 'dashboard' in self.mgr.get('mgr_map')['modules']:
+ self.mgr.check_mon_command({
+ 'prefix': 'dashboard set-grafana-api-ssl-verify',
+ 'value': 'false',
+ })
+
+ spec: GrafanaSpec = cast(
+ GrafanaSpec, self.mgr.spec_store.active_specs[daemon_spec.service_name])
+ grafana_ini = self.mgr.template.render(
+ 'services/grafana/grafana.ini.j2', {
+ 'initial_admin_password': spec.initial_admin_password,
+ 'http_port': daemon_spec.ports[0] if daemon_spec.ports else self.DEFAULT_SERVICE_PORT,
+ 'http_addr': daemon_spec.ip if daemon_spec.ip else ''
+ })
+
+ if 'dashboard' in self.mgr.get('mgr_map')['modules'] and spec.initial_admin_password:
+ self.mgr.check_mon_command(
+ {'prefix': 'dashboard set-grafana-api-password'}, inbuf=spec.initial_admin_password)
+
+ config_file = {
+ 'files': {
+ "grafana.ini": grafana_ini,
+ 'provisioning/datasources/ceph-dashboard.yml': grafana_data_sources,
+ 'certs/cert_file': '# generated by cephadm\n%s' % cert,
+ 'certs/cert_key': '# generated by cephadm\n%s' % pkey,
+ }
+ }
+ return config_file, sorted(deps)
+
+ def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
+ # Use the least-created one as the active daemon
+ if daemon_descrs:
+ return daemon_descrs[-1]
+ # if empty list provided, return empty Daemon Desc
+ return DaemonDescription()
+
+ def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
+ # TODO: signed cert
+ dd = self.get_active_daemon(daemon_descrs)
+ assert dd.hostname is not None
+ addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
+ port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
+ service_url = build_url(scheme='https', host=addr, port=port)
+ self._set_service_url_on_dashboard(
+ 'Grafana',
+ 'dashboard get-grafana-api-url',
+ 'dashboard set-grafana-api-url',
+ service_url
+ )
+
+ def pre_remove(self, daemon: DaemonDescription) -> None:
+ """
+ Called before grafana daemon is removed.
+ """
+ if daemon.hostname is not None:
+ # delete cert/key entries for this grafana daemon
+ cert_path = f'{daemon.hostname}/grafana_crt'
+ key_path = f'{daemon.hostname}/grafana_key'
+ self.mgr.set_store(cert_path, None)
+ self.mgr.set_store(key_path, None)
+
+ def ok_to_stop(self,
+ daemon_ids: List[str],
+ force: bool = False,
+ known: Optional[List[str]] = None) -> HandleCommandResult:
+ warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Grafana', 1)
+ if warn and not force:
+ return HandleCommandResult(-errno.EBUSY, '', warn_message)
+ return HandleCommandResult(0, warn_message, '')
+
+
+class AlertmanagerService(CephadmService):
+ TYPE = 'alertmanager'
+ DEFAULT_SERVICE_PORT = 9093
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+ assert self.TYPE == daemon_spec.daemon_type
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+ return daemon_spec
+
+ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
+ assert self.TYPE == daemon_spec.daemon_type
+ deps: List[str] = []
+ default_webhook_urls: List[str] = []
+
+ spec = cast(AlertManagerSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+ try:
+ secure = spec.secure
+ except AttributeError:
+ secure = False
+ user_data = spec.user_data
+ if 'default_webhook_urls' in user_data and isinstance(
+ user_data['default_webhook_urls'], list):
+ default_webhook_urls.extend(user_data['default_webhook_urls'])
+
+ # dashboard(s)
+ dashboard_urls: List[str] = []
+ snmp_gateway_urls: List[str] = []
+ mgr_map = self.mgr.get('mgr_map')
+ port = None
+ proto = None # http: or https:
+ url = mgr_map.get('services', {}).get('dashboard', None)
+ if url:
+ p_result = urlparse(url.rstrip('/'))
+ hostname = socket.getfqdn(p_result.hostname)
+
+ try:
+ ip = ipaddress.ip_address(hostname)
+ except ValueError:
+ pass
+ else:
+ if ip.version == 6:
+ hostname = f'[{hostname}]'
+
+ dashboard_urls.append(
+ f'{p_result.scheme}://{hostname}:{p_result.port}{p_result.path}')
+ proto = p_result.scheme
+ port = p_result.port
+ # scan all mgrs to generate deps and to get standbys too.
+ # assume that they are all on the same port as the active mgr.
+ for dd in self.mgr.cache.get_daemons_by_service('mgr'):
+ # we consider mgr a dep even if the dashboard is disabled
+ # in order to be consistent with _calc_daemon_deps().
+ deps.append(dd.name())
+ if not port:
+ continue
+ if dd.daemon_id == self.mgr.get_mgr_id():
+ continue
+ assert dd.hostname is not None
+ addr = self._inventory_get_fqdn(dd.hostname)
+ dashboard_urls.append(build_url(scheme=proto, host=addr, port=port).rstrip('/'))
+
+ for dd in self.mgr.cache.get_daemons_by_service('snmp-gateway'):
+ assert dd.hostname is not None
+ assert dd.ports
+ addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
+ deps.append(dd.name())
+
+ snmp_gateway_urls.append(build_url(scheme='http', host=addr,
+ port=dd.ports[0], path='/alerts'))
+
+ context = {
+ 'dashboard_urls': dashboard_urls,
+ 'default_webhook_urls': default_webhook_urls,
+ 'snmp_gateway_urls': snmp_gateway_urls,
+ 'secure': secure,
+ }
+ yml = self.mgr.template.render('services/alertmanager/alertmanager.yml.j2', context)
+
+ peers = []
+ port = 9094
+ for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
+ assert dd.hostname is not None
+ deps.append(dd.name())
+ addr = self._inventory_get_fqdn(dd.hostname)
+ peers.append(build_url(host=addr, port=port).lstrip('/'))
+
+ return {
+ "files": {
+ "alertmanager.yml": yml
+ },
+ "peers": peers
+ }, sorted(deps)
+
+ def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
+ # TODO: if there are multiple daemons, who is the active one?
+ if daemon_descrs:
+ return daemon_descrs[0]
+ # if empty list provided, return empty Daemon Desc
+ return DaemonDescription()
+
+ def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
+ dd = self.get_active_daemon(daemon_descrs)
+ assert dd.hostname is not None
+ addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
+ port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
+ service_url = build_url(scheme='http', host=addr, port=port)
+ self._set_service_url_on_dashboard(
+ 'AlertManager',
+ 'dashboard get-alertmanager-api-host',
+ 'dashboard set-alertmanager-api-host',
+ service_url
+ )
+
+ def ok_to_stop(self,
+ daemon_ids: List[str],
+ force: bool = False,
+ known: Optional[List[str]] = None) -> HandleCommandResult:
+ warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Alertmanager', 1)
+ if warn and not force:
+ return HandleCommandResult(-errno.EBUSY, '', warn_message)
+ return HandleCommandResult(0, warn_message, '')
+
+
+class PrometheusService(CephadmService):
+ TYPE = 'prometheus'
+ DEFAULT_SERVICE_PORT = 9095
+ DEFAULT_MGR_PROMETHEUS_PORT = 9283
+
+ def config(self, spec: ServiceSpec) -> None:
+ # make sure module is enabled
+ mgr_map = self.mgr.get('mgr_map')
+ if 'prometheus' not in mgr_map.get('services', {}):
+ self.mgr.check_mon_command({
+ 'prefix': 'mgr module enable',
+ 'module': 'prometheus'
+ })
+ # we shouldn't get here (mon will tell the mgr to respawn), but no
+ # harm done if we do.
+
+ def prepare_create(
+ self,
+ daemon_spec: CephadmDaemonDeploySpec,
+ ) -> CephadmDaemonDeploySpec:
+ assert self.TYPE == daemon_spec.daemon_type
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+ return daemon_spec
+
+ def generate_config(
+ self,
+ daemon_spec: CephadmDaemonDeploySpec,
+ ) -> Tuple[Dict[str, Any], List[str]]:
+ assert self.TYPE == daemon_spec.daemon_type
+ deps = [] # type: List[str]
+
+ prom_spec = cast(PrometheusSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+
+ try:
+ retention_time = prom_spec.retention_time if prom_spec.retention_time else '15d'
+ except AttributeError:
+ retention_time = '15d'
+
+ # scrape mgrs
+ mgr_scrape_list = []
+ mgr_map = self.mgr.get('mgr_map')
+ port = cast(int, self.mgr.get_module_option_ex(
+ 'prometheus', 'server_port', self.DEFAULT_MGR_PROMETHEUS_PORT))
+ deps.append(str(port))
+ t = mgr_map.get('services', {}).get('prometheus', None)
+ if t:
+ p_result = urlparse(t)
+ # urlparse .hostname removes '[]' from the hostname in the case of
+ # IPv6 addresses, so if that is the case we re-append the brackets
+ # when building the final scrape endpoint
+ if '[' in p_result.netloc and ']' in p_result.netloc:
+ mgr_scrape_list.append(f"[{p_result.hostname}]:{port}")
+ else:
+ mgr_scrape_list.append(f"{p_result.hostname}:{port}")
+ # scan all mgrs to generate deps and to get standbys too.
+ # assume that they are all on the same port as the active mgr.
+ for dd in self.mgr.cache.get_daemons_by_service('mgr'):
+ # we consider the mgr a dep even if the prometheus module is
+ # disabled in order to be consistent with _calc_daemon_deps().
+ deps.append(dd.name())
+ if not port:
+ continue
+ if dd.daemon_id == self.mgr.get_mgr_id():
+ continue
+ assert dd.hostname is not None
+ addr = self._inventory_get_fqdn(dd.hostname)
+ mgr_scrape_list.append(build_url(host=addr, port=port).lstrip('/'))
+
+ # scrape node exporters
+ nodes = []
+ for dd in self.mgr.cache.get_daemons_by_service('node-exporter'):
+ assert dd.hostname is not None
+ deps.append(dd.name())
+ addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
+ port = dd.ports[0] if dd.ports else 9100
+ nodes.append({
+ 'hostname': dd.hostname,
+ 'url': build_url(host=addr, port=port).lstrip('/')
+ })
+
+ # scrape alert managers
+ alertmgr_targets = []
+ for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
+ assert dd.hostname is not None
+ deps.append(dd.name())
+ addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
+ port = dd.ports[0] if dd.ports else 9093
+ alertmgr_targets.append("'{}'".format(build_url(host=addr, port=port).lstrip('/')))
+
+ # scrape haproxies
+ haproxy_targets = []
+ for dd in self.mgr.cache.get_daemons_by_type('ingress'):
+ if dd.service_name() in self.mgr.spec_store:
+ spec = cast(IngressSpec, self.mgr.spec_store[dd.service_name()].spec)
+ assert dd.hostname is not None
+ deps.append(dd.name())
+ if dd.daemon_type == 'haproxy':
+ addr = self._inventory_get_fqdn(dd.hostname)
+ haproxy_targets.append({
+ "url": f"'{build_url(host=addr, port=spec.monitor_port).lstrip('/')}'",
+ "service": dd.service_name(),
+ })
+
+ # generate the prometheus configuration
+ context = {
+ 'alertmgr_targets': alertmgr_targets,
+ 'mgr_scrape_list': mgr_scrape_list,
+ 'haproxy_targets': haproxy_targets,
+ 'nodes': nodes,
+ }
+ r: Dict[str, Any] = {
+ 'files': {
+ 'prometheus.yml':
+ self.mgr.template.render(
+ 'services/prometheus/prometheus.yml.j2', context)
+ },
+ 'retention_time': retention_time
+ }
+
+ # include alerts, if present in the container
+ if os.path.exists(self.mgr.prometheus_alerts_path):
+ with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f:
+ alerts = f.read()
+ r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts
+
+ # Include custom alerts if present in key value store. This enables the
+ # users to add custom alerts. Write the file in any case, so that if the
+ # content of the key value store changed, that file is overwritten
+ # (emptied in case the value has been removed from the key value
+ # store). This avoids having to adapt the `cephadm` binary to
+ # remove the file.
+ #
+ # Don't use the template engine for it as
+ #
+ # 1. the alerts are always static and
+ # 2. they are a template themselves for the Go template engine, which
+ # uses curly braces and escaping that is cumbersome and unnecessary
+ # for the user.
+ #
+ r['files']['/etc/prometheus/alerting/custom_alerts.yml'] = \
+ self.mgr.get_store('services/prometheus/alerting/custom_alerts.yml', '')
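+ # Illustrative only: assuming the usual mgr/<module>/ prefix for the module
+ # KV store, an operator could populate this with something like
+ #   ceph config-key set mgr/cephadm/services/prometheus/alerting/custom_alerts.yml -i custom_alerts.yml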
+
+ return r, sorted(deps)
+
+ def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
+ # TODO: if there are multiple daemons, who is the active one?
+ if daemon_descrs:
+ return daemon_descrs[0]
+ # if empty list provided, return empty Daemon Desc
+ return DaemonDescription()
+
+ def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
+ dd = self.get_active_daemon(daemon_descrs)
+ assert dd.hostname is not None
+ addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
+ port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
+ service_url = build_url(scheme='http', host=addr, port=port)
+ self._set_service_url_on_dashboard(
+ 'Prometheus',
+ 'dashboard get-prometheus-api-host',
+ 'dashboard set-prometheus-api-host',
+ service_url
+ )
+
+ def ok_to_stop(self,
+ daemon_ids: List[str],
+ force: bool = False,
+ known: Optional[List[str]] = None) -> HandleCommandResult:
+ warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'Prometheus', 1)
+ if warn and not force:
+ return HandleCommandResult(-errno.EBUSY, '', warn_message)
+ return HandleCommandResult(0, warn_message, '')
+
+
+class NodeExporterService(CephadmService):
+ TYPE = 'node-exporter'
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+ assert self.TYPE == daemon_spec.daemon_type
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+ return daemon_spec
+
+ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
+ assert self.TYPE == daemon_spec.daemon_type
+ return {}, []
+
+ def ok_to_stop(self,
+ daemon_ids: List[str],
+ force: bool = False,
+ known: Optional[List[str]] = None) -> HandleCommandResult:
+ # since node exporter runs on each host and cannot compromise data, no extra checks required
+ names = [f'{self.TYPE}.{d_id}' for d_id in daemon_ids]
+ out = f'It is presumed safe to stop {names}'
+ return HandleCommandResult(0, out, '')
+
+
+class SNMPGatewayService(CephadmService):
+ TYPE = 'snmp-gateway'
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+ assert self.TYPE == daemon_spec.daemon_type
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+ return daemon_spec
+
+ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
+ assert self.TYPE == daemon_spec.daemon_type
+ deps: List[str] = []
+
+ spec = cast(SNMPGatewaySpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+ config = {
+ "destination": spec.snmp_destination,
+ "snmp_version": spec.snmp_version,
+ }
+ if spec.snmp_version == 'V2c':
+ community = spec.credentials.get('snmp_community', None)
+ assert community is not None
+
+ config.update({
+ "snmp_community": community
+ })
+ else:
+ # SNMP v3 settings can be either authNoPriv or authPriv
+ auth_protocol = 'SHA' if not spec.auth_protocol else spec.auth_protocol
+
+ auth_username = spec.credentials.get('snmp_v3_auth_username', None)
+ auth_password = spec.credentials.get('snmp_v3_auth_password', None)
+ assert auth_username is not None
+ assert auth_password is not None
+ assert spec.engine_id is not None
+
+ config.update({
+ "snmp_v3_auth_protocol": auth_protocol,
+ "snmp_v3_auth_username": auth_username,
+ "snmp_v3_auth_password": auth_password,
+ "snmp_v3_engine_id": spec.engine_id,
+ })
+ # authPriv adds encryption
+ if spec.privacy_protocol:
+ priv_password = spec.credentials.get('snmp_v3_priv_password', None)
+ assert priv_password is not None
+
+ config.update({
+ "snmp_v3_priv_protocol": spec.privacy_protocol,
+ "snmp_v3_priv_password": priv_password,
+ })
+
+ logger.debug(
+ f"Generated configuration for '{self.TYPE}' service. Dependencies={deps}")
+
+ return config, sorted(deps)
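+ # For example, a V2c spec yields a config along these lines (values illustrative):
+ # {'destination': '192.168.1.10:162', 'snmp_version': 'V2c', 'snmp_community': 'public'}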
diff --git a/src/pybind/mgr/cephadm/services/nfs.py b/src/pybind/mgr/cephadm/services/nfs.py
new file mode 100644
index 000000000..ee53283bd
--- /dev/null
+++ b/src/pybind/mgr/cephadm/services/nfs.py
@@ -0,0 +1,290 @@
+import errno
+import logging
+import os
+import subprocess
+import tempfile
+from typing import Dict, Tuple, Any, List, cast, Optional
+
+from mgr_module import HandleCommandResult
+from mgr_module import NFS_POOL_NAME as POOL_NAME
+
+from ceph.deployment.service_spec import ServiceSpec, NFSServiceSpec
+
+from orchestrator import DaemonDescription
+
+from cephadm.services.cephadmservice import AuthEntity, CephadmDaemonDeploySpec, CephService
+
+logger = logging.getLogger(__name__)
+
+
+class NFSService(CephService):
+ TYPE = 'nfs'
+
+ def ranked(self) -> bool:
+ return True
+
+ def fence(self, daemon_id: str) -> None:
+ logger.info(f'Fencing old nfs.{daemon_id}')
+ ret, out, err = self.mgr.mon_command({
+ 'prefix': 'auth rm',
+ 'entity': f'client.nfs.{daemon_id}',
+ })
+
+ # TODO: block/fence this entity (in case it is still running somewhere)
+
+ def fence_old_ranks(self,
+ spec: ServiceSpec,
+ rank_map: Dict[int, Dict[int, Optional[str]]],
+ num_ranks: int) -> None:
+ for rank, m in list(rank_map.items()):
+ if rank >= num_ranks:
+ for daemon_id in m.values():
+ if daemon_id is not None:
+ self.fence(daemon_id)
+ del rank_map[rank]
+ nodeid = f'{spec.service_name()}.{rank}'
+ self.mgr.log.info(f'Removing {nodeid} from the ganesha grace table')
+ self.run_grace_tool(cast(NFSServiceSpec, spec), 'remove', nodeid)
+ self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map)
+ else:
+ max_gen = max(m.keys())
+ for gen, daemon_id in list(m.items()):
+ if gen < max_gen:
+ if daemon_id is not None:
+ self.fence(daemon_id)
+ del rank_map[rank][gen]
+ self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map)
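+ # Illustrative rank_map shape (daemon ids made up):
+ # {0: {0: 'foo.0.0.host1.abc', 1: 'foo.0.1.host2.def'}}
+ # i.e. rank -> generation -> daemon_id; only the newest generation of each
+ # surviving rank is kept, everything else is fenced.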
+
+ def config(self, spec: NFSServiceSpec) -> None: # type: ignore
+ from nfs.cluster import create_ganesha_pool
+
+ assert self.TYPE == spec.service_type
+ create_ganesha_pool(self.mgr)
+
+ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
+ assert self.TYPE == daemon_spec.daemon_type
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+ return daemon_spec
+
+ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
+ assert self.TYPE == daemon_spec.daemon_type
+
+ daemon_type = daemon_spec.daemon_type
+ daemon_id = daemon_spec.daemon_id
+ host = daemon_spec.host
+ spec = cast(NFSServiceSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+
+ deps: List[str] = []
+
+ nodeid = f'{daemon_spec.service_name}.{daemon_spec.rank}'
+
+ # create the RADOS recovery pool keyring
+ rados_user = f'{daemon_type}.{daemon_id}'
+ rados_keyring = self.create_keyring(daemon_spec)
+
+ # ensure rank is known to ganesha
+ self.mgr.log.info(f'Ensuring {nodeid} is in the ganesha grace table')
+ self.run_grace_tool(spec, 'add', nodeid)
+
+ # create the rados config object
+ self.create_rados_config_obj(spec)
+
+ # create the RGW keyring
+ rgw_user = f'{rados_user}-rgw'
+ rgw_keyring = self.create_rgw_keyring(daemon_spec)
+
+ # generate the ganesha config
+ def get_ganesha_conf() -> str:
+ context = {
+ "user": rados_user,
+ "nodeid": nodeid,
+ "pool": POOL_NAME,
+ "namespace": spec.service_id,
+ "rgw_user": rgw_user,
+ "url": f'rados://{POOL_NAME}/{spec.service_id}/{spec.rados_config_name()}',
+ # fall back to default NFS port if not present in daemon_spec
+ "port": daemon_spec.ports[0] if daemon_spec.ports else 2049,
+ "bind_addr": daemon_spec.ip if daemon_spec.ip else '',
+ }
+ return self.mgr.template.render('services/nfs/ganesha.conf.j2', context)
+
+ # generate the cephadm config json
+ def get_cephadm_config() -> Dict[str, Any]:
+ config: Dict[str, Any] = {}
+ config['pool'] = POOL_NAME
+ config['namespace'] = spec.service_id
+ config['userid'] = rados_user
+ config['extra_args'] = ['-N', 'NIV_EVENT']
+ config['files'] = {
+ 'ganesha.conf': get_ganesha_conf(),
+ }
+ config.update(
+ self.get_config_and_keyring(
+ daemon_type, daemon_id,
+ keyring=rados_keyring,
+ host=host
+ )
+ )
+ config['rgw'] = {
+ 'cluster': 'ceph',
+ 'user': rgw_user,
+ 'keyring': rgw_keyring,
+ }
+ logger.debug('Generated cephadm config-json: %s' % config)
+ return config
+
+ return get_cephadm_config(), deps
+
+ def create_rados_config_obj(self,
+ spec: NFSServiceSpec,
+ clobber: bool = False) -> None:
+ objname = spec.rados_config_name()
+ cmd = [
+ 'rados',
+ '-n', f"mgr.{self.mgr.get_mgr_id()}",
+ '-k', str(self.mgr.get_ceph_option('keyring')),
+ '-p', POOL_NAME,
+ '--namespace', cast(str, spec.service_id),
+ ]
+ result = subprocess.run(
+ cmd + ['get', objname, '-'],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ timeout=10)
+ if not result.returncode and not clobber:
+ logger.info('Rados config object exists: %s' % objname)
+ else:
+ logger.info('Creating rados config object: %s' % objname)
+ result = subprocess.run(
+ cmd + ['put', objname, '-'],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ timeout=10)
+ if result.returncode:
+ self.mgr.log.warning(
+ f'Unable to create rados config object {objname}: {result.stderr.decode("utf-8")}'
+ )
+ raise RuntimeError(result.stderr.decode("utf-8"))
+
+ def create_keyring(self, daemon_spec: CephadmDaemonDeploySpec) -> str:
+ daemon_id = daemon_spec.daemon_id
+ spec = cast(NFSServiceSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+ entity: AuthEntity = self.get_auth_entity(daemon_id)
+
+ osd_caps = 'allow rw pool=%s namespace=%s' % (POOL_NAME, spec.service_id)
+
+ logger.info('Creating key for %s' % entity)
+ keyring = self.get_keyring_with_caps(entity,
+ ['mon', 'allow r',
+ 'osd', osd_caps])
+
+ return keyring
+
+ def create_rgw_keyring(self, daemon_spec: CephadmDaemonDeploySpec) -> str:
+ daemon_id = daemon_spec.daemon_id
+ entity: AuthEntity = self.get_auth_entity(f'{daemon_id}-rgw')
+
+ logger.info('Creating key for %s' % entity)
+ keyring = self.get_keyring_with_caps(entity,
+ ['mon', 'allow r',
+ 'osd', 'allow rwx tag rgw *=*'])
+
+ return keyring
+
+ def run_grace_tool(self,
+ spec: NFSServiceSpec,
+ action: str,
+ nodeid: str) -> None:
+ # write a temp keyring and referencing config file. this is a kludge
+ # because the ganesha-grace-tool can only authenticate as a client (and
+ # not a mgr). Also, it doesn't allow you to pass a keyring location via
+ # the command line, nor does it parse the CEPH_ARGS env var.
+ tmp_id = f'mgr.nfs.grace.{spec.service_name()}'
+ entity = AuthEntity(f'client.{tmp_id}')
+ keyring = self.get_keyring_with_caps(
+ entity,
+ ['mon', 'allow r', 'osd', f'allow rwx pool {POOL_NAME}']
+ )
+ tmp_keyring = tempfile.NamedTemporaryFile(mode='w', prefix='mgr-grace-keyring')
+ os.fchmod(tmp_keyring.fileno(), 0o600)
+ tmp_keyring.write(keyring)
+ tmp_keyring.flush()
+ tmp_conf = tempfile.NamedTemporaryFile(mode='w', prefix='mgr-grace-conf')
+ tmp_conf.write(self.mgr.get_minimal_ceph_conf())
+ tmp_conf.write(f'\tkeyring = {tmp_keyring.name}\n')
+ tmp_conf.flush()
+ try:
+ cmd: List[str] = [
+ 'ganesha-rados-grace',
+ '--cephconf', tmp_conf.name,
+ '--userid', tmp_id,
+ '--pool', POOL_NAME,
+ '--ns', cast(str, spec.service_id),
+ action, nodeid,
+ ]
+ self.mgr.log.debug(cmd)
+ result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ timeout=10)
+ if result.returncode:
+ self.mgr.log.warning(
+ f'ganesha-rados-grace tool failed: {result.stderr.decode("utf-8")}'
+ )
+ raise RuntimeError(f'grace tool failed: {result.stderr.decode("utf-8")}')
+
+ finally:
+ self.mgr.check_mon_command({
+ 'prefix': 'auth rm',
+ 'entity': entity,
+ })
+
+ def remove_rgw_keyring(self, daemon: DaemonDescription) -> None:
+ assert daemon.daemon_id is not None
+ daemon_id: str = daemon.daemon_id
+ entity: AuthEntity = self.get_auth_entity(f'{daemon_id}-rgw')
+
+ logger.info(f'Removing key for {entity}')
+ self.mgr.check_mon_command({
+ 'prefix': 'auth rm',
+ 'entity': entity,
+ })
+
+ def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None:
+ super().post_remove(daemon, is_failed_deploy=is_failed_deploy)
+ self.remove_rgw_keyring(daemon)
+
+ def ok_to_stop(self,
+ daemon_ids: List[str],
+ force: bool = False,
+ known: Optional[List[str]] = None) -> HandleCommandResult:
+ # if only 1 nfs, alert user (this cannot be overridden with --force)
+ warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, 'NFS', 1, True)
+ if warn:
+ return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+ # if reached here, there is > 1 nfs daemon.
+ if force:
+ return HandleCommandResult(0, warn_message, '')
+
+ # if reached here, > 1 nfs daemon and no force flag.
+ # Provide warning
+ warn_message = "WARNING: Removing NFS daemons can cause clients to lose connectivity. "
+ return HandleCommandResult(-errno.EBUSY, '', warn_message)
+
+ def purge(self, service_name: str) -> None:
+ if service_name not in self.mgr.spec_store:
+ return
+ spec = cast(NFSServiceSpec, self.mgr.spec_store[service_name].spec)
+
+ logger.info(f'Removing grace file for {service_name}')
+ cmd = [
+ 'rados',
+ '-n', f"mgr.{self.mgr.get_mgr_id()}",
+ '-k', str(self.mgr.get_ceph_option('keyring')),
+ '-p', POOL_NAME,
+ '--namespace', cast(str, spec.service_id),
+ 'rm', 'grace',
+ ]
+ subprocess.run(
+ cmd,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ timeout=10
+ )
diff --git a/src/pybind/mgr/cephadm/services/osd.py b/src/pybind/mgr/cephadm/services/osd.py
new file mode 100644
index 000000000..5899ba49a
--- /dev/null
+++ b/src/pybind/mgr/cephadm/services/osd.py
@@ -0,0 +1,956 @@
+import json
+import logging
+from threading import Lock
+from typing import List, Dict, Any, Set, Tuple, cast, Optional, TYPE_CHECKING
+
+from ceph.deployment import translate
+from ceph.deployment.drive_group import DriveGroupSpec
+from ceph.deployment.drive_selection import DriveSelection
+from ceph.deployment.inventory import Device
+from ceph.utils import datetime_to_str, str_to_datetime
+
+from datetime import datetime
+import orchestrator
+from cephadm.serve import CephadmServe
+from cephadm.utils import forall_hosts
+from ceph.utils import datetime_now
+from orchestrator import OrchestratorError, DaemonDescription
+from mgr_module import MonCommandFailed
+
+from cephadm.services.cephadmservice import CephadmDaemonDeploySpec, CephService
+
+if TYPE_CHECKING:
+ from cephadm.module import CephadmOrchestrator
+
+logger = logging.getLogger(__name__)
+
+
+class OSDService(CephService):
+ TYPE = 'osd'
+
+ def create_from_spec(self, drive_group: DriveGroupSpec) -> str:
+ logger.debug(f"Processing DriveGroup {drive_group}")
+ osd_id_claims = OsdIdClaims(self.mgr)
+ if osd_id_claims.get():
+ logger.info(
+ f"Found osd claims for drivegroup {drive_group.service_id} -> {osd_id_claims.get()}")
+
+ @forall_hosts
+ def create_from_spec_one(host: str, drive_selection: DriveSelection) -> Optional[str]:
+ # skip this host if there has been no change in inventory
+ if not self.mgr.cache.osdspec_needs_apply(host, drive_group):
+ self.mgr.log.debug("skipping apply of %s on %s (no change)" % (
+ drive_group, host))
+ return None
+ # skip this host if we cannot schedule here
+ if self.mgr.inventory.has_label(host, '_no_schedule'):
+ return None
+
+ osd_id_claims_for_host = osd_id_claims.filtered_by_host(host)
+
+ cmds: List[str] = self.driveselection_to_ceph_volume(drive_selection,
+ osd_id_claims_for_host)
+ if not cmds:
+ logger.debug("No data_devices, skipping DriveGroup: {}".format(
+ drive_group.service_id))
+ return None
+
+ logger.debug('Applying service osd.%s on host %s...' % (
+ drive_group.service_id, host
+ ))
+ start_ts = datetime_now()
+ env_vars: List[str] = [f"CEPH_VOLUME_OSDSPEC_AFFINITY={drive_group.service_id}"]
+ ret_msg = self.create_single_host(
+ drive_group, host, cmds,
+ replace_osd_ids=osd_id_claims_for_host, env_vars=env_vars
+ )
+ self.mgr.cache.update_osdspec_last_applied(
+ host, drive_group.service_name(), start_ts
+ )
+ self.mgr.cache.save_host(host)
+ return ret_msg
+
+ ret = create_from_spec_one(self.prepare_drivegroup(drive_group))
+ return ", ".join(filter(None, ret))
+
+ def create_single_host(self,
+ drive_group: DriveGroupSpec,
+ host: str, cmds: List[str], replace_osd_ids: List[str],
+ env_vars: Optional[List[str]] = None) -> str:
+ for cmd in cmds:
+ out, err, code = self._run_ceph_volume_command(host, cmd, env_vars=env_vars)
+ if code == 1 and ', it is already prepared' in '\n'.join(err):
+ # HACK: when we create against an existing LV, ceph-volume
+ # returns an error and the above message. To make this
+ # command idempotent, tolerate this "error" and continue.
+ logger.debug('the device was already prepared; continuing')
+ code = 0
+ if code:
+ raise RuntimeError(
+ 'cephadm exited with an error code: %d, stderr:%s' % (
+ code, '\n'.join(err)))
+ return self.deploy_osd_daemons_for_existing_osds(host, drive_group.service_name(),
+ replace_osd_ids)
+
+ def deploy_osd_daemons_for_existing_osds(self, host: str, service_name: str,
+ replace_osd_ids: Optional[List[str]] = None) -> str:
+
+ if replace_osd_ids is None:
+ replace_osd_ids = OsdIdClaims(self.mgr).filtered_by_host(host)
+ assert replace_osd_ids is not None
+
+ # check result: lvm
+ osds_elems: dict = CephadmServe(self.mgr)._run_cephadm_json(
+ host, 'osd', 'ceph-volume',
+ [
+ '--',
+ 'lvm', 'list',
+ '--format', 'json',
+ ])
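+ # osds_elems is keyed by osd id; each value is a list of LV records roughly
+ # like (abridged, illustrative):
+ # {'0': [{'type': 'block', 'tags': {'ceph.cluster_fsid': '<fsid>',
+ # 'ceph.osd_fsid': '<uuid>'}}]}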
+ before_osd_uuid_map = self.mgr.get_osd_uuid_map(only_up=True)
+ fsid = self.mgr._cluster_fsid
+ osd_uuid_map = self.mgr.get_osd_uuid_map()
+ created = []
+ for osd_id, osds in osds_elems.items():
+ for osd in osds:
+ if osd['type'] == 'db':
+ continue
+ if osd['tags']['ceph.cluster_fsid'] != fsid:
+ logger.debug('mismatched fsid, skipping %s' % osd)
+ continue
+ if osd_id in before_osd_uuid_map and osd_id not in replace_osd_ids:
+ # skip if the osd already exists, unless it is part of the replacement operation
+ continue
+ if self.mgr.cache.has_daemon(f'osd.{osd_id}', host):
+ # cephadm daemon instance already exists
+ logger.debug(f'osd id {osd_id} daemon already exists')
+ continue
+ if osd_id not in osd_uuid_map:
+ logger.debug('osd id {} does not exist in cluster'.format(osd_id))
+ continue
+ if osd_uuid_map.get(osd_id) != osd['tags']['ceph.osd_fsid']:
+ logger.debug('mismatched osd uuid (cluster has %s, osd '
+ 'has %s)' % (
+ osd_uuid_map.get(osd_id),
+ osd['tags']['ceph.osd_fsid']))
+ continue
+
+ created.append(osd_id)
+ daemon_spec: CephadmDaemonDeploySpec = CephadmDaemonDeploySpec(
+ service_name=service_name,
+ daemon_id=str(osd_id),
+ host=host,
+ daemon_type='osd',
+ )
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+ CephadmServe(self.mgr)._create_daemon(
+ daemon_spec,
+ osd_uuid_map=osd_uuid_map)
+
+ # check result: raw
+ raw_elems: dict = CephadmServe(self.mgr)._run_cephadm_json(
+ host, 'osd', 'ceph-volume',
+ [
+ '--',
+ 'raw', 'list',
+ '--format', 'json',
+ ])
+ for osd_uuid, osd in raw_elems.items():
+ if osd.get('ceph_fsid') != fsid:
+ continue
+ osd_id = str(osd.get('osd_id', '-1'))
+ if osd_id in before_osd_uuid_map and osd_id not in replace_osd_ids:
+                # skip OSDs that were already up, unless they are being replaced
+ continue
+ if self.mgr.cache.has_daemon(f'osd.{osd_id}', host):
+ # cephadm daemon instance already exists
+ logger.debug(f'osd id {osd_id} daemon already exists')
+ continue
+ if osd_id not in osd_uuid_map:
+ logger.debug('osd id {} does not exist in cluster'.format(osd_id))
+ continue
+ if osd_uuid_map.get(osd_id) != osd_uuid:
+ logger.debug('mismatched osd uuid (cluster has %s, osd '
+ 'has %s)' % (osd_uuid_map.get(osd_id), osd_uuid))
+ continue
+ if osd_id in created:
+ continue
+
+ created.append(osd_id)
+ daemon_spec = CephadmDaemonDeploySpec(
+ service_name=service_name,
+ daemon_id=osd_id,
+ host=host,
+ daemon_type='osd',
+ )
+ daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+ CephadmServe(self.mgr)._create_daemon(
+ daemon_spec,
+ osd_uuid_map=osd_uuid_map)
+
+ if created:
+ self.mgr.cache.invalidate_host_devices(host)
+ self.mgr.cache.invalidate_autotune(host)
+ return "Created osd(s) %s on host '%s'" % (','.join(created), host)
+ else:
+ return "Created no osd(s) on host %s; already created?" % host
+
+ def prepare_drivegroup(self, drive_group: DriveGroupSpec) -> List[Tuple[str, DriveSelection]]:
+ # 1) use fn_filter to determine matching_hosts
+ matching_hosts = drive_group.placement.filter_matching_hostspecs(
+ self.mgr._schedulable_hosts())
+ # 2) Map the inventory to the InventoryHost object
+ host_ds_map = []
+
+ # set osd_id_claims
+
+ def _find_inv_for_host(hostname: str, inventory_dict: dict) -> List[Device]:
+            # FIXME: this lookup is inefficient; the inventory should be loaded together with the host
+ for _host, _inventory in inventory_dict.items():
+ if _host == hostname:
+ return _inventory
+ raise OrchestratorError("No inventory found for host: {}".format(hostname))
+
+ # 3) iterate over matching_host and call DriveSelection
+ logger.debug(f"Checking matching hosts -> {matching_hosts}")
+ for host in matching_hosts:
+ inventory_for_host = _find_inv_for_host(host, self.mgr.cache.devices)
+ logger.debug(f"Found inventory for host {inventory_for_host}")
+
+ # List of Daemons on that host
+ dd_for_spec = self.mgr.cache.get_daemons_by_service(drive_group.service_name())
+ dd_for_spec_and_host = [dd for dd in dd_for_spec if dd.hostname == host]
+
+ drive_selection = DriveSelection(drive_group, inventory_for_host,
+ existing_daemons=len(dd_for_spec_and_host))
+ logger.debug(f"Found drive selection {drive_selection}")
+ if drive_group.method and drive_group.method == 'raw':
+ # ceph-volume can currently only handle a 1:1 mapping
+ # of data/db/wal devices for raw mode osds. If db/wal devices
+ # are defined and the number does not match the number of data
+ # devices, we need to bail out
+ if drive_selection.data_devices() and drive_selection.db_devices():
+ if len(drive_selection.data_devices()) != len(drive_selection.db_devices()):
+ raise OrchestratorError('Raw mode only supports a 1:1 ratio of data to db devices. Found '
+ f'{len(drive_selection.data_devices())} potential data device(s) and '
+ f'{len(drive_selection.db_devices())} potential db device(s) on host {host}')
+ if drive_selection.data_devices() and drive_selection.wal_devices():
+ if len(drive_selection.data_devices()) != len(drive_selection.wal_devices()):
+ raise OrchestratorError('Raw mode only supports a 1:1 ratio of data to wal devices. Found '
+ f'{len(drive_selection.data_devices())} potential data device(s) and '
+ f'{len(drive_selection.wal_devices())} potential wal device(s) on host {host}')
+ host_ds_map.append((host, drive_selection))
+ return host_ds_map
+
+ @staticmethod
+ def driveselection_to_ceph_volume(drive_selection: DriveSelection,
+ osd_id_claims: Optional[List[str]] = None,
+ preview: bool = False) -> List[str]:
+ logger.debug(f"Translating DriveGroup <{drive_selection.spec}> to ceph-volume command")
+ cmds: List[str] = translate.to_ceph_volume(drive_selection,
+ osd_id_claims, preview=preview).run()
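+        # A typical translation (illustrative example, not output from a real run)
+        # is something like ['lvm batch --no-auto /dev/sdb /dev/sdc --yes --no-systemd'].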
+ logger.debug(f"Resulting ceph-volume cmds: {cmds}")
+ return cmds
+
+ def get_previews(self, host: str) -> List[Dict[str, Any]]:
+ # Find OSDSpecs that match host.
+ osdspecs = self.resolve_osdspecs_for_host(host)
+ return self.generate_previews(osdspecs, host)
+
+ def generate_previews(self, osdspecs: List[DriveGroupSpec], for_host: str) -> List[Dict[str, Any]]:
+ """
+
+ The return should look like this:
+
+ [
+ {'data': {<metadata>},
+ 'osdspec': <name of osdspec>,
+ 'host': <name of host>,
+ 'notes': <notes>
+ },
+
+ {'data': ...,
+ 'osdspec': ..,
+ 'host': ...,
+ 'notes': ...
+ }
+ ]
+
+ Note: One host can have multiple previews based on its assigned OSDSpecs.
+ """
+ self.mgr.log.debug(f"Generating OSDSpec previews for {osdspecs}")
+ ret_all: List[Dict[str, Any]] = []
+ if not osdspecs:
+ return ret_all
+ for osdspec in osdspecs:
+
+ # populate osd_id_claims
+ osd_id_claims = OsdIdClaims(self.mgr)
+
+ # prepare driveselection
+ for host, ds in self.prepare_drivegroup(osdspec):
+ if host != for_host:
+ continue
+
+ # driveselection for host
+ cmds: List[str] = self.driveselection_to_ceph_volume(ds,
+ osd_id_claims.filtered_by_host(host),
+ preview=True)
+ if not cmds:
+ logger.debug("No data_devices, skipping DriveGroup: {}".format(
+ osdspec.service_name()))
+ continue
+
+ # get preview data from ceph-volume
+ for cmd in cmds:
+ out, err, code = self._run_ceph_volume_command(host, cmd)
+ if out:
+ try:
+ concat_out: Dict[str, Any] = json.loads(' '.join(out))
+ except ValueError:
+ logger.exception('Cannot decode JSON: \'%s\'' % ' '.join(out))
+ concat_out = {}
+ notes = []
+ if osdspec.data_devices is not None and osdspec.data_devices.limit and len(concat_out) < osdspec.data_devices.limit:
+ found = len(concat_out)
+ limit = osdspec.data_devices.limit
+ notes.append(
+ f'NOTE: Did not find enough disks matching filter on host {host} to reach data device limit (Found: {found} | Limit: {limit})')
+ ret_all.append({'data': concat_out,
+ 'osdspec': osdspec.service_id,
+ 'host': host,
+ 'notes': notes})
+ return ret_all
+
+ def resolve_hosts_for_osdspecs(self,
+ specs: Optional[List[DriveGroupSpec]] = None
+ ) -> List[str]:
+ osdspecs = []
+ if specs:
+ osdspecs = [cast(DriveGroupSpec, spec) for spec in specs]
+ if not osdspecs:
+ self.mgr.log.debug("No OSDSpecs found")
+ return []
+ return sum([spec.placement.filter_matching_hostspecs(self.mgr._schedulable_hosts()) for spec in osdspecs], [])
+
+ def resolve_osdspecs_for_host(self, host: str,
+ specs: Optional[List[DriveGroupSpec]] = None) -> List[DriveGroupSpec]:
+ matching_specs = []
+ self.mgr.log.debug(f"Finding OSDSpecs for host: <{host}>")
+ if not specs:
+ specs = [cast(DriveGroupSpec, spec) for (sn, spec) in self.mgr.spec_store.spec_preview.items()
+ if spec.service_type == 'osd']
+ for spec in specs:
+ if host in spec.placement.filter_matching_hostspecs(self.mgr._schedulable_hosts()):
+ self.mgr.log.debug(f"Found OSDSpecs for host: <{host}> -> <{spec}>")
+ matching_specs.append(spec)
+ return matching_specs
+
+ def _run_ceph_volume_command(self, host: str,
+ cmd: str, env_vars: Optional[List[str]] = None
+ ) -> Tuple[List[str], List[str], int]:
+ self.mgr.inventory.assert_host(host)
+
+ # get bootstrap key
+ ret, keyring, err = self.mgr.check_mon_command({
+ 'prefix': 'auth get',
+ 'entity': 'client.bootstrap-osd',
+ })
+
+ j = json.dumps({
+ 'config': self.mgr.get_minimal_ceph_conf(),
+ 'keyring': keyring,
+ })
+
+ split_cmd = cmd.split(' ')
+ _cmd = ['--config-json', '-', '--']
+ _cmd.extend(split_cmd)
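+        # The resulting argument list (illustrative) looks like
+        # ['--config-json', '-', '--', 'lvm', 'batch', '--no-auto', '/dev/sdb', ...],
+        # with the minimal ceph.conf and bootstrap keyring passed via stdin.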
+ out, err, code = CephadmServe(self.mgr)._run_cephadm(
+ host, 'osd', 'ceph-volume',
+ _cmd,
+ env_vars=env_vars,
+ stdin=j,
+ error_ok=True)
+ return out, err, code
+
+ def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None:
+ # Do not remove the osd.N keyring, if we failed to deploy the OSD, because
+ # we cannot recover from it. The OSD keys are created by ceph-volume and not by
+ # us.
+ if not is_failed_deploy:
+ super().post_remove(daemon, is_failed_deploy=is_failed_deploy)
+
+
+class OsdIdClaims(object):
+ """
+ Retrieve and provide osd ids that can be reused in the cluster
+ """
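+    # Claims are collected from ``ceph osd tree`` entries in the 'destroyed' state
+    # and keyed by the bare host name, e.g. {'node1': ['2', '5']} (illustrative values).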
+
+ def __init__(self, mgr: "CephadmOrchestrator") -> None:
+ self.mgr: "CephadmOrchestrator" = mgr
+ self.osd_host_map: Dict[str, List[str]] = dict()
+ self.refresh()
+
+ def refresh(self) -> None:
+ try:
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'osd tree',
+ 'states': ['destroyed'],
+ 'format': 'json'
+ })
+ except MonCommandFailed as e:
+ logger.exception('osd tree failed')
+ raise OrchestratorError(str(e))
+ try:
+ tree = json.loads(out)
+ except ValueError:
+ logger.exception(f'Cannot decode JSON: \'{out}\'')
+ return
+
+ nodes = tree.get('nodes', {})
+ for node in nodes:
+ if node.get('type') == 'host':
+ self.osd_host_map.update(
+ {node.get('name'): [str(_id) for _id in node.get('children', list())]}
+ )
+ if self.osd_host_map:
+ self.mgr.log.info(f"Found osd claims -> {self.osd_host_map}")
+
+ def get(self) -> Dict[str, List[str]]:
+ return self.osd_host_map
+
+ def filtered_by_host(self, host: str) -> List[str]:
+ """
+ Return the list of osd ids that can be reused in a host
+
+ OSD id claims in CRUSH map are linked to the bare name of
+ the hostname. In case of FQDN hostnames the host is searched by the
+ bare name
+ """
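+        # For example (illustrative): with osd_host_map == {'node1': ['2', '5']},
+        # both filtered_by_host('node1') and filtered_by_host('node1.example.com')
+        # return ['2', '5'].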
+ return self.osd_host_map.get(host.split(".")[0], [])
+
+
+class RemoveUtil(object):
+ def __init__(self, mgr: "CephadmOrchestrator") -> None:
+ self.mgr: "CephadmOrchestrator" = mgr
+
+ def get_osds_in_cluster(self) -> List[str]:
+ osd_map = self.mgr.get_osdmap()
+ return [str(x.get('osd')) for x in osd_map.dump().get('osds', [])]
+
+ def osd_df(self) -> dict:
+ base_cmd = 'osd df'
+ ret, out, err = self.mgr.mon_command({
+ 'prefix': base_cmd,
+ 'format': 'json'
+ })
+ try:
+ return json.loads(out)
+ except ValueError:
+ logger.exception(f'Cannot decode JSON: \'{out}\'')
+ return {}
+
+ def get_pg_count(self, osd_id: int, osd_df: Optional[dict] = None) -> int:
+ if not osd_df:
+ osd_df = self.osd_df()
+ osd_nodes = osd_df.get('nodes', [])
+ for osd_node in osd_nodes:
+ if osd_node.get('id') == int(osd_id):
+ return osd_node.get('pgs', -1)
+ return -1
+
+ def find_osd_stop_threshold(self, osds: List["OSD"]) -> Optional[List["OSD"]]:
+ """
+ Cut osd_id list in half until it's ok-to-stop
+
+ :param osds: list of osd_ids
+        :return: list of osd_ids that can be stopped at once
+ """
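+        # Illustrative behaviour: for osds == [osd.0, osd.1, osd.2, osd.3] the candidate
+        # set shrinks to [osd.2, osd.3] and then [osd.3] until 'osd ok-to-stop' succeeds,
+        # or [] is returned if not even a single OSD can be stopped.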
+ if not osds:
+ return []
+ while not self.ok_to_stop(osds):
+ if len(osds) <= 1:
+ # can't even stop one OSD, aborting
+ self.mgr.log.debug(
+                    "Can't even stop one OSD. Cluster is probably busy. Retrying later...")
+ return []
+
+ # This potentially prolongs the global wait time.
+ self.mgr.event.wait(1)
+ # splitting osd_ids in half until ok_to_stop yields success
+            # popping ids off one by one might be better here; it depends on the cluster size, I guess
+ # There's a lot of room for micro adjustments here
+ osds = osds[len(osds) // 2:]
+ return osds
+
+ # todo start draining
+ # return all([osd.start_draining() for osd in osds])
+
+ def ok_to_stop(self, osds: List["OSD"]) -> bool:
+ cmd_args = {
+ 'prefix': "osd ok-to-stop",
+ 'ids': [str(osd.osd_id) for osd in osds]
+ }
+ return self._run_mon_cmd(cmd_args, error_ok=True)
+
+ def set_osd_flag(self, osds: List["OSD"], flag: str) -> bool:
+ base_cmd = f"osd {flag}"
+ self.mgr.log.debug(f"running cmd: {base_cmd} on ids {osds}")
+ ret, out, err = self.mgr.mon_command({
+ 'prefix': base_cmd,
+ 'ids': [str(osd.osd_id) for osd in osds]
+ })
+ if ret != 0:
+ self.mgr.log.error(f"Could not set {flag} flag for {osds}. <{err}>")
+ return False
+ self.mgr.log.info(f"{','.join([str(o) for o in osds])} now {flag}")
+ return True
+
+ def get_weight(self, osd: "OSD") -> Optional[float]:
+ ret, out, err = self.mgr.mon_command({
+ 'prefix': 'osd crush tree',
+ 'format': 'json',
+ })
+ if ret != 0:
+ self.mgr.log.error(f"Could not dump crush weights. <{err}>")
+ return None
+ j = json.loads(out)
+ for n in j.get("nodes", []):
+ if n.get("name") == f"osd.{osd.osd_id}":
+ self.mgr.log.info(f"{osd} crush weight is {n.get('crush_weight')}")
+ return n.get("crush_weight")
+ return None
+
+ def reweight_osd(self, osd: "OSD", weight: float) -> bool:
+ self.mgr.log.debug(f"running cmd: osd crush reweight on {osd}")
+ ret, out, err = self.mgr.mon_command({
+ 'prefix': "osd crush reweight",
+ 'name': f"osd.{osd.osd_id}",
+ 'weight': weight,
+ })
+ if ret != 0:
+ self.mgr.log.error(f"Could not reweight {osd} to {weight}. <{err}>")
+ return False
+ self.mgr.log.info(f"{osd} weight is now {weight}")
+ return True
+
+ def zap_osd(self, osd: "OSD") -> str:
+        """Zaps all devices that are associated with an OSD"""
+ if osd.hostname is not None:
+ out, err, code = CephadmServe(self.mgr)._run_cephadm(
+ osd.hostname, 'osd', 'ceph-volume',
+ ['--', 'lvm', 'zap', '--destroy', '--osd-id', str(osd.osd_id)],
+ error_ok=True)
+ self.mgr.cache.invalidate_host_devices(osd.hostname)
+ if code:
+ raise OrchestratorError('Zap failed: %s' % '\n'.join(out + err))
+ return '\n'.join(out + err)
+ raise OrchestratorError(f"Failed to zap OSD {osd.osd_id} because host was unknown")
+
+ def safe_to_destroy(self, osd_ids: List[int]) -> bool:
+ """ Queries the safe-to-destroy flag for OSDs """
+ cmd_args = {'prefix': 'osd safe-to-destroy',
+ 'ids': [str(x) for x in osd_ids]}
+ return self._run_mon_cmd(cmd_args, error_ok=True)
+
+ def destroy_osd(self, osd_id: int) -> bool:
+ """ Destroys an OSD (forcefully) """
+ cmd_args = {'prefix': 'osd destroy-actual',
+ 'id': int(osd_id),
+ 'yes_i_really_mean_it': True}
+ return self._run_mon_cmd(cmd_args)
+
+ def purge_osd(self, osd_id: int) -> bool:
+ """ Purges an OSD from the cluster (forcefully) """
+ cmd_args = {
+ 'prefix': 'osd purge-actual',
+ 'id': int(osd_id),
+ 'yes_i_really_mean_it': True
+ }
+ return self._run_mon_cmd(cmd_args)
+
+ def _run_mon_cmd(self, cmd_args: dict, error_ok: bool = False) -> bool:
+ """
+ Generic command to run mon_command and evaluate/log the results
+ """
+ ret, out, err = self.mgr.mon_command(cmd_args)
+ if ret != 0:
+ self.mgr.log.debug(f"ran {cmd_args} with mon_command")
+ if not error_ok:
+ self.mgr.log.error(f"cmd: {cmd_args.get('prefix')} failed with: {err}. (errno:{ret})")
+ return False
+ self.mgr.log.debug(f"cmd: {cmd_args.get('prefix')} returns: {out}")
+ return True
+
+
+class NotFoundError(Exception):
+ pass
+
+
+class OSD:
+
+ def __init__(self,
+ osd_id: int,
+ remove_util: RemoveUtil,
+ drain_started_at: Optional[datetime] = None,
+ process_started_at: Optional[datetime] = None,
+ drain_stopped_at: Optional[datetime] = None,
+ drain_done_at: Optional[datetime] = None,
+ draining: bool = False,
+ started: bool = False,
+ stopped: bool = False,
+ replace: bool = False,
+ force: bool = False,
+ hostname: Optional[str] = None,
+ zap: bool = False):
+ # the ID of the OSD
+ self.osd_id = osd_id
+
+        # when did the process (not the actual draining) start
+ self.process_started_at = process_started_at
+
+ # when did the drain start
+ self.drain_started_at = drain_started_at
+
+ # when did the drain stop
+ self.drain_stopped_at = drain_stopped_at
+
+ # when did the drain finish
+ self.drain_done_at = drain_done_at
+
+ # did the draining start
+ self.draining = draining
+
+ # was the operation started
+ self.started = started
+
+ # was the operation stopped
+ self.stopped = stopped
+
+ # If this is a replace or remove operation
+ self.replace = replace
+        # If set, do not wait for the OSD to be fully drained before removal
+ self.force = force
+ # The name of the node
+ self.hostname = hostname
+
+ # mgr obj to make mgr/mon calls
+ self.rm_util: RemoveUtil = remove_util
+
+ self.original_weight: Optional[float] = None
+
+ # Whether devices associated with the OSD should be zapped (DATA ERASED)
+ self.zap = zap
+
+ def start(self) -> None:
+ if self.started:
+ logger.debug(f"Already started draining {self}")
+ return None
+ self.started = True
+ self.stopped = False
+
+ def start_draining(self) -> bool:
+ if self.stopped:
+ logger.debug(f"Won't start draining {self}. OSD draining is stopped.")
+ return False
+ if self.replace:
+ self.rm_util.set_osd_flag([self], 'out')
+ else:
+ self.original_weight = self.rm_util.get_weight(self)
+ self.rm_util.reweight_osd(self, 0.0)
+ self.drain_started_at = datetime.utcnow()
+ self.draining = True
+ logger.debug(f"Started draining {self}.")
+ return True
+
+ def stop_draining(self) -> bool:
+ if self.replace:
+ self.rm_util.set_osd_flag([self], 'in')
+ else:
+ if self.original_weight:
+ self.rm_util.reweight_osd(self, self.original_weight)
+ self.drain_stopped_at = datetime.utcnow()
+ self.draining = False
+ logger.debug(f"Stopped draining {self}.")
+ return True
+
+ def stop(self) -> None:
+ if self.stopped:
+ logger.debug(f"Already stopped draining {self}")
+ return None
+ self.started = False
+ self.stopped = True
+ self.stop_draining()
+
+ @property
+ def is_draining(self) -> bool:
+ """
+ Consider an OSD draining when it is
+ actively draining but not yet empty
+ """
+ return self.draining and not self.is_empty
+
+ @property
+ def is_ok_to_stop(self) -> bool:
+ return self.rm_util.ok_to_stop([self])
+
+ @property
+ def is_empty(self) -> bool:
+ if self.get_pg_count() == 0:
+ if not self.drain_done_at:
+ self.drain_done_at = datetime.utcnow()
+ self.draining = False
+ return True
+ return False
+
+ def safe_to_destroy(self) -> bool:
+ return self.rm_util.safe_to_destroy([self.osd_id])
+
+ def down(self) -> bool:
+ return self.rm_util.set_osd_flag([self], 'down')
+
+ def destroy(self) -> bool:
+ return self.rm_util.destroy_osd(self.osd_id)
+
+ def do_zap(self) -> str:
+ return self.rm_util.zap_osd(self)
+
+ def purge(self) -> bool:
+ return self.rm_util.purge_osd(self.osd_id)
+
+ def get_pg_count(self) -> int:
+ return self.rm_util.get_pg_count(self.osd_id)
+
+ @property
+ def exists(self) -> bool:
+ return str(self.osd_id) in self.rm_util.get_osds_in_cluster()
+
+ def drain_status_human(self) -> str:
+ default_status = 'not started'
+ status = 'started' if self.started and not self.draining else default_status
+ status = 'draining' if self.draining else status
+ status = 'done, waiting for purge' if self.drain_done_at and not self.draining else status
+ return status
+
+ def pg_count_str(self) -> str:
+ return 'n/a' if self.get_pg_count() < 0 else str(self.get_pg_count())
+
+ def to_json(self) -> dict:
+ out: Dict[str, Any] = dict()
+ out['osd_id'] = self.osd_id
+ out['started'] = self.started
+ out['draining'] = self.draining
+ out['stopped'] = self.stopped
+ out['replace'] = self.replace
+ out['force'] = self.force
+ out['zap'] = self.zap
+ out['hostname'] = self.hostname # type: ignore
+
+ for k in ['drain_started_at', 'drain_stopped_at', 'drain_done_at', 'process_started_at']:
+ if getattr(self, k):
+ out[k] = datetime_to_str(getattr(self, k))
+ else:
+ out[k] = getattr(self, k)
+ return out
+
+ @classmethod
+ def from_json(cls, inp: Optional[Dict[str, Any]], rm_util: RemoveUtil) -> Optional["OSD"]:
+ if not inp:
+ return None
+ for date_field in ['drain_started_at', 'drain_stopped_at', 'drain_done_at', 'process_started_at']:
+ if inp.get(date_field):
+ inp.update({date_field: str_to_datetime(inp.get(date_field, ''))})
+ inp.update({'remove_util': rm_util})
+ if 'nodename' in inp:
+ hostname = inp.pop('nodename')
+ inp['hostname'] = hostname
+ return cls(**inp)
+
+ def __hash__(self) -> int:
+ return hash(self.osd_id)
+
+ def __eq__(self, other: object) -> bool:
+ if not isinstance(other, OSD):
+ return NotImplemented
+ return self.osd_id == other.osd_id
+
+ def __repr__(self) -> str:
+ return f"osd.{self.osd_id}{' (draining)' if self.draining else ''}"
+
+
+class OSDRemovalQueue(object):
+
+ def __init__(self, mgr: "CephadmOrchestrator") -> None:
+ self.mgr: "CephadmOrchestrator" = mgr
+ self.osds: Set[OSD] = set()
+ self.rm_util = RemoveUtil(mgr)
+
+ # locks multithreaded access to self.osds. Please avoid locking
+ # network calls, like mon commands.
+ self.lock = Lock()
+
+ def process_removal_queue(self) -> None:
+ """
+ Performs actions in the _serve() loop to remove an OSD
+        when the removal criteria are met.
+
+ we can't hold self.lock, as we're calling _remove_daemon in the loop
+ """
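+        # High-level flow (descriptive summary): start draining eligible OSDs, then for
+        # each queued OSD verify it is empty and safe-to-destroy (unless forced), mark it
+        # down, remove its daemon, destroy (replace) or purge it in the osdmap, and
+        # optionally zap its devices.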
+
+ # make sure that we don't run on OSDs that are not in the cluster anymore.
+ self.cleanup()
+
+ # find osds that are ok-to-stop and not yet draining
+ ready_to_drain_osds = self._ready_to_drain_osds()
+ if ready_to_drain_osds:
+ # start draining those
+ _ = [osd.start_draining() for osd in ready_to_drain_osds]
+
+ all_osds = self.all_osds()
+
+ logger.debug(
+ f"{self.queue_size()} OSDs are scheduled "
+ f"for removal: {all_osds}")
+
+ # Check all osds for their state and take action (remove, purge etc)
+ new_queue: Set[OSD] = set()
+ for osd in all_osds: # type: OSD
+ if not osd.force:
+ # skip criteria
+ if not osd.is_empty:
+ logger.debug(f"{osd} is not empty yet. Waiting a bit more")
+ new_queue.add(osd)
+ continue
+
+ if not osd.safe_to_destroy():
+ logger.debug(
+ f"{osd} is not safe-to-destroy yet. Waiting a bit more")
+ new_queue.add(osd)
+ continue
+
+ # abort criteria
+ if not osd.down():
+ # also remove it from the remove_osd list and set a health_check warning?
+ raise orchestrator.OrchestratorError(
+ f"Could not mark {osd} down")
+
+ # stop and remove daemon
+ assert osd.hostname is not None
+
+ if self.mgr.cache.has_daemon(f'osd.{osd.osd_id}'):
+ CephadmServe(self.mgr)._remove_daemon(f'osd.{osd.osd_id}', osd.hostname)
+ logger.info(f"Successfully removed {osd} on {osd.hostname}")
+ else:
+ logger.info(f"Daemon {osd} on {osd.hostname} was already removed")
+
+ if osd.replace:
+ # mark destroyed in osdmap
+ if not osd.destroy():
+ raise orchestrator.OrchestratorError(
+ f"Could not destroy {osd}")
+ logger.info(
+ f"Successfully destroyed old {osd} on {osd.hostname}; ready for replacement")
+ else:
+ # purge from osdmap
+ if not osd.purge():
+ raise orchestrator.OrchestratorError(f"Could not purge {osd}")
+ logger.info(f"Successfully purged {osd} on {osd.hostname}")
+
+ if osd.zap:
+ # throws an exception if the zap fails
+ logger.info(f"Zapping devices for {osd} on {osd.hostname}")
+ osd.do_zap()
+ logger.info(f"Successfully zapped devices for {osd} on {osd.hostname}")
+
+ logger.debug(f"Removing {osd} from the queue.")
+
+ # self could change while this is processing (osds get added from the CLI)
+ # The new set is: 'an intersection of all osds that are still not empty/removed (new_queue) and
+ # osds that were added while this method was executed'
+ with self.lock:
+ self.osds.intersection_update(new_queue)
+ self._save_to_store()
+
+ def cleanup(self) -> None:
+ # OSDs can always be cleaned up manually. This ensures that we run on existing OSDs
+ with self.lock:
+ for osd in self._not_in_cluster():
+ self.osds.remove(osd)
+
+ def _ready_to_drain_osds(self) -> List["OSD"]:
+ """
+ Returns OSDs that are ok to stop and not yet draining. Only returns as many OSDs as can
+        be accommodated by the 'max_osd_draining_count' config value, considering the number of OSDs
+ that are already draining.
+ """
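+        # Example (illustrative): with max_osd_draining_count == 10 and 4 OSDs already
+        # draining, at most 6 additional ok-to-stop OSDs are returned.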
+ draining_limit = max(1, self.mgr.max_osd_draining_count)
+ num_already_draining = len(self.draining_osds())
+ num_to_start_draining = max(0, draining_limit - num_already_draining)
+ stoppable_osds = self.rm_util.find_osd_stop_threshold(self.idling_osds())
+ return [] if stoppable_osds is None else stoppable_osds[:num_to_start_draining]
+
+ def _save_to_store(self) -> None:
+ osd_queue = [osd.to_json() for osd in self.osds]
+ logger.debug(f"Saving {osd_queue} to store")
+ self.mgr.set_store('osd_remove_queue', json.dumps(osd_queue))
+
+ def load_from_store(self) -> None:
+ with self.lock:
+ for k, v in self.mgr.get_store_prefix('osd_remove_queue').items():
+ for osd in json.loads(v):
+ logger.debug(f"Loading osd ->{osd} from store")
+ osd_obj = OSD.from_json(osd, rm_util=self.rm_util)
+ if osd_obj is not None:
+ self.osds.add(osd_obj)
+
+ def as_osd_ids(self) -> List[int]:
+ with self.lock:
+ return [osd.osd_id for osd in self.osds]
+
+ def queue_size(self) -> int:
+ with self.lock:
+ return len(self.osds)
+
+ def draining_osds(self) -> List["OSD"]:
+ with self.lock:
+ return [osd for osd in self.osds if osd.is_draining]
+
+ def idling_osds(self) -> List["OSD"]:
+ with self.lock:
+ return [osd for osd in self.osds if not osd.is_draining and not osd.is_empty]
+
+ def empty_osds(self) -> List["OSD"]:
+ with self.lock:
+ return [osd for osd in self.osds if osd.is_empty]
+
+ def all_osds(self) -> List["OSD"]:
+ with self.lock:
+ return [osd for osd in self.osds]
+
+ def _not_in_cluster(self) -> List["OSD"]:
+ return [osd for osd in self.osds if not osd.exists]
+
+ def enqueue(self, osd: "OSD") -> None:
+ if not osd.exists:
+ raise NotFoundError()
+ with self.lock:
+ self.osds.add(osd)
+ osd.start()
+
+ def rm(self, osd: "OSD") -> None:
+ if not osd.exists:
+ raise NotFoundError()
+ osd.stop()
+ with self.lock:
+ try:
+ logger.debug(f'Removing {osd} from the queue.')
+ self.osds.remove(osd)
+ except KeyError:
+ logger.debug(f"Could not find {osd} in queue.")
+ raise KeyError
+
+ def __eq__(self, other: Any) -> bool:
+ if not isinstance(other, OSDRemovalQueue):
+ return False
+ with self.lock:
+ return self.osds == other.osds
diff --git a/src/pybind/mgr/cephadm/template.py b/src/pybind/mgr/cephadm/template.py
new file mode 100644
index 000000000..0d62e587c
--- /dev/null
+++ b/src/pybind/mgr/cephadm/template.py
@@ -0,0 +1,109 @@
+import copy
+from typing import Optional, TYPE_CHECKING
+
+from jinja2 import Environment, PackageLoader, select_autoescape, StrictUndefined
+from jinja2 import exceptions as j2_exceptions
+
+if TYPE_CHECKING:
+ from cephadm.module import CephadmOrchestrator
+
+
+class TemplateError(Exception):
+ pass
+
+
+class UndefinedError(TemplateError):
+ pass
+
+
+class TemplateNotFoundError(TemplateError):
+ pass
+
+
+class TemplateEngine:
+ def render(self, name: str, context: Optional[dict] = None) -> str:
+ raise NotImplementedError()
+
+
+class Jinja2Engine(TemplateEngine):
+ def __init__(self) -> None:
+ self.env = Environment(
+ loader=PackageLoader('cephadm', 'templates'),
+ autoescape=select_autoescape(['html', 'xml'], default_for_string=False),
+ trim_blocks=True,
+ lstrip_blocks=True,
+ undefined=StrictUndefined
+ )
+
+ def render(self, name: str, context: Optional[dict] = None) -> str:
+ try:
+ template = self.env.get_template(name)
+ if context is None:
+ return template.render()
+ return template.render(context)
+ except j2_exceptions.UndefinedError as e:
+ raise UndefinedError(e.message)
+ except j2_exceptions.TemplateNotFound as e:
+ raise TemplateNotFoundError(e.message)
+
+ def render_plain(self, source: str, context: Optional[dict]) -> str:
+ try:
+ template = self.env.from_string(source)
+ if context is None:
+ return template.render()
+ return template.render(context)
+ except j2_exceptions.UndefinedError as e:
+ raise UndefinedError(e.message)
+ except j2_exceptions.TemplateNotFound as e:
+ raise TemplateNotFoundError(e.message)
+
+
+class TemplateMgr:
+ def __init__(self, mgr: "CephadmOrchestrator"):
+ self.engine = Jinja2Engine()
+ self.base_context = {
+ 'cephadm_managed': 'This file is generated by cephadm.'
+ }
+ self.mgr = mgr
+
+ def render(self, name: str,
+ context: Optional[dict] = None,
+ managed_context: bool = True,
+ host: Optional[str] = None) -> str:
+ """Render a string from a template with context.
+
+ :param name: template name. e.g. services/nfs/ganesha.conf.j2
+ :type name: str
+ :param context: a dictionary that contains values to be used in the template, defaults
+ to None
+ :type context: Optional[dict], optional
+ :param managed_context: to inject default context like managed header or not, defaults
+ to True
+ :type managed_context: bool, optional
+ :param host: The host name used to build the key to access
+ the module's persistent key-value store.
+ :type host: Optional[str], optional
+ :return: the templated string
+ :rtype: str
+ """
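+        # Illustrative usage (the template name and context keys are only examples):
+        #   template_mgr.render('services/nfs/ganesha.conf.j2', context={...})
+        # A custom template stored in the key-value store under
+        # 'services/nfs/ganesha.conf' (optionally prefixed with '<host>/') takes
+        # precedence over the packaged template.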
+ ctx = {}
+ if managed_context:
+ ctx = copy.deepcopy(self.base_context)
+ if context is not None:
+ ctx = {**ctx, **context}
+
+ # Check if the given name exists in the module's persistent
+ # key-value store, e.g.
+ # - blink_device_light_cmd
+ # - <host>/blink_device_light_cmd
+ # - services/nfs/ganesha.conf
+ store_name = name.rstrip('.j2')
+ custom_template = self.mgr.get_store(store_name, None)
+ if host and custom_template is None:
+ store_name = '{}/{}'.format(host, store_name)
+ custom_template = self.mgr.get_store(store_name, None)
+
+ if custom_template:
+ return self.engine.render_plain(custom_template, ctx)
+ else:
+ return self.engine.render(name, ctx)
diff --git a/src/pybind/mgr/cephadm/templates/blink_device_light_cmd.j2 b/src/pybind/mgr/cephadm/templates/blink_device_light_cmd.j2
new file mode 100644
index 000000000..dab115833
--- /dev/null
+++ b/src/pybind/mgr/cephadm/templates/blink_device_light_cmd.j2
@@ -0,0 +1 @@
+lsmcli local-disk-{{ ident_fault }}-led-{{'on' if on else 'off'}} --path '{{ path or dev }}'
diff --git a/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 b/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2
new file mode 100644
index 000000000..4e394106f
--- /dev/null
+++ b/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2
@@ -0,0 +1,47 @@
+# {{ cephadm_managed }}
+# See https://prometheus.io/docs/alerting/configuration/ for documentation.
+
+global:
+ resolve_timeout: 5m
+{% if not secure %}
+ http_config:
+ tls_config:
+ insecure_skip_verify: true
+{% endif %}
+
+route:
+ receiver: 'default'
+ routes:
+ - group_by: ['alertname']
+ group_wait: 10s
+ group_interval: 10s
+ repeat_interval: 1h
+ receiver: 'ceph-dashboard'
+{% if snmp_gateway_urls %}
+ continue: true
+ - receiver: 'snmp-gateway'
+ repeat_interval: 1h
+ group_interval: 10s
+ group_by: ['alertname']
+ match_re:
+ oid: "(1.3.6.1.4.1.50495.).*"
+{% endif %}
+
+receivers:
+- name: 'default'
+ webhook_configs:
+{% for url in default_webhook_urls %}
+ - url: '{{ url }}'
+{% endfor %}
+- name: 'ceph-dashboard'
+ webhook_configs:
+{% for url in dashboard_urls %}
+ - url: '{{ url }}/api/prometheus_receiver'
+{% endfor %}
+{% if snmp_gateway_urls %}
+- name: 'snmp-gateway'
+ webhook_configs:
+{% for url in snmp_gateway_urls %}
+ - url: '{{ url }}'
+{% endfor %}
+{% endif %}
diff --git a/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2 b/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2
new file mode 100644
index 000000000..170e6f246
--- /dev/null
+++ b/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2
@@ -0,0 +1,18 @@
+# {{ cephadm_managed }}
+deleteDatasources:
+{% for host in hosts %}
+ - name: 'Dashboard{{ loop.index }}'
+ orgId: 1
+{% endfor %}
+
+datasources:
+{% for host in hosts %}
+ - name: 'Dashboard{{ loop.index }}'
+ type: 'prometheus'
+ access: 'proxy'
+ orgId: 1
+ url: '{{ host }}'
+ basicAuth: false
+ isDefault: {{ 'true' if loop.first else 'false' }}
+ editable: false
+{% endfor %}
diff --git a/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 b/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2
new file mode 100644
index 000000000..cf23802d7
--- /dev/null
+++ b/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2
@@ -0,0 +1,24 @@
+# {{ cephadm_managed }}
+[users]
+ default_theme = light
+[auth.anonymous]
+ enabled = true
+ org_name = 'Main Org.'
+ org_role = 'Viewer'
+[server]
+ domain = 'bootstrap.storage.lab'
+ protocol = https
+ cert_file = /etc/grafana/certs/cert_file
+ cert_key = /etc/grafana/certs/cert_key
+ http_port = {{ http_port }}
+ http_addr = {{ http_addr }}
+[security]
+{% if not initial_admin_password %}
+ disable_initial_admin_creation = true
+{% else %}
+ admin_user = admin
+ admin_password = {{ initial_admin_password }}
+{% endif %}
+ cookie_secure = true
+ cookie_samesite = none
+ allow_embedding = true
diff --git a/src/pybind/mgr/cephadm/templates/services/ingress/haproxy.cfg.j2 b/src/pybind/mgr/cephadm/templates/services/ingress/haproxy.cfg.j2
new file mode 100644
index 000000000..cb84f1d07
--- /dev/null
+++ b/src/pybind/mgr/cephadm/templates/services/ingress/haproxy.cfg.j2
@@ -0,0 +1,83 @@
+# {{ cephadm_managed }}
+global
+ log 127.0.0.1 local2
+ chroot /var/lib/haproxy
+ pidfile /var/lib/haproxy/haproxy.pid
+ maxconn 8000
+ daemon
+ stats socket /var/lib/haproxy/stats
+{% if spec.ssl_cert %}
+ {% if spec.ssl_dh_param %}
+ tune.ssl.default-dh-param {{ spec.ssl_dh_param }}
+ {% endif %}
+ {% if spec.ssl_ciphers %}
+ ssl-default-bind-ciphers {{ spec.ssl_ciphers | join(':') }}
+ {% endif %}
+ {% if spec.ssl_options %}
+ ssl-default-bind-options {{ spec.ssl_options | join(' ') }}
+ {% endif %}
+{% endif %}
+
+defaults
+ mode {{ mode }}
+ log global
+{% if mode == 'http' %}
+ option httplog
+ option dontlognull
+ option http-server-close
+ option forwardfor except 127.0.0.0/8
+ option redispatch
+ retries 3
+ timeout queue 20s
+ timeout connect 5s
+ timeout http-request 1s
+ timeout http-keep-alive 5s
+ timeout client 1s
+ timeout server 1s
+ timeout check 5s
+{% endif %}
+{% if mode == 'tcp' %}
+ timeout queue 1m
+ timeout connect 10s
+ timeout client 1m
+ timeout server 1m
+ timeout check 10s
+{% endif %}
+ maxconn 8000
+
+frontend stats
+ mode http
+ bind {{ ip }}:{{ monitor_port }}
+ bind localhost:{{ monitor_port }}
+ stats enable
+ stats uri /stats
+ stats refresh 10s
+ stats auth {{ user }}:{{ password }}
+ http-request use-service prometheus-exporter if { path /metrics }
+ monitor-uri /health
+
+frontend frontend
+{% if spec.ssl_cert %}
+ bind {{ ip }}:{{ frontend_port }} ssl crt /var/lib/haproxy/haproxy.pem
+{% else %}
+ bind {{ ip }}:{{ frontend_port }}
+{% endif %}
+ default_backend backend
+
+backend backend
+{% if mode == 'http' %}
+ option forwardfor
+ balance static-rr
+ option httpchk HEAD / HTTP/1.0
+ {% for server in servers %}
+ server {{ server.name }} {{ server.ip }}:{{ server.port }} check weight 100
+ {% endfor %}
+{% endif %}
+{% if mode == 'tcp' %}
+ mode tcp
+ balance source
+ hash-type consistent
+ {% for server in servers %}
+ server {{ server.name }} {{ server.ip }}:{{ server.port }}
+ {% endfor %}
+{% endif %}
diff --git a/src/pybind/mgr/cephadm/templates/services/ingress/keepalived.conf.j2 b/src/pybind/mgr/cephadm/templates/services/ingress/keepalived.conf.j2
new file mode 100644
index 000000000..f560c9756
--- /dev/null
+++ b/src/pybind/mgr/cephadm/templates/services/ingress/keepalived.conf.j2
@@ -0,0 +1,34 @@
+# {{ cephadm_managed }}
+vrrp_script check_backend {
+ script "{{ script }}"
+ weight -20
+ interval 2
+ rise 2
+ fall 2
+}
+
+{% for x in range(virtual_ips|length) %}
+vrrp_instance VI_{{ x }} {
+ state {{ states[x] }}
+ priority {{ priorities[x] }}
+ interface {{ interface }}
+ virtual_router_id {{ 50 + x }}
+ advert_int 1
+ authentication {
+ auth_type PASS
+ auth_pass {{ password }}
+ }
+ unicast_src_ip {{ host_ip }}
+ unicast_peer {
+ {% for ip in other_ips %}
+ {{ ip }}
+ {% endfor %}
+ }
+ virtual_ipaddress {
+ {{ virtual_ips[x] }} dev {{ interface }}
+ }
+ track_script {
+ check_backend
+ }
+}
+{% endfor %}
diff --git a/src/pybind/mgr/cephadm/templates/services/iscsi/iscsi-gateway.cfg.j2 b/src/pybind/mgr/cephadm/templates/services/iscsi/iscsi-gateway.cfg.j2
new file mode 100644
index 000000000..c2582ace7
--- /dev/null
+++ b/src/pybind/mgr/cephadm/templates/services/iscsi/iscsi-gateway.cfg.j2
@@ -0,0 +1,13 @@
+# {{ cephadm_managed }}
+[config]
+cluster_client_name = {{ client_name }}
+pool = {{ spec.pool }}
+trusted_ip_list = {{ trusted_ip_list|default("''", true) }}
+minimum_gateways = 1
+api_port = {{ spec.api_port|default("''", true) }}
+api_user = {{ spec.api_user|default("''", true) }}
+api_password = {{ spec.api_password|default("''", true) }}
+api_secure = {{ spec.api_secure|default('False', true) }}
+log_to_stderr = True
+log_to_stderr_prefix = debug
+log_to_file = False
diff --git a/src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2 b/src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2
new file mode 100644
index 000000000..9d6e15f1c
--- /dev/null
+++ b/src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2
@@ -0,0 +1,35 @@
+# {{ cephadm_managed }}
+NFS_CORE_PARAM {
+ Enable_NLM = false;
+ Enable_RQUOTA = false;
+ Protocols = 4;
+ NFS_Port = {{ port }};
+{% if bind_addr %}
+ Bind_addr = {{ bind_addr }};
+{% endif %}
+}
+
+NFSv4 {
+ Delegations = false;
+ RecoveryBackend = 'rados_cluster';
+ Minor_Versions = 1, 2;
+}
+
+RADOS_KV {
+ UserId = "{{ user }}";
+ nodeid = "{{ nodeid }}";
+ pool = "{{ pool }}";
+ namespace = "{{ namespace }}";
+}
+
+RADOS_URLS {
+ UserId = "{{ user }}";
+ watch_url = "{{ url }}";
+}
+
+RGW {
+ cluster = "ceph";
+ name = "client.{{ rgw_user }}";
+}
+
+%url {{ url }}
diff --git a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2
new file mode 100644
index 000000000..bb0a8fcae
--- /dev/null
+++ b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2
@@ -0,0 +1,41 @@
+# {{ cephadm_managed }}
+global:
+ scrape_interval: 10s
+ evaluation_interval: 10s
+rule_files:
+ - /etc/prometheus/alerting/*
+{% if alertmgr_targets %}
+alerting:
+ alertmanagers:
+ - scheme: http
+ static_configs:
+ - targets: [{{ alertmgr_targets|join(', ') }}]
+{% endif %}
+scrape_configs:
+ - job_name: 'ceph'
+ honor_labels: true
+ static_configs:
+ - targets:
+{% for mgr in mgr_scrape_list %}
+ - '{{ mgr }}'
+{% endfor %}
+
+{% if nodes %}
+ - job_name: 'node'
+ static_configs:
+{% for node in nodes %}
+ - targets: ['{{ node.url }}']
+ labels:
+ instance: '{{ node.hostname }}'
+{% endfor %}
+{% endif %}
+
+{% if haproxy_targets %}
+ - job_name: 'haproxy'
+ static_configs:
+{% for haproxy in haproxy_targets %}
+ - targets: [{{ haproxy.url }}]
+ labels:
+ instance: '{{ haproxy.service }}'
+{% endfor %}
+{% endif %}
diff --git a/src/pybind/mgr/cephadm/tests/__init__.py b/src/pybind/mgr/cephadm/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/__init__.py
diff --git a/src/pybind/mgr/cephadm/tests/conftest.py b/src/pybind/mgr/cephadm/tests/conftest.py
new file mode 100644
index 000000000..e8add2c7b
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/conftest.py
@@ -0,0 +1,27 @@
+import pytest
+
+from cephadm.services.osd import RemoveUtil, OSD
+from tests import mock
+
+from .fixtures import with_cephadm_module
+
+
+@pytest.fixture()
+def cephadm_module():
+ with with_cephadm_module({}) as m:
+ yield m
+
+
+@pytest.fixture()
+def rm_util():
+ with with_cephadm_module({}) as m:
+ r = RemoveUtil.__new__(RemoveUtil)
+ r.__init__(m)
+ yield r
+
+
+@pytest.fixture()
+def osd_obj():
+ with mock.patch("cephadm.services.osd.RemoveUtil"):
+ o = OSD(0, mock.MagicMock())
+ yield o
diff --git a/src/pybind/mgr/cephadm/tests/fixtures.py b/src/pybind/mgr/cephadm/tests/fixtures.py
new file mode 100644
index 000000000..8c2e1cfbf
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/fixtures.py
@@ -0,0 +1,151 @@
+import fnmatch
+from contextlib import contextmanager
+
+from ceph.deployment.service_spec import PlacementSpec, ServiceSpec
+from ceph.utils import datetime_to_str, datetime_now
+from cephadm.serve import CephadmServe
+
+try:
+ from typing import Any, Iterator, List
+except ImportError:
+ pass
+
+from cephadm import CephadmOrchestrator
+from orchestrator import raise_if_exception, OrchResult, HostSpec, DaemonDescriptionStatus
+from tests import mock
+
+
+def get_ceph_option(_, key):
+ return __file__
+
+
+def get_module_option_ex(_, module, key, default=None):
+ if module == 'prometheus':
+ if key == 'server_port':
+ return 9283
+ return None
+
+
+def _run_cephadm(ret):
+ def foo(s, host, entity, cmd, e, **kwargs):
+ if cmd == 'gather-facts':
+ return '{}', '', 0
+ return [ret], '', 0
+ return foo
+
+
+def match_glob(val, pat):
+ ok = fnmatch.fnmatchcase(val, pat)
+ if not ok:
+ assert pat in val
+
+
+@contextmanager
+def with_cephadm_module(module_options=None, store=None):
+ """
+ :param module_options: Set opts as if they were set before module.__init__ is called
+ :param store: Set the store before module.__init__ is called
+ """
+ with mock.patch("cephadm.module.CephadmOrchestrator.get_ceph_option", get_ceph_option),\
+ mock.patch("cephadm.services.osd.RemoveUtil._run_mon_cmd"), \
+ mock.patch('cephadm.module.CephadmOrchestrator.get_module_option_ex', get_module_option_ex),\
+ mock.patch("cephadm.module.CephadmOrchestrator.get_osdmap"), \
+ mock.patch("cephadm.module.CephadmOrchestrator.remote"), \
+ mock.patch('cephadm.offline_watcher.OfflineHostWatcher.run'):
+
+ m = CephadmOrchestrator.__new__(CephadmOrchestrator)
+ if module_options is not None:
+ for k, v in module_options.items():
+ m._ceph_set_module_option('cephadm', k, v)
+ if store is None:
+ store = {}
+ if '_ceph_get/mon_map' not in store:
+ m.mock_store_set('_ceph_get', 'mon_map', {
+ 'modified': datetime_to_str(datetime_now()),
+ 'fsid': 'foobar',
+ })
+ if '_ceph_get/mgr_map' not in store:
+ m.mock_store_set('_ceph_get', 'mgr_map', {
+ 'services': {
+ 'dashboard': 'http://[::1]:8080',
+ 'prometheus': 'http://[::1]:8081'
+ },
+ 'modules': ['dashboard', 'prometheus'],
+ })
+ for k, v in store.items():
+ m._ceph_set_store(k, v)
+
+ m.__init__('cephadm', 0, 0)
+ m._cluster_fsid = "fsid"
+ yield m
+
+
+def wait(m: CephadmOrchestrator, c: OrchResult) -> Any:
+ return raise_if_exception(c)
+
+
+@contextmanager
+def with_host(m: CephadmOrchestrator, name, addr='1::4', refresh_hosts=True, rm_with_force=True):
+ with mock.patch("cephadm.utils.resolve_ip", return_value=addr):
+ wait(m, m.add_host(HostSpec(hostname=name)))
+ if refresh_hosts:
+ CephadmServe(m)._refresh_hosts_and_daemons()
+ yield
+ wait(m, m.remove_host(name, force=rm_with_force))
+
+
+def assert_rm_service(cephadm: CephadmOrchestrator, srv_name):
+ mon_or_mgr = cephadm.spec_store[srv_name].spec.service_type in ('mon', 'mgr')
+ if mon_or_mgr:
+ assert 'Unable' in wait(cephadm, cephadm.remove_service(srv_name))
+ return
+ assert wait(cephadm, cephadm.remove_service(srv_name)) == f'Removed service {srv_name}'
+ assert cephadm.spec_store[srv_name].deleted is not None
+ CephadmServe(cephadm)._check_daemons()
+ CephadmServe(cephadm)._apply_all_services()
+ assert cephadm.spec_store[srv_name].deleted
+ unmanaged = cephadm.spec_store[srv_name].spec.unmanaged
+ CephadmServe(cephadm)._purge_deleted_services()
+    if not unmanaged:  # because then we're not deleting daemons
+ assert srv_name not in cephadm.spec_store, f'{cephadm.spec_store[srv_name]!r}'
+
+
+@contextmanager
+def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth=None, host: str = '', status_running=False) -> Iterator[List[str]]:
+ if spec.placement.is_empty() and host:
+ spec.placement = PlacementSpec(hosts=[host], count=1)
+ if meth is not None:
+ c = meth(cephadm_module, spec)
+ assert wait(cephadm_module, c) == f'Scheduled {spec.service_name()} update...'
+ else:
+ c = cephadm_module.apply([spec])
+ assert wait(cephadm_module, c) == [f'Scheduled {spec.service_name()} update...']
+
+ specs = [d.spec for d in wait(cephadm_module, cephadm_module.describe_service())]
+ assert spec in specs
+
+ CephadmServe(cephadm_module)._apply_all_services()
+
+ if status_running:
+ make_daemons_running(cephadm_module, spec.service_name())
+
+ dds = wait(cephadm_module, cephadm_module.list_daemons())
+ own_dds = [dd for dd in dds if dd.service_name() == spec.service_name()]
+ if host and spec.service_type != 'osd':
+ assert own_dds
+
+ yield [dd.name() for dd in own_dds]
+
+ assert_rm_service(cephadm_module, spec.service_name())
+
+
+def make_daemons_running(cephadm_module, service_name):
+ own_dds = cephadm_module.cache.get_daemons_by_service(service_name)
+ for dd in own_dds:
+ dd.status = DaemonDescriptionStatus.running # We're changing the reference
+
+
+def _deploy_cephadm_binary(host):
+ def foo(*args, **kwargs):
+ return True
+ return foo
diff --git a/src/pybind/mgr/cephadm/tests/test_autotune.py b/src/pybind/mgr/cephadm/tests/test_autotune.py
new file mode 100644
index 000000000..524da9c00
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_autotune.py
@@ -0,0 +1,69 @@
+# Disable autopep8 for this file:
+
+# fmt: off
+
+import pytest
+
+from cephadm.autotune import MemoryAutotuner
+from orchestrator import DaemonDescription
+
+
+@pytest.mark.parametrize("total,daemons,config,result",
+ [ # noqa: E128
+ (
+ 128 * 1024 * 1024 * 1024,
+ [],
+ {},
+ None,
+ ),
+ (
+ 128 * 1024 * 1024 * 1024,
+ [
+ DaemonDescription('osd', '1', 'host1'),
+ DaemonDescription('osd', '2', 'host1'),
+ ],
+ {},
+ 64 * 1024 * 1024 * 1024,
+ ),
+ (
+ 128 * 1024 * 1024 * 1024,
+ [
+ DaemonDescription('osd', '1', 'host1'),
+ DaemonDescription('osd', '2', 'host1'),
+ DaemonDescription('osd', '3', 'host1'),
+ ],
+ {
+ 'osd.3': 16 * 1024 * 1024 * 1024,
+ },
+ 56 * 1024 * 1024 * 1024,
+ ),
+ (
+ 128 * 1024 * 1024 * 1024,
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('osd', '1', 'host1'),
+ DaemonDescription('osd', '2', 'host1'),
+ ],
+ {},
+ 62 * 1024 * 1024 * 1024,
+ )
+ ])
+def test_autotune(total, daemons, config, result):
+ def fake_getter(who, opt):
+ if opt == 'osd_memory_target_autotune':
+ if who in config:
+ return False
+ else:
+ return True
+ if opt == 'osd_memory_target':
+ return config.get(who, 4 * 1024 * 1024 * 1024)
+ if opt == 'mds_cache_memory_limit':
+ return 16 * 1024 * 1024 * 1024
+
+ a = MemoryAutotuner(
+ total_mem=total,
+ daemons=daemons,
+ config_get=fake_getter,
+ )
+ val, osds = a.tune()
+ assert val == result
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
new file mode 100644
index 000000000..a6850f6cb
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py
@@ -0,0 +1,1805 @@
+import json
+import logging
+from contextlib import contextmanager
+
+import pytest
+
+from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection
+from cephadm.serve import CephadmServe
+from cephadm.services.osd import OSD, OSDRemovalQueue, OsdIdClaims
+
+try:
+ from typing import List
+except ImportError:
+ pass
+
+from execnet.gateway_bootstrap import HostNotFound
+
+from ceph.deployment.service_spec import ServiceSpec, PlacementSpec, RGWSpec, \
+ NFSServiceSpec, IscsiServiceSpec, HostPlacementSpec, CustomContainerSpec, MDSSpec
+from ceph.deployment.drive_selection.selector import DriveSelection
+from ceph.deployment.inventory import Devices, Device
+from ceph.utils import datetime_to_str, datetime_now
+from orchestrator import DaemonDescription, InventoryHost, \
+ HostSpec, OrchestratorError, DaemonDescriptionStatus, OrchestratorEvent
+from tests import mock
+from .fixtures import wait, _run_cephadm, match_glob, with_host, \
+ with_cephadm_module, with_service, _deploy_cephadm_binary, make_daemons_running
+from cephadm.module import CephadmOrchestrator
+
+"""
+TODOs:
+    There is really room for improvement here. I just quickly assembled these tests.
+    In general, everything should be tested in Teuthology as well. The reason for
+    also testing this here is the development round-trip time.
+"""
+
+
+def assert_rm_daemon(cephadm: CephadmOrchestrator, prefix, host):
+ dds: List[DaemonDescription] = wait(cephadm, cephadm.list_daemons(host=host))
+ d_names = [dd.name() for dd in dds if dd.name().startswith(prefix)]
+ assert d_names
+    # there should only be one daemon (if not, match_glob will throw a mismatch)
+ assert len(d_names) == 1
+
+ c = cephadm.remove_daemons(d_names)
+ [out] = wait(cephadm, c)
+    # Picking the 1st element is needed rather than passing the whole list, because when a daemon
+    # name contains a '-' char, the '-' is treated as a range: e.g. cephadm-exporter
+    # is treated like an (invalid) m-e range, while rbd-mirror (d-m) and node-exporter (e-e)
+    # are valid ranges and pass without incident. Also, match_glob acts on strings anyway!
+ match_glob(out, f"Removed {d_names[0]}* from host '{host}'")
+
+
+@contextmanager
+def with_daemon(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, host: str):
+ spec.placement = PlacementSpec(hosts=[host], count=1)
+
+ c = cephadm_module.add_daemon(spec)
+ [out] = wait(cephadm_module, c)
+ match_glob(out, f"Deployed {spec.service_name()}.* on host '{host}'")
+
+ dds = cephadm_module.cache.get_daemons_by_service(spec.service_name())
+ for dd in dds:
+ if dd.hostname == host:
+ yield dd.daemon_id
+ assert_rm_daemon(cephadm_module, spec.service_name(), host)
+ return
+
+ assert False, 'Daemon not found'
+
+
+@contextmanager
+def with_osd_daemon(cephadm_module: CephadmOrchestrator, _run_cephadm, host: str, osd_id: int, ceph_volume_lvm_list=None):
+ cephadm_module.mock_store_set('_ceph_get', 'osd_map', {
+ 'osds': [
+ {
+ 'osd': 1,
+ 'up_from': 0,
+ 'up': True,
+ 'uuid': 'uuid'
+ }
+ ]
+ })
+
+ _run_cephadm.reset_mock(return_value=True, side_effect=True)
+ if ceph_volume_lvm_list:
+ _run_cephadm.side_effect = ceph_volume_lvm_list
+ else:
+ def _ceph_volume_list(s, host, entity, cmd, **kwargs):
+ logging.info(f'ceph-volume cmd: {cmd}')
+ if 'raw' in cmd:
+ return json.dumps({
+ "21a4209b-f51b-4225-81dc-d2dca5b8b2f5": {
+ "ceph_fsid": cephadm_module._cluster_fsid,
+ "device": "/dev/loop0",
+ "osd_id": 21,
+ "osd_uuid": "21a4209b-f51b-4225-81dc-d2dca5b8b2f5",
+ "type": "bluestore"
+ },
+ }), '', 0
+ if 'lvm' in cmd:
+ return json.dumps({
+ str(osd_id): [{
+ 'tags': {
+ 'ceph.cluster_fsid': cephadm_module._cluster_fsid,
+ 'ceph.osd_fsid': 'uuid'
+ },
+ 'type': 'data'
+ }]
+ }), '', 0
+ return '{}', '', 0
+
+ _run_cephadm.side_effect = _ceph_volume_list
+
+ assert cephadm_module._osd_activate(
+ [host]).stdout == f"Created osd(s) 1 on host '{host}'"
+ assert _run_cephadm.mock_calls == [
+ mock.call(host, 'osd', 'ceph-volume',
+ ['--', 'lvm', 'list', '--format', 'json'], no_fsid=False, image=''),
+ mock.call(host, f'osd.{osd_id}', 'deploy',
+ ['--name', f'osd.{osd_id}', '--meta-json', mock.ANY,
+ '--config-json', '-', '--osd-fsid', 'uuid'],
+ stdin=mock.ANY, image=''),
+ mock.call(host, 'osd', 'ceph-volume',
+ ['--', 'raw', 'list', '--format', 'json'], no_fsid=False, image=''),
+ ]
+ dd = cephadm_module.cache.get_daemon(f'osd.{osd_id}', host=host)
+ assert dd.name() == f'osd.{osd_id}'
+ yield dd
+ cephadm_module._remove_daemons([(f'osd.{osd_id}', host)])
+
+
+class TestCephadm(object):
+
+ def test_get_unique_name(self, cephadm_module):
+ # type: (CephadmOrchestrator) -> None
+ existing = [
+ DaemonDescription(daemon_type='mon', daemon_id='a')
+ ]
+ new_mon = cephadm_module.get_unique_name('mon', 'myhost', existing)
+ match_glob(new_mon, 'myhost')
+ new_mgr = cephadm_module.get_unique_name('mgr', 'myhost', existing)
+ match_glob(new_mgr, 'myhost.*')
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_host(self, cephadm_module):
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == []
+ with with_host(cephadm_module, 'test'):
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', '1::4')]
+
+ # Be careful with backward compatibility when changing things here:
+ assert json.loads(cephadm_module.get_store('inventory')) == \
+ {"test": {"hostname": "test", "addr": "1::4", "labels": [], "status": ""}}
+
+ with with_host(cephadm_module, 'second', '1.2.3.5'):
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == [
+ HostSpec('test', '1::4'),
+ HostSpec('second', '1.2.3.5')
+ ]
+
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', '1::4')]
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == []
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ @mock.patch("cephadm.utils.resolve_ip")
+ def test_re_add_host_receive_loopback(self, resolve_ip, cephadm_module):
+ resolve_ip.side_effect = ['192.168.122.1', '127.0.0.1', '127.0.0.1']
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == []
+ cephadm_module._add_host(HostSpec('test', '192.168.122.1'))
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', '192.168.122.1')]
+ cephadm_module._add_host(HostSpec('test'))
+ assert wait(cephadm_module, cephadm_module.get_hosts()) == [HostSpec('test', '192.168.122.1')]
+ with pytest.raises(OrchestratorError):
+ cephadm_module._add_host(HostSpec('test2'))
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_service_ls(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ c = cephadm_module.list_daemons(refresh=True)
+ assert wait(cephadm_module, c) == []
+ with with_service(cephadm_module, MDSSpec('mds', 'name', unmanaged=True)) as _, \
+ with_daemon(cephadm_module, MDSSpec('mds', 'name'), 'test') as _:
+
+ c = cephadm_module.list_daemons()
+
+ def remove_id_events(dd):
+ out = dd.to_json()
+ del out['daemon_id']
+ del out['events']
+ del out['daemon_name']
+ return out
+
+ assert [remove_id_events(dd) for dd in wait(cephadm_module, c)] == [
+ {
+ 'service_name': 'mds.name',
+ 'daemon_type': 'mds',
+ 'hostname': 'test',
+ 'status': 2,
+ 'status_desc': 'starting',
+ 'is_active': False,
+ 'ports': [],
+ }
+ ]
+
+ with with_service(cephadm_module, ServiceSpec('rgw', 'r.z'),
+ CephadmOrchestrator.apply_rgw, 'test', status_running=True):
+ make_daemons_running(cephadm_module, 'mds.name')
+
+ c = cephadm_module.describe_service()
+ out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
+ expected = [
+ {
+ 'placement': {'count': 2},
+ 'service_id': 'name',
+ 'service_name': 'mds.name',
+ 'service_type': 'mds',
+ 'status': {'created': mock.ANY, 'running': 1, 'size': 2},
+ 'unmanaged': True
+ },
+ {
+ 'placement': {
+ 'count': 1,
+ 'hosts': ["test"]
+ },
+ 'service_id': 'r.z',
+ 'service_name': 'rgw.r.z',
+ 'service_type': 'rgw',
+ 'status': {'created': mock.ANY, 'running': 1, 'size': 1,
+ 'ports': [80]},
+ }
+ ]
+ for o in out:
+ if 'events' in o:
+ del o['events'] # delete it, as it contains a timestamp
+ assert out == expected
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_service_ls_service_type_flag(self, cephadm_module):
+ with with_host(cephadm_module, 'host1'):
+ with with_host(cephadm_module, 'host2'):
+ with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)),
+ CephadmOrchestrator.apply_mgr, '', status_running=True):
+ with with_service(cephadm_module, MDSSpec('mds', 'test-id', placement=PlacementSpec(count=2)),
+ CephadmOrchestrator.apply_mds, '', status_running=True):
+
+                        # with no service-type. Should provide info for both services
+ c = cephadm_module.describe_service()
+ out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
+ expected = [
+ {
+ 'placement': {'count': 2},
+ 'service_name': 'mgr',
+ 'service_type': 'mgr',
+ 'status': {'created': mock.ANY,
+ 'running': 2,
+ 'size': 2}
+ },
+ {
+ 'placement': {'count': 2},
+ 'service_id': 'test-id',
+ 'service_name': 'mds.test-id',
+ 'service_type': 'mds',
+ 'status': {'created': mock.ANY,
+ 'running': 2,
+ 'size': 2}
+ },
+ ]
+
+ for o in out:
+ if 'events' in o:
+ del o['events'] # delete it, as it contains a timestamp
+ assert out == expected
+
+                        # with service-type. Should provide info for only mds
+ c = cephadm_module.describe_service(service_type='mds')
+ out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
+ expected = [
+ {
+ 'placement': {'count': 2},
+ 'service_id': 'test-id',
+ 'service_name': 'mds.test-id',
+ 'service_type': 'mds',
+ 'status': {'created': mock.ANY,
+ 'running': 2,
+ 'size': 2}
+ },
+ ]
+
+ for o in out:
+ if 'events' in o:
+ del o['events'] # delete it, as it contains a timestamp
+ assert out == expected
+
+ # service-type should not match with service names
+ c = cephadm_module.describe_service(service_type='mds.test-id')
+ out = [dict(o.to_json()) for o in wait(cephadm_module, c)]
+ assert out == []
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_device_ls(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ c = cephadm_module.get_inventory()
+ assert wait(cephadm_module, c) == [InventoryHost('test')]
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(
+ json.dumps([
+ dict(
+ name='rgw.myrgw.foobar',
+ style='cephadm',
+ fsid='fsid',
+ container_id='container_id',
+ version='version',
+ state='running',
+ ),
+ dict(
+ name='something.foo.bar',
+ style='cephadm',
+ fsid='fsid',
+ ),
+ dict(
+ name='haproxy.test.bar',
+ style='cephadm',
+ fsid='fsid',
+ ),
+
+ ])
+ ))
+ def test_list_daemons(self, cephadm_module: CephadmOrchestrator):
+ cephadm_module.service_cache_timeout = 10
+ with with_host(cephadm_module, 'test'):
+ CephadmServe(cephadm_module)._refresh_host_daemons('test')
+ dds = wait(cephadm_module, cephadm_module.list_daemons())
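+            # daemons of types the orchestrator does not recognize ('something') are filtered out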
+ assert {d.name() for d in dds} == {'rgw.myrgw.foobar', 'haproxy.test.bar'}
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_daemon_action(self, cephadm_module: CephadmOrchestrator):
+ cephadm_module.service_cache_timeout = 10
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, RGWSpec(service_id='myrgw.foobar', unmanaged=True)) as _, \
+ with_daemon(cephadm_module, RGWSpec(service_id='myrgw.foobar'), 'test') as daemon_id:
+
+ d_name = 'rgw.' + daemon_id
+
+ c = cephadm_module.daemon_action('redeploy', d_name)
+ assert wait(cephadm_module,
+ c) == f"Scheduled to redeploy rgw.{daemon_id} on host 'test'"
+
+ for what in ('start', 'stop', 'restart'):
+ c = cephadm_module.daemon_action(what, d_name)
+ assert wait(cephadm_module,
+ c) == F"Scheduled to {what} {d_name} on host 'test'"
+
+ # Make sure, _check_daemons does a redeploy due to monmap change:
+ cephadm_module._store['_ceph_get/mon_map'] = {
+ 'modified': datetime_to_str(datetime_now()),
+ 'fsid': 'foobar',
+ }
+ cephadm_module.notify('mon_map', None)
+
+ CephadmServe(cephadm_module)._check_daemons()
+
+ assert cephadm_module.events.get_for_daemon(d_name) == [
+ OrchestratorEvent(mock.ANY, 'daemon', d_name, 'INFO',
+ f"Deployed {d_name} on host \'test\'"),
+ OrchestratorEvent(mock.ANY, 'daemon', d_name, 'INFO',
+ f"stop {d_name} from host \'test\'"),
+ ]
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_daemon_action_fail(self, cephadm_module: CephadmOrchestrator):
+ cephadm_module.service_cache_timeout = 10
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, RGWSpec(service_id='myrgw.foobar', unmanaged=True)) as _, \
+ with_daemon(cephadm_module, RGWSpec(service_id='myrgw.foobar'), 'test') as daemon_id:
+ with mock.patch('ceph_module.BaseMgrModule._ceph_send_command') as _ceph_send_command:
+
+ _ceph_send_command.side_effect = Exception("myerror")
+
+ # Make sure, _check_daemons does a redeploy due to monmap change:
+ cephadm_module.mock_store_set('_ceph_get', 'mon_map', {
+ 'modified': datetime_to_str(datetime_now()),
+ 'fsid': 'foobar',
+ })
+ cephadm_module.notify('mon_map', None)
+
+ CephadmServe(cephadm_module)._check_daemons()
+
+ evs = [e.message for e in cephadm_module.events.get_for_daemon(
+ f'rgw.{daemon_id}')]
+
+ assert 'myerror' in ''.join(evs)
+
+ @pytest.mark.parametrize(
+ "action",
+ [
+ 'start',
+ 'stop',
+ 'restart',
+ 'reconfig',
+ 'redeploy'
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_daemon_check(self, cephadm_module: CephadmOrchestrator, action):
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='grafana'), CephadmOrchestrator.apply_grafana, 'test') as d_names:
+ [daemon_name] = d_names
+
+ cephadm_module._schedule_daemon_action(daemon_name, action)
+
+ assert cephadm_module.cache.get_scheduled_daemon_action(
+ 'test', daemon_name) == action
+
+ CephadmServe(cephadm_module)._check_daemons()
+
+ assert cephadm_module.cache.get_scheduled_daemon_action('test', daemon_name) is None
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_daemon_check_extra_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+
+ with with_host(cephadm_module, 'test'):
+
+ # Also testing deploying mons without explicit network placement
+ cephadm_module.check_mon_command({
+ 'prefix': 'config set',
+ 'who': 'mon',
+ 'name': 'public_network',
+ 'value': '127.0.0.0/8'
+ })
+
+ cephadm_module.cache.update_host_devices_networks(
+ 'test',
+ [],
+ {
+ "127.0.0.0/8": [
+ "127.0.0.1"
+ ],
+ }
+ )
+
+ with with_service(cephadm_module, ServiceSpec(service_type='mon'), CephadmOrchestrator.apply_mon, 'test') as d_names:
+ [daemon_name] = d_names
+
+ cephadm_module._set_extra_ceph_conf('[mon]\nk=v')
+
+ CephadmServe(cephadm_module)._check_daemons()
+
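+                # the extra [mon] snippet should be merged into the generated config and pushed out via --reconfig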
+ _run_cephadm.assert_called_with(
+ 'test', 'mon.test', 'deploy', [
+ '--name', 'mon.test',
+ '--meta-json', '{"service_name": "mon", "ports": [], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
+ '--config-json', '-',
+ '--reconfig',
+ ],
+ stdin='{"config": "\\n\\n[mon]\\nk=v\\n[mon.test]\\npublic network = 127.0.0.0/8\\n", '
+ + '"keyring": "", "files": {"config": "[mon.test]\\npublic network = 127.0.0.0/8\\n"}}',
+ image='')
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_extra_container_args(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='crash', extra_container_args=['--cpus=2', '--quiet']), CephadmOrchestrator.apply_crash):
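+                # every entry of extra_container_args is passed through as its own --extra-container-args flag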
+ _run_cephadm.assert_called_with(
+ 'test', 'crash.test', 'deploy', [
+ '--name', 'crash.test',
+ '--meta-json', '{"service_name": "crash", "ports": [], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": ["--cpus=2", "--quiet"]}',
+ '--config-json', '-',
+ '--extra-container-args=--cpus=2',
+ '--extra-container-args=--quiet'
+ ],
+ stdin='{"config": "", "keyring": ""}',
+ image='',
+ )
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_daemon_check_post(self, cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='grafana'), CephadmOrchestrator.apply_grafana, 'test'):
+
+ # Make sure, _check_daemons does a redeploy due to monmap change:
+ cephadm_module.mock_store_set('_ceph_get', 'mon_map', {
+ 'modified': datetime_to_str(datetime_now()),
+ 'fsid': 'foobar',
+ })
+ cephadm_module.notify('mon_map', None)
+ cephadm_module.mock_store_set('_ceph_get', 'mgr_map', {
+ 'modules': ['dashboard']
+ })
+
+ with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd:
+ CephadmServe(cephadm_module)._check_daemons()
+ _mon_cmd.assert_any_call(
+ {'prefix': 'dashboard set-grafana-api-url', 'value': 'https://[1::4]:3000'},
+ None)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1.2.3.4')
+ def test_iscsi_post_actions_with_missing_daemon_in_cache(self, cephadm_module: CephadmOrchestrator):
+ # https://tracker.ceph.com/issues/52866
+ with with_host(cephadm_module, 'test1'):
+ with with_host(cephadm_module, 'test2'):
+ with with_service(cephadm_module, IscsiServiceSpec(service_id='foobar', pool='pool', placement=PlacementSpec(host_pattern='*')), CephadmOrchestrator.apply_iscsi, 'test'):
+
+ CephadmServe(cephadm_module)._apply_all_services()
+ assert len(cephadm_module.cache.get_daemons_by_type('iscsi')) == 2
+
+                    # grab the daemons from the post-action set (order is arbitrary, since it's a set)
+ tempset = cephadm_module.requires_post_actions.copy()
+ tempdeamon1 = tempset.pop()
+ tempdeamon2 = tempset.pop()
+
+ # make sure post actions has 2 daemons in it
+ assert len(cephadm_module.requires_post_actions) == 2
+
+ # replicate a host cache that is not in sync when check_daemons is called
+ tempdd1 = cephadm_module.cache.get_daemon(tempdeamon1)
+ tempdd2 = cephadm_module.cache.get_daemon(tempdeamon2)
+ host = 'test1'
+ if 'test1' not in tempdeamon1:
+ host = 'test2'
+ cephadm_module.cache.rm_daemon(host, tempdeamon1)
+
+ # Make sure, _check_daemons does a redeploy due to monmap change:
+ cephadm_module.mock_store_set('_ceph_get', 'mon_map', {
+ 'modified': datetime_to_str(datetime_now()),
+ 'fsid': 'foobar',
+ })
+ cephadm_module.notify('mon_map', None)
+ cephadm_module.mock_store_set('_ceph_get', 'mgr_map', {
+ 'modules': ['dashboard']
+ })
+
+ with mock.patch("cephadm.module.IscsiService.config_dashboard") as _cfg_db:
+ CephadmServe(cephadm_module)._check_daemons()
+ _cfg_db.assert_called_once_with([tempdd2])
+
+                        # post actions still has the other daemon in it and will run on the next _check_daemons
+ assert len(cephadm_module.requires_post_actions) == 1
+
+                        # the post action was missed for the daemon we removed from the cache
+ assert tempdeamon1 in cephadm_module.requires_post_actions
+
+ # put the daemon back in the cache
+ cephadm_module.cache.add_daemon(host, tempdd1)
+
+ _cfg_db.reset_mock()
+ # replicate serve loop running again
+ CephadmServe(cephadm_module)._check_daemons()
+
+ # post actions should have been called again
+                        _cfg_db.assert_called()
+
+ # post actions is now empty
+ assert len(cephadm_module.requires_post_actions) == 0
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_mon_add(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec(service_type='mon', unmanaged=True)):
+ ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1)
+ c = cephadm_module.add_daemon(ServiceSpec('mon', placement=ps))
+ assert wait(cephadm_module, c) == ["Deployed mon.a on host 'test'"]
+
+ with pytest.raises(OrchestratorError, match="Must set public_network config option or specify a CIDR network,"):
+ ps = PlacementSpec(hosts=['test'], count=1)
+ c = cephadm_module.add_daemon(ServiceSpec('mon', placement=ps))
+ wait(cephadm_module, c)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_mgr_update(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1)
+ r = CephadmServe(cephadm_module)._apply_service(ServiceSpec('mgr', placement=ps))
+ assert r
+
+ assert_rm_daemon(cephadm_module, 'mgr.a', 'test')
+
+ @mock.patch("cephadm.module.CephadmOrchestrator.mon_command")
+ def test_find_destroyed_osds(self, _mon_cmd, cephadm_module):
+ dict_out = {
+ "nodes": [
+ {
+ "id": -1,
+ "name": "default",
+ "type": "root",
+ "type_id": 11,
+ "children": [
+ -3
+ ]
+ },
+ {
+ "id": -3,
+ "name": "host1",
+ "type": "host",
+ "type_id": 1,
+ "pool_weights": {},
+ "children": [
+ 0
+ ]
+ },
+ {
+ "id": 0,
+ "device_class": "hdd",
+ "name": "osd.0",
+ "type": "osd",
+ "type_id": 0,
+ "crush_weight": 0.0243988037109375,
+ "depth": 2,
+ "pool_weights": {},
+ "exists": 1,
+ "status": "destroyed",
+ "reweight": 1,
+ "primary_affinity": 1
+ }
+ ],
+ "stray": []
+ }
+ json_out = json.dumps(dict_out)
+ _mon_cmd.return_value = (0, json_out, '')
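+        # OsdIdClaims parses the (mocked) osd tree output and records destroyed OSD ids per host so they can be reclaimed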
+ osd_claims = OsdIdClaims(cephadm_module)
+ assert osd_claims.get() == {'host1': ['0']}
+ assert osd_claims.filtered_by_host('host1') == ['0']
+ assert osd_claims.filtered_by_host('host1.domain.com') == ['0']
+
+    @pytest.mark.parametrize(
+ "ceph_services, cephadm_daemons, strays_expected, metadata",
+ # [ ([(daemon_type, daemon_id), ... ], [...], [...]), ... ]
+ [
+ (
+ [('mds', 'a'), ('osd', '0'), ('mgr', 'x')],
+ [],
+ [('mds', 'a'), ('osd', '0'), ('mgr', 'x')],
+ {},
+ ),
+ (
+ [('mds', 'a'), ('osd', '0'), ('mgr', 'x')],
+ [('mds', 'a'), ('osd', '0'), ('mgr', 'x')],
+ [],
+ {},
+ ),
+ (
+ [('mds', 'a'), ('osd', '0'), ('mgr', 'x')],
+ [('mds', 'a'), ('osd', '0')],
+ [('mgr', 'x')],
+ {},
+ ),
+ # https://tracker.ceph.com/issues/49573
+ (
+ [('rgw-nfs', '14649')],
+ [],
+ [('nfs', 'foo-rgw.host1')],
+ {'14649': {'id': 'nfs.foo-rgw.host1-rgw'}},
+ ),
+ (
+ [('rgw-nfs', '14649'), ('rgw-nfs', '14650')],
+ [('nfs', 'foo-rgw.host1'), ('nfs', 'foo2.host2')],
+ [],
+ {'14649': {'id': 'nfs.foo-rgw.host1-rgw'}, '14650': {'id': 'nfs.foo2.host2-rgw'}},
+ ),
+ (
+ [('rgw-nfs', '14649'), ('rgw-nfs', '14650')],
+ [('nfs', 'foo-rgw.host1')],
+ [('nfs', 'foo2.host2')],
+ {'14649': {'id': 'nfs.foo-rgw.host1-rgw'}, '14650': {'id': 'nfs.foo2.host2-rgw'}},
+ ),
+ ]
+ )
+ def test_check_for_stray_daemons(
+ self,
+ cephadm_module,
+ ceph_services,
+ cephadm_daemons,
+ strays_expected,
+ metadata
+ ):
+ # mock ceph service-map
+ services = []
+ for service in ceph_services:
+ s = {'type': service[0], 'id': service[1]}
+ services.append(s)
+ ls = [{'hostname': 'host1', 'services': services}]
+
+ with mock.patch.object(cephadm_module, 'list_servers', mock.MagicMock()) as list_servers:
+ list_servers.return_value = ls
+ list_servers.__iter__.side_effect = ls.__iter__
+
+ # populate cephadm daemon cache
+ dm = {}
+ for daemon_type, daemon_id in cephadm_daemons:
+ dd = DaemonDescription(daemon_type=daemon_type, daemon_id=daemon_id)
+ dm[dd.name()] = dd
+ cephadm_module.cache.update_host_daemons('host1', dm)
+
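+            # mock get_metadata so rgw-nfs services can be mapped to their cephadm nfs daemon names (see tracker 49573 above)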
+ def get_metadata_mock(svc_type, svc_id, default):
+ return metadata[svc_id]
+
+ with mock.patch.object(cephadm_module, 'get_metadata', new_callable=lambda: get_metadata_mock):
+
+ # test
+ CephadmServe(cephadm_module)._check_for_strays()
+
+ # verify
+ strays = cephadm_module.health_checks.get('CEPHADM_STRAY_DAEMON')
+ if not strays:
+ assert len(strays_expected) == 0
+ else:
+ for dt, di in strays_expected:
+ name = '%s.%s' % (dt, di)
+ for detail in strays['detail']:
+ if name in detail:
+ strays['detail'].remove(detail)
+ break
+ assert name in detail
+ assert len(strays['detail']) == 0
+ assert strays['count'] == len(strays_expected)
+
+ @mock.patch("cephadm.module.CephadmOrchestrator.mon_command")
+ def test_find_destroyed_osds_cmd_failure(self, _mon_cmd, cephadm_module):
+ _mon_cmd.return_value = (1, "", "fail_msg")
+ with pytest.raises(OrchestratorError):
+ OsdIdClaims(cephadm_module)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_apply_osd_save(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+ with with_host(cephadm_module, 'test'):
+
+ spec = DriveGroupSpec(
+ service_id='foo',
+ placement=PlacementSpec(
+ host_pattern='*',
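+            # ceph-volume calls return the mocked lvm listing, while the deploy step is forced to fail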
+ ),
+ data_devices=DeviceSelection(
+ all=True
+ )
+ )
+
+ c = cephadm_module.apply([spec])
+ assert wait(cephadm_module, c) == ['Scheduled osd.foo update...']
+
+ inventory = Devices([
+ Device(
+ '/dev/sdb',
+ available=True
+ ),
+ ])
+
+ cephadm_module.cache.update_host_devices_networks('test', inventory.devices, {})
+
+ _run_cephadm.return_value = (['{}'], '', 0)
+
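+            # applying the spec should drive a 'ceph-volume lvm batch' against the matching device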
+ assert CephadmServe(cephadm_module)._apply_all_services() is False
+
+ _run_cephadm.assert_any_call(
+ 'test', 'osd', 'ceph-volume',
+ ['--config-json', '-', '--', 'lvm', 'batch',
+ '--no-auto', '/dev/sdb', '--yes', '--no-systemd'],
+ env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=foo'], error_ok=True, stdin='{"config": "", "keyring": ""}')
+ _run_cephadm.assert_any_call(
+ 'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False)
+ _run_cephadm.assert_any_call(
+ 'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_apply_osd_save_non_collocated(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+ with with_host(cephadm_module, 'test'):
+
+ spec = DriveGroupSpec(
+ service_id='noncollocated',
+ placement=PlacementSpec(
+ hosts=['test']
+ ),
+ data_devices=DeviceSelection(paths=['/dev/sdb']),
+ db_devices=DeviceSelection(paths=['/dev/sdc']),
+ wal_devices=DeviceSelection(paths=['/dev/sdd'])
+ )
+
+ c = cephadm_module.apply([spec])
+ assert wait(cephadm_module, c) == ['Scheduled osd.noncollocated update...']
+
+ inventory = Devices([
+ Device('/dev/sdb', available=True),
+ Device('/dev/sdc', available=True),
+ Device('/dev/sdd', available=True)
+ ])
+
+ cephadm_module.cache.update_host_devices_networks('test', inventory.devices, {})
+
+ _run_cephadm.return_value = (['{}'], '', 0)
+
+ assert CephadmServe(cephadm_module)._apply_all_services() is False
+
+ _run_cephadm.assert_any_call(
+ 'test', 'osd', 'ceph-volume',
+ ['--config-json', '-', '--', 'lvm', 'batch',
+ '--no-auto', '/dev/sdb', '--db-devices', '/dev/sdc',
+ '--wal-devices', '/dev/sdd', '--yes', '--no-systemd'],
+ env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=noncollocated'],
+ error_ok=True, stdin='{"config": "", "keyring": ""}')
+ _run_cephadm.assert_any_call(
+ 'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False)
+ _run_cephadm.assert_any_call(
+ 'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.module.SpecStore.save")
+ def test_apply_osd_save_placement(self, _save_spec, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ json_spec = {'service_type': 'osd', 'placement': {'host_pattern': 'test'},
+ 'service_id': 'foo', 'data_devices': {'all': True}}
+ spec = ServiceSpec.from_json(json_spec)
+ assert isinstance(spec, DriveGroupSpec)
+ c = cephadm_module.apply([spec])
+ assert wait(cephadm_module, c) == ['Scheduled osd.foo update...']
+ _save_spec.assert_called_with(spec)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_create_osds(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
+ data_devices=DeviceSelection(paths=['']))
+ c = cephadm_module.create_osds(dg)
+ out = wait(cephadm_module, c)
+ assert out == "Created no osd(s) on host test; already created?"
+ bad_dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='invalid_hsot'),
+ data_devices=DeviceSelection(paths=['']))
+ c = cephadm_module.create_osds(bad_dg)
+ out = wait(cephadm_module, c)
+ assert "Invalid 'host:device' spec: host not found in cluster" in out
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_create_noncollocated_osd(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
+ data_devices=DeviceSelection(paths=['']))
+ c = cephadm_module.create_osds(dg)
+ out = wait(cephadm_module, c)
+ assert out == "Created no osd(s) on host test; already created?"
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch('cephadm.services.osd.OSDService._run_ceph_volume_command')
+ @mock.patch('cephadm.services.osd.OSDService.driveselection_to_ceph_volume')
+ @mock.patch('cephadm.services.osd.OsdIdClaims.refresh', lambda _: None)
+ @mock.patch('cephadm.services.osd.OsdIdClaims.get', lambda _: {})
+ def test_limit_not_reached(self, d_to_cv, _run_cv_cmd, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
+ data_devices=DeviceSelection(limit=5, rotational=1),
+ service_id='not_enough')
+
+ disks_found = [
+ '[{"data": "/dev/vdb", "data_size": "50.00 GB", "encryption": "None"}, {"data": "/dev/vdc", "data_size": "50.00 GB", "encryption": "None"}]']
+ d_to_cv.return_value = 'foo'
+ _run_cv_cmd.return_value = (disks_found, '', 0)
+ preview = cephadm_module.osd_service.generate_previews([dg], 'test')
+
+ for osd in preview:
+ assert 'notes' in osd
+ assert osd['notes'] == [
+ 'NOTE: Did not find enough disks matching filter on host test to reach data device limit (Found: 2 | Limit: 5)']
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_prepare_drivegroup(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
+ data_devices=DeviceSelection(paths=['']))
+ out = cephadm_module.osd_service.prepare_drivegroup(dg)
+ assert len(out) == 1
+ f1 = out[0]
+ assert f1[0] == 'test'
+ assert isinstance(f1[1], DriveSelection)
+
+ @pytest.mark.parametrize(
+ "devices, preview, exp_commands",
+ [
+            # no preview and only one disk; prepare is used due to the hack that is in place
+ (['/dev/sda'], False, ["lvm batch --no-auto /dev/sda --yes --no-systemd"]),
+ # no preview and multiple disks, uses batch
+ (['/dev/sda', '/dev/sdb'], False,
+ ["CEPH_VOLUME_OSDSPEC_AFFINITY=test.spec lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd"]),
+            # preview with a single disk still needs to use batch to generate the preview
+ (['/dev/sda'], True, ["lvm batch --no-auto /dev/sda --yes --no-systemd --report --format json"]),
+ # preview and multiple disks work the same
+ (['/dev/sda', '/dev/sdb'], True,
+ ["CEPH_VOLUME_OSDSPEC_AFFINITY=test.spec lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd --report --format json"]),
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_driveselection_to_ceph_volume(self, cephadm_module, devices, preview, exp_commands):
+ with with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec(service_id='test.spec', placement=PlacementSpec(
+ host_pattern='test'), data_devices=DeviceSelection(paths=devices))
+ ds = DriveSelection(dg, Devices([Device(path) for path in devices]))
+ preview = preview
+ out = cephadm_module.osd_service.driveselection_to_ceph_volume(ds, [], preview)
+            assert all(any(cmd in exp_cmd for exp_cmd in exp_commands) for cmd in out), f'Expected cmds from {out} in {exp_commands}'
+
+ @pytest.mark.parametrize(
+ "devices, preview, exp_commands",
+ [
+ # one data device, no preview
+ (['/dev/sda'], False, ["raw prepare --bluestore --data /dev/sda"]),
+ # multiple data devices, no preview
+ (['/dev/sda', '/dev/sdb'], False,
+ ["raw prepare --bluestore --data /dev/sda", "raw prepare --bluestore --data /dev/sdb"]),
+ # one data device, preview
+ (['/dev/sda'], True, ["raw prepare --bluestore --data /dev/sda --report --format json"]),
+ # multiple data devices, preview
+ (['/dev/sda', '/dev/sdb'], True,
+ ["raw prepare --bluestore --data /dev/sda --report --format json", "raw prepare --bluestore --data /dev/sdb --report --format json"]),
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_raw_driveselection_to_ceph_volume(self, cephadm_module, devices, preview, exp_commands):
+ with with_host(cephadm_module, 'test'):
+ dg = DriveGroupSpec(service_id='test.spec', method='raw', placement=PlacementSpec(
+ host_pattern='test'), data_devices=DeviceSelection(paths=devices))
+ ds = DriveSelection(dg, Devices([Device(path) for path in devices]))
+ preview = preview
+ out = cephadm_module.osd_service.driveselection_to_ceph_volume(ds, [], preview)
+            assert all(any(cmd in exp_cmd for exp_cmd in exp_commands) for cmd in out), f'Expected cmds from {out} in {exp_commands}'
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(
+ json.dumps([
+ dict(
+ name='osd.0',
+ style='cephadm',
+ fsid='fsid',
+ container_id='container_id',
+ version='version',
+ state='running',
+ )
+ ])
+ ))
+ @mock.patch("cephadm.services.osd.OSD.exists", True)
+ @mock.patch("cephadm.services.osd.RemoveUtil.get_pg_count", lambda _, __: 0)
+ def test_remove_osds(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ CephadmServe(cephadm_module)._refresh_host_daemons('test')
+ c = cephadm_module.list_daemons()
+ wait(cephadm_module, c)
+
+ c = cephadm_module.remove_daemons(['osd.0'])
+ out = wait(cephadm_module, c)
+ assert out == ["Removed osd.0 from host 'test'"]
+
+ cephadm_module.to_remove_osds.enqueue(OSD(osd_id=0,
+ replace=False,
+ force=False,
+ hostname='test',
+ process_started_at=datetime_now(),
+ remove_util=cephadm_module.to_remove_osds.rm_util
+ ))
+ cephadm_module.to_remove_osds.process_removal_queue()
+ assert cephadm_module.to_remove_osds == OSDRemovalQueue(cephadm_module)
+
+ c = cephadm_module.remove_osds_status()
+ out = wait(cephadm_module, c)
+ assert out == []
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_rgw_update(self, cephadm_module):
+ with with_host(cephadm_module, 'host1'):
+ with with_host(cephadm_module, 'host2'):
+ with with_service(cephadm_module, RGWSpec(service_id="foo", unmanaged=True)):
+ ps = PlacementSpec(hosts=['host1'], count=1)
+ c = cephadm_module.add_daemon(
+ RGWSpec(service_id="foo", placement=ps))
+ [out] = wait(cephadm_module, c)
+ match_glob(out, "Deployed rgw.foo.* on host 'host1'")
+
+ ps = PlacementSpec(hosts=['host1', 'host2'], count=2)
+ r = CephadmServe(cephadm_module)._apply_service(
+ RGWSpec(service_id="foo", placement=ps))
+ assert r
+
+ assert_rm_daemon(cephadm_module, 'rgw.foo', 'host1')
+ assert_rm_daemon(cephadm_module, 'rgw.foo', 'host2')
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(
+ json.dumps([
+ dict(
+ name='rgw.myrgw.myhost.myid',
+ style='cephadm',
+ fsid='fsid',
+ container_id='container_id',
+ version='version',
+ state='running',
+ )
+ ])
+ ))
+ def test_remove_daemon(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ CephadmServe(cephadm_module)._refresh_host_daemons('test')
+ c = cephadm_module.list_daemons()
+ wait(cephadm_module, c)
+ c = cephadm_module.remove_daemons(['rgw.myrgw.myhost.myid'])
+ out = wait(cephadm_module, c)
+ assert out == ["Removed rgw.myrgw.myhost.myid from host 'test'"]
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_remove_duplicate_osds(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+ with with_host(cephadm_module, 'host1'):
+ with with_host(cephadm_module, 'host2'):
+ with with_osd_daemon(cephadm_module, _run_cephadm, 'host1', 1) as dd1: # type: DaemonDescription
+ with with_osd_daemon(cephadm_module, _run_cephadm, 'host2', 1) as dd2: # type: DaemonDescription
+ CephadmServe(cephadm_module)._check_for_moved_osds()
+ # both are in status "starting"
+ assert len(cephadm_module.cache.get_daemons()) == 2
+
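+                        # mark the host1 copy running and the host2 copy errored; the errored duplicate should be removed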
+ dd1.status = DaemonDescriptionStatus.running
+ dd2.status = DaemonDescriptionStatus.error
+ cephadm_module.cache.update_host_daemons(dd1.hostname, {dd1.name(): dd1})
+ cephadm_module.cache.update_host_daemons(dd2.hostname, {dd2.name(): dd2})
+ CephadmServe(cephadm_module)._check_for_moved_osds()
+ assert len(cephadm_module.cache.get_daemons()) == 1
+
+ assert cephadm_module.events.get_for_daemon('osd.1') == [
+ OrchestratorEvent(mock.ANY, 'daemon', 'osd.1', 'INFO',
+ "Deployed osd.1 on host 'host1'"),
+ OrchestratorEvent(mock.ANY, 'daemon', 'osd.1', 'INFO',
+ "Deployed osd.1 on host 'host2'"),
+ OrchestratorEvent(mock.ANY, 'daemon', 'osd.1', 'INFO',
+ "Removed duplicated daemon on host 'host2'"),
+ ]
+
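+                    # while the surviving daemon still exists, its auth key must not have been removed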
+ with pytest.raises(AssertionError):
+ cephadm_module.assert_issued_mon_command({
+ 'prefix': 'auth rm',
+ 'entity': 'osd.1',
+ })
+
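+                # once the remaining osd daemon is torn down (with_osd_daemon exit), its auth entry is removed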
+ cephadm_module.assert_issued_mon_command({
+ 'prefix': 'auth rm',
+ 'entity': 'osd.1',
+ })
+
+ @pytest.mark.parametrize(
+ "spec",
+ [
+ ServiceSpec('crash'),
+ ServiceSpec('prometheus'),
+ ServiceSpec('grafana'),
+ ServiceSpec('node-exporter'),
+ ServiceSpec('alertmanager'),
+ ServiceSpec('rbd-mirror'),
+ ServiceSpec('cephfs-mirror'),
+ ServiceSpec('mds', service_id='fsname'),
+ RGWSpec(rgw_realm='realm', rgw_zone='zone'),
+ RGWSpec(service_id="foo"),
+ ServiceSpec('cephadm-exporter'),
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._deploy_cephadm_binary", _deploy_cephadm_binary('test'))
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_daemon_add(self, spec: ServiceSpec, cephadm_module):
+ unmanaged_spec = ServiceSpec.from_json(spec.to_json())
+ unmanaged_spec.unmanaged = True
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, unmanaged_spec):
+ with with_daemon(cephadm_module, spec, 'test'):
+ pass
+
+ @pytest.mark.parametrize(
+ "entity,success,spec",
+ [
+ ('mgr.x', True, ServiceSpec(
+ service_type='mgr',
+ placement=PlacementSpec(hosts=[HostPlacementSpec('test', '', 'x')], count=1),
+ unmanaged=True)
+ ), # noqa: E124
+ ('client.rgw.x', True, ServiceSpec(
+ service_type='rgw',
+ service_id='id',
+ placement=PlacementSpec(hosts=[HostPlacementSpec('test', '', 'x')], count=1),
+ unmanaged=True)
+ ), # noqa: E124
+ ('client.nfs.x', True, ServiceSpec(
+ service_type='nfs',
+ service_id='id',
+ placement=PlacementSpec(hosts=[HostPlacementSpec('test', '', 'x')], count=1),
+ unmanaged=True)
+ ), # noqa: E124
+ ('mon.', False, ServiceSpec(
+ service_type='mon',
+ placement=PlacementSpec(
+ hosts=[HostPlacementSpec('test', '127.0.0.0/24', 'x')], count=1),
+ unmanaged=True)
+ ), # noqa: E124
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock())
+ @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock())
+ @mock.patch("cephadm.services.nfs.NFSService.create_rados_config_obj", mock.MagicMock())
+ def test_daemon_add_fail(self, _run_cephadm, entity, success, spec, cephadm_module):
+ _run_cephadm.return_value = '{}', '', 0
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec):
+ _run_cephadm.side_effect = OrchestratorError('fail')
+ with pytest.raises(OrchestratorError):
+ wait(cephadm_module, cephadm_module.add_daemon(spec))
+ if success:
+ cephadm_module.assert_issued_mon_command({
+ 'prefix': 'auth rm',
+ 'entity': entity,
+ })
+ else:
+ with pytest.raises(AssertionError):
+ cephadm_module.assert_issued_mon_command({
+ 'prefix': 'auth rm',
+ 'entity': entity,
+ })
+ assert cephadm_module.events.get_for_service(spec.service_name()) == [
+ OrchestratorEvent(mock.ANY, 'service', spec.service_name(), 'INFO',
+ "service was created"),
+ OrchestratorEvent(mock.ANY, 'service', spec.service_name(), 'ERROR',
+ "fail"),
+ ]
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_daemon_place_fail_health_warning(self, _run_cephadm, cephadm_module):
+ _run_cephadm.return_value = ('{}', '', 0)
+ with with_host(cephadm_module, 'test'):
+ _run_cephadm.side_effect = OrchestratorError('fail')
+ ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1)
+ r = CephadmServe(cephadm_module)._apply_service(ServiceSpec('mgr', placement=ps))
+ assert not r
+ assert cephadm_module.health_checks.get('CEPHADM_DAEMON_PLACE_FAIL') is not None
+ assert cephadm_module.health_checks['CEPHADM_DAEMON_PLACE_FAIL']['count'] == 1
+ assert 'Failed to place 1 daemon(s)' in cephadm_module.health_checks['CEPHADM_DAEMON_PLACE_FAIL']['summary']
+ assert 'Failed while placing mgr.a on test: fail' in cephadm_module.health_checks['CEPHADM_DAEMON_PLACE_FAIL']['detail']
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_apply_spec_fail_health_warning(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+ with with_host(cephadm_module, 'test'):
+ CephadmServe(cephadm_module)._apply_all_services()
+ ps = PlacementSpec(hosts=['fail'], count=1)
+ r = CephadmServe(cephadm_module)._apply_service(ServiceSpec('mgr', placement=ps))
+ assert not r
+ assert cephadm_module.apply_spec_fails
+ assert cephadm_module.health_checks.get('CEPHADM_APPLY_SPEC_FAIL') is not None
+ assert cephadm_module.health_checks['CEPHADM_APPLY_SPEC_FAIL']['count'] == 1
+ assert 'Failed to apply 1 service(s)' in cephadm_module.health_checks['CEPHADM_APPLY_SPEC_FAIL']['summary']
+
+ @mock.patch("cephadm.module.CephadmOrchestrator.get_foreign_ceph_option")
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_invalid_config_option_health_warning(self, _run_cephadm, get_foreign_ceph_option, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+ with with_host(cephadm_module, 'test'):
+ ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1)
+ get_foreign_ceph_option.side_effect = KeyError
+ CephadmServe(cephadm_module)._apply_service_config(
+ ServiceSpec('mgr', placement=ps, config={'test': 'foo'}))
+ assert cephadm_module.health_checks.get('CEPHADM_INVALID_CONFIG_OPTION') is not None
+ assert cephadm_module.health_checks['CEPHADM_INVALID_CONFIG_OPTION']['count'] == 1
+ assert 'Ignoring 1 invalid config option(s)' in cephadm_module.health_checks[
+ 'CEPHADM_INVALID_CONFIG_OPTION']['summary']
+ assert 'Ignoring invalid mgr config option test' in cephadm_module.health_checks[
+ 'CEPHADM_INVALID_CONFIG_OPTION']['detail']
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock())
+ @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock())
+ @mock.patch("cephadm.services.nfs.NFSService.create_rados_config_obj", mock.MagicMock())
+ def test_nfs(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ ps = PlacementSpec(hosts=['test'], count=1)
+ spec = NFSServiceSpec(
+ service_id='name',
+ placement=ps)
+ unmanaged_spec = ServiceSpec.from_json(spec.to_json())
+ unmanaged_spec.unmanaged = True
+ with with_service(cephadm_module, unmanaged_spec):
+ c = cephadm_module.add_daemon(spec)
+ [out] = wait(cephadm_module, c)
+ match_glob(out, "Deployed nfs.name.* on host 'test'")
+
+ assert_rm_daemon(cephadm_module, 'nfs.name.test', 'test')
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("subprocess.run", None)
+ @mock.patch("cephadm.module.CephadmOrchestrator.rados", mock.MagicMock())
+ @mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4')
+ def test_iscsi(self, cephadm_module):
+ with with_host(cephadm_module, 'test'):
+ ps = PlacementSpec(hosts=['test'], count=1)
+ spec = IscsiServiceSpec(
+ service_id='name',
+ pool='pool',
+ api_user='user',
+ api_password='password',
+ placement=ps)
+ unmanaged_spec = ServiceSpec.from_json(spec.to_json())
+ unmanaged_spec.unmanaged = True
+ with with_service(cephadm_module, unmanaged_spec):
+
+ c = cephadm_module.add_daemon(spec)
+ [out] = wait(cephadm_module, c)
+ match_glob(out, "Deployed iscsi.name.* on host 'test'")
+
+ assert_rm_daemon(cephadm_module, 'iscsi.name.test', 'test')
+
+ @pytest.mark.parametrize(
+ "on_bool",
+ [
+ True,
+ False
+ ]
+ )
+ @pytest.mark.parametrize(
+ "fault_ident",
+ [
+ 'fault',
+ 'ident'
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_blink_device_light(self, _run_cephadm, on_bool, fault_ident, cephadm_module):
+ _run_cephadm.return_value = '{}', '', 0
+ with with_host(cephadm_module, 'test'):
+ c = cephadm_module.blink_device_light(fault_ident, on_bool, [('test', '', 'dev')])
+ on_off = 'on' if on_bool else 'off'
+ assert wait(cephadm_module, c) == [f'Set {fault_ident} light for test: {on_off}']
+ _run_cephadm.assert_called_with('test', 'osd', 'shell', [
+ '--', 'lsmcli', f'local-disk-{fault_ident}-led-{on_off}', '--path', 'dev'], error_ok=True)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_blink_device_light_custom(self, _run_cephadm, cephadm_module):
+ _run_cephadm.return_value = '{}', '', 0
+ with with_host(cephadm_module, 'test'):
+ cephadm_module.set_store('blink_device_light_cmd', 'echo hello')
+ c = cephadm_module.blink_device_light('ident', True, [('test', '', '/dev/sda')])
+ assert wait(cephadm_module, c) == ['Set ident light for test: on']
+ _run_cephadm.assert_called_with('test', 'osd', 'shell', [
+ '--', 'echo', 'hello'], error_ok=True)
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_blink_device_light_custom_per_host(self, _run_cephadm, cephadm_module):
+ _run_cephadm.return_value = '{}', '', 0
+ with with_host(cephadm_module, 'mgr0'):
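+            # a per-host blink command stored under '<host>/blink_device_light_cmd' is rendered as a template with ident_fault/on/path/dev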
+ cephadm_module.set_store('mgr0/blink_device_light_cmd',
+ 'xyz --foo --{{ ident_fault }}={{\'on\' if on else \'off\'}} \'{{ path or dev }}\'')
+ c = cephadm_module.blink_device_light(
+ 'fault', True, [('mgr0', 'SanDisk_X400_M.2_2280_512GB_162924424784', '')])
+ assert wait(cephadm_module, c) == [
+ 'Set fault light for mgr0:SanDisk_X400_M.2_2280_512GB_162924424784 on']
+ _run_cephadm.assert_called_with('mgr0', 'osd', 'shell', [
+ '--', 'xyz', '--foo', '--fault=on', 'SanDisk_X400_M.2_2280_512GB_162924424784'
+ ], error_ok=True)
+
+ @pytest.mark.parametrize(
+ "spec, meth",
+ [
+ (ServiceSpec('mgr'), CephadmOrchestrator.apply_mgr),
+ (ServiceSpec('crash'), CephadmOrchestrator.apply_crash),
+ (ServiceSpec('prometheus'), CephadmOrchestrator.apply_prometheus),
+ (ServiceSpec('grafana'), CephadmOrchestrator.apply_grafana),
+ (ServiceSpec('node-exporter'), CephadmOrchestrator.apply_node_exporter),
+ (ServiceSpec('alertmanager'), CephadmOrchestrator.apply_alertmanager),
+ (ServiceSpec('rbd-mirror'), CephadmOrchestrator.apply_rbd_mirror),
+ (ServiceSpec('cephfs-mirror'), CephadmOrchestrator.apply_rbd_mirror),
+ (ServiceSpec('mds', service_id='fsname'), CephadmOrchestrator.apply_mds),
+ (ServiceSpec(
+ 'mds', service_id='fsname',
+ placement=PlacementSpec(
+ hosts=[HostPlacementSpec(
+ hostname='test',
+ name='fsname',
+ network=''
+ )]
+ )
+ ), CephadmOrchestrator.apply_mds),
+ (RGWSpec(service_id='foo'), CephadmOrchestrator.apply_rgw),
+ (RGWSpec(
+ service_id='bar',
+ rgw_realm='realm', rgw_zone='zone',
+ placement=PlacementSpec(
+ hosts=[HostPlacementSpec(
+ hostname='test',
+ name='bar',
+ network=''
+ )]
+ )
+ ), CephadmOrchestrator.apply_rgw),
+ (NFSServiceSpec(
+ service_id='name',
+ ), CephadmOrchestrator.apply_nfs),
+ (IscsiServiceSpec(
+ service_id='name',
+ pool='pool',
+ api_user='user',
+ api_password='password'
+ ), CephadmOrchestrator.apply_iscsi),
+ (CustomContainerSpec(
+ service_id='hello-world',
+ image='docker.io/library/hello-world:latest',
+ uid=65534,
+ gid=65534,
+ dirs=['foo/bar'],
+ files={
+ 'foo/bar/xyz.conf': 'aaa\nbbb'
+ },
+ bind_mounts=[[
+ 'type=bind',
+ 'source=lib/modules',
+ 'destination=/lib/modules',
+ 'ro=true'
+ ]],
+ volume_mounts={
+ 'foo/bar': '/foo/bar:Z'
+ },
+ args=['--no-healthcheck'],
+ envs=['SECRET=password'],
+ ports=[8080, 8443]
+ ), CephadmOrchestrator.apply_container),
+ (ServiceSpec('cephadm-exporter'), CephadmOrchestrator.apply_cephadm_exporter),
+ ]
+ )
+ @mock.patch("cephadm.serve.CephadmServe._deploy_cephadm_binary", _deploy_cephadm_binary('test'))
+ @mock.patch("subprocess.run", None)
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock())
+ @mock.patch("cephadm.services.nfs.NFSService.create_rados_config_obj", mock.MagicMock())
+ @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock())
+ @mock.patch("subprocess.run", mock.MagicMock())
+ def test_apply_save(self, spec: ServiceSpec, meth, cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec, meth, 'test'):
+ pass
+
+ @mock.patch("cephadm.serve.CephadmServe._deploy_cephadm_binary", _deploy_cephadm_binary('test'))
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_mds_config_purge(self, cephadm_module: CephadmOrchestrator):
+ spec = MDSSpec('mds', service_id='fsname', config={'test': 'foo'})
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec, host='test'):
+ ret, out, err = cephadm_module.check_mon_command({
+ 'prefix': 'config get',
+ 'who': spec.service_name(),
+ 'key': 'mds_join_fs',
+ })
+ assert out == 'fsname'
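+            # after the service is removed, mds_join_fs should have been purged from the config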
+ ret, out, err = cephadm_module.check_mon_command({
+ 'prefix': 'config get',
+ 'who': spec.service_name(),
+ 'key': 'mds_join_fs',
+ })
+ assert not out
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ @mock.patch("cephadm.services.cephadmservice.CephadmService.ok_to_stop")
+ def test_daemon_ok_to_stop(self, ok_to_stop, cephadm_module: CephadmOrchestrator):
+ spec = MDSSpec(
+ 'mds',
+ service_id='fsname',
+ placement=PlacementSpec(hosts=['host1', 'host2']),
+ config={'test': 'foo'}
+ )
+ with with_host(cephadm_module, 'host1'), with_host(cephadm_module, 'host2'):
+ c = cephadm_module.apply_mds(spec)
+ out = wait(cephadm_module, c)
+ match_glob(out, "Scheduled mds.fsname update...")
+ CephadmServe(cephadm_module)._apply_all_services()
+
+ [daemon] = cephadm_module.cache.daemons['host1'].keys()
+
+ spec.placement.set_hosts(['host2'])
+
+ ok_to_stop.side_effect = False
+
+ c = cephadm_module.apply_mds(spec)
+ out = wait(cephadm_module, c)
+ match_glob(out, "Scheduled mds.fsname update...")
+ CephadmServe(cephadm_module)._apply_all_services()
+
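+            # ok_to_stop receives daemon ids only, so the 'mds.' prefix is stripped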
+ ok_to_stop.assert_called_with([daemon[4:]], force=True)
+
+ assert_rm_daemon(cephadm_module, spec.service_name(), 'host1') # verifies ok-to-stop
+ assert_rm_daemon(cephadm_module, spec.service_name(), 'host2')
+
+ @mock.patch("cephadm.module.CephadmOrchestrator._get_connection")
+ @mock.patch("remoto.process.check")
+ def test_offline(self, _check, _get_connection, cephadm_module):
+ _check.return_value = '{}', '', 0
+ _get_connection.return_value = mock.Mock(), mock.Mock()
+ with with_host(cephadm_module, 'test'):
+ _get_connection.side_effect = HostNotFound
+ code, out, err = cephadm_module.check_host('test')
+ assert out == ''
+ assert "Host 'test' not found" in err
+
+ out = wait(cephadm_module, cephadm_module.get_hosts())[0].to_json()
+ assert out == HostSpec('test', '1::4', status='Offline').to_json()
+
+ _get_connection.side_effect = None
+ assert CephadmServe(cephadm_module)._check_host('test') is None
+ out = wait(cephadm_module, cephadm_module.get_hosts())[0].to_json()
+ assert out == HostSpec('test', '1::4').to_json()
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_dont_touch_offline_or_maintenance_host_daemons(self, cephadm_module):
+ # test daemons on offline/maint hosts not removed when applying specs
+ # test daemons not added to hosts in maint/offline state
+ with with_host(cephadm_module, 'test1'):
+ with with_host(cephadm_module, 'test2'):
+ with with_host(cephadm_module, 'test3'):
+ with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(host_pattern='*'))):
+ # should get a mgr on all 3 hosts
+ # CephadmServe(cephadm_module)._apply_all_services()
+ assert len(cephadm_module.cache.get_daemons_by_type('mgr')) == 3
+
+ # put one host in offline state and one host in maintenance state
+ cephadm_module.offline_hosts = {'test2'}
+ cephadm_module.inventory._inventory['test3']['status'] = 'maintenance'
+ cephadm_module.inventory.save()
+
+                    # offline/maint hosts are still returned as schedulable candidates;
+                    # the unreachable-host tracking below is what keeps the serve loop from
+                    # adding daemons to them or removing daemons from them
+ candidates = [
+ h.hostname for h in cephadm_module._schedulable_hosts()]
+ assert 'test2' in candidates
+ assert 'test3' in candidates
+
+ unreachable = [h.hostname for h in cephadm_module._unreachable_hosts()]
+ assert 'test2' in unreachable
+ assert 'test3' in unreachable
+
+ with with_service(cephadm_module, ServiceSpec('crash', placement=PlacementSpec(host_pattern='*'))):
+ # re-apply services. No mgr should be removed from maint/offline hosts
+ # crash daemon should only be on host not in maint/offline mode
+ CephadmServe(cephadm_module)._apply_all_services()
+ assert len(cephadm_module.cache.get_daemons_by_type('mgr')) == 3
+ assert len(cephadm_module.cache.get_daemons_by_type('crash')) == 1
+
+ cephadm_module.offline_hosts = {}
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.CephadmOrchestrator._host_ok_to_stop")
+ @mock.patch("cephadm.module.HostCache.get_daemon_types")
+ @mock.patch("cephadm.module.HostCache.get_hosts")
+ def test_maintenance_enter_success(self, _hosts, _get_daemon_types, _host_ok, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ hostname = 'host1'
+ _run_cephadm.return_value = [''], ['something\nsuccess - systemd target xxx disabled'], 0
+ _host_ok.return_value = 0, 'it is okay'
+ _get_daemon_types.return_value = ['crash']
+ _hosts.return_value = [hostname, 'other_host']
+ cephadm_module.inventory.add_host(HostSpec(hostname))
+ # should not raise an error
+ retval = cephadm_module.enter_host_maintenance(hostname)
+ assert retval.result_str().startswith('Daemons for Ceph cluster')
+ assert not retval.exception_str
+ assert cephadm_module.inventory._inventory[hostname]['status'] == 'maintenance'
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.CephadmOrchestrator._host_ok_to_stop")
+ @mock.patch("cephadm.module.HostCache.get_daemon_types")
+ @mock.patch("cephadm.module.HostCache.get_hosts")
+ def test_maintenance_enter_failure(self, _hosts, _get_daemon_types, _host_ok, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ hostname = 'host1'
+ _run_cephadm.return_value = [''], ['something\nfailed - disable the target'], 0
+ _host_ok.return_value = 0, 'it is okay'
+ _get_daemon_types.return_value = ['crash']
+ _hosts.return_value = [hostname, 'other_host']
+ cephadm_module.inventory.add_host(HostSpec(hostname))
+
+ with pytest.raises(OrchestratorError, match='Failed to place host1 into maintenance for cluster fsid'):
+ cephadm_module.enter_host_maintenance(hostname)
+
+ assert not cephadm_module.inventory._inventory[hostname]['status']
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.module.HostCache.get_daemon_types")
+ @mock.patch("cephadm.module.HostCache.get_hosts")
+ def test_maintenance_exit_success(self, _hosts, _get_daemon_types, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ hostname = 'host1'
+ _run_cephadm.return_value = [''], [
+ 'something\nsuccess - systemd target xxx enabled and started'], 0
+ _get_daemon_types.return_value = ['crash']
+ _hosts.return_value = [hostname, 'other_host']
+ cephadm_module.inventory.add_host(HostSpec(hostname, status='maintenance'))
+ # should not raise an error
+ retval = cephadm_module.exit_host_maintenance(hostname)
+ assert retval.result_str().startswith('Ceph cluster')
+ assert not retval.exception_str
+ assert not cephadm_module.inventory._inventory[hostname]['status']
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ @mock.patch("cephadm.module.HostCache.get_daemon_types")
+ @mock.patch("cephadm.module.HostCache.get_hosts")
+ def test_maintenance_exit_failure(self, _hosts, _get_daemon_types, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ hostname = 'host1'
+ _run_cephadm.return_value = [''], ['something\nfailed - unable to enable the target'], 0
+ _get_daemon_types.return_value = ['crash']
+ _hosts.return_value = [hostname, 'other_host']
+ cephadm_module.inventory.add_host(HostSpec(hostname, status='maintenance'))
+
+ with pytest.raises(OrchestratorError, match='Failed to exit maintenance state for host host1, cluster fsid'):
+ cephadm_module.exit_host_maintenance(hostname)
+
+ assert cephadm_module.inventory._inventory[hostname]['status'] == 'maintenance'
+
+ def test_stale_connections(self, cephadm_module):
+ class Connection(object):
+ """
+ A mocked connection class that only allows the use of the connection
+ once. If you attempt to use it again via a _check, it'll explode (go
+ boom!).
+
+ The old code triggers the boom. The new code checks the has_connection
+ and will recreate the connection.
+ """
+ fuse = False
+
+ @ staticmethod
+ def has_connection():
+ return False
+
+ def import_module(self, *args, **kargs):
+ return mock.Mock()
+
+ @ staticmethod
+ def exit():
+ pass
+
+ def _check(conn, *args, **kargs):
+ if conn.fuse:
+ raise Exception("boom: connection is dead")
+ else:
+ conn.fuse = True
+ return '{}', [], 0
+ with mock.patch("remoto.Connection", side_effect=[Connection(), Connection(), Connection()]):
+ with mock.patch("remoto.process.check", _check):
+ with with_host(cephadm_module, 'test', refresh_hosts=False):
+ code, out, err = cephadm_module.check_host('test')
+ # First should succeed.
+ assert err == ''
+
+                    # On the second call it attempts to reuse the connection; since the
+                    # connection reports itself as down, a new one is created. The old
+                    # code would blow up here, triggering the BOOM!
+ code, out, err = cephadm_module.check_host('test')
+ assert err == ''
+
+ @mock.patch("cephadm.module.CephadmOrchestrator._get_connection")
+ @mock.patch("remoto.process.check")
+ @mock.patch("cephadm.module.CephadmServe._write_remote_file")
+ def test_etc_ceph(self, _write_file, _check, _get_connection, cephadm_module):
+ _get_connection.return_value = mock.Mock(), mock.Mock()
+ _check.return_value = '{}', '', 0
+ _write_file.return_value = None
+
+ assert cephadm_module.manage_etc_ceph_ceph_conf is False
+
+ with with_host(cephadm_module, 'test'):
+ assert '/etc/ceph/ceph.conf' not in cephadm_module.cache.get_host_client_files('test')
+
+ with with_host(cephadm_module, 'test'):
+ cephadm_module.set_module_option('manage_etc_ceph_ceph_conf', True)
+ cephadm_module.config_notify()
+ assert cephadm_module.manage_etc_ceph_ceph_conf is True
+
+ CephadmServe(cephadm_module)._refresh_hosts_and_daemons()
+ _write_file.assert_called_with('test', '/etc/ceph/ceph.conf', b'',
+ 0o644, 0, 0)
+
+ assert '/etc/ceph/ceph.conf' in cephadm_module.cache.get_host_client_files('test')
+
+ # set extra config and expect that we deploy another ceph.conf
+ cephadm_module._set_extra_ceph_conf('[mon]\nk=v')
+ CephadmServe(cephadm_module)._refresh_hosts_and_daemons()
+ _write_file.assert_called_with('test', '/etc/ceph/ceph.conf',
+ b'\n\n[mon]\nk=v\n', 0o644, 0, 0)
+
+ # reload
+ cephadm_module.cache.last_client_files = {}
+ cephadm_module.cache.load()
+
+ assert '/etc/ceph/ceph.conf' in cephadm_module.cache.get_host_client_files('test')
+
+ # Make sure, _check_daemons does a redeploy due to monmap change:
+ before_digest = cephadm_module.cache.get_host_client_files('test')[
+ '/etc/ceph/ceph.conf'][0]
+ cephadm_module._set_extra_ceph_conf('[mon]\nk2=v2')
+ CephadmServe(cephadm_module)._refresh_hosts_and_daemons()
+ after_digest = cephadm_module.cache.get_host_client_files('test')[
+ '/etc/ceph/ceph.conf'][0]
+ assert before_digest != after_digest
+
+ def test_etc_ceph_init(self):
+ with with_cephadm_module({'manage_etc_ceph_ceph_conf': True}) as m:
+ assert m.manage_etc_ceph_ceph_conf is True
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_registry_login(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ def check_registry_credentials(url, username, password):
+ assert json.loads(cephadm_module.get_store('registry_credentials')) == {
+ 'url': url, 'username': username, 'password': password}
+
+ _run_cephadm.return_value = '{}', '', 0
+ with with_host(cephadm_module, 'test'):
+ # test successful login with valid args
+ code, out, err = cephadm_module.registry_login('test-url', 'test-user', 'test-password')
+ assert out == 'registry login scheduled'
+ assert err == ''
+ check_registry_credentials('test-url', 'test-user', 'test-password')
+
+ # test bad login attempt with invalid args
+ code, out, err = cephadm_module.registry_login('bad-args')
+ assert err == ("Invalid arguments. Please provide arguments <url> <username> <password> "
+ "or -i <login credentials json file>")
+ check_registry_credentials('test-url', 'test-user', 'test-password')
+
+ # test bad login using invalid json file
+ code, out, err = cephadm_module.registry_login(
+ None, None, None, '{"bad-json": "bad-json"}')
+ assert err == ("json provided for custom registry login did not include all necessary fields. "
+ "Please setup json file as\n"
+ "{\n"
+ " \"url\": \"REGISTRY_URL\",\n"
+ " \"username\": \"REGISTRY_USERNAME\",\n"
+ " \"password\": \"REGISTRY_PASSWORD\"\n"
+ "}\n")
+ check_registry_credentials('test-url', 'test-user', 'test-password')
+
+ # test good login using valid json file
+ good_json = ("{\"url\": \"" + "json-url" + "\", \"username\": \"" + "json-user" + "\", "
+ " \"password\": \"" + "json-pass" + "\"}")
+ code, out, err = cephadm_module.registry_login(None, None, None, good_json)
+ assert out == 'registry login scheduled'
+ assert err == ''
+ check_registry_credentials('json-url', 'json-user', 'json-pass')
+
+ # test bad login where args are valid but login command fails
+ _run_cephadm.return_value = '{}', 'error', 1
+ code, out, err = cephadm_module.registry_login('fail-url', 'fail-user', 'fail-password')
+ assert err == 'Host test failed to login to fail-url as fail-user with given password'
+ check_registry_credentials('json-url', 'json-user', 'json-pass')
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({
+ 'image_id': 'image_id',
+ 'repo_digests': ['image@repo_digest'],
+ })))
+ @pytest.mark.parametrize("use_repo_digest",
+ [
+ False,
+ True
+ ])
+ def test_upgrade_run(self, use_repo_digest, cephadm_module: CephadmOrchestrator):
+ cephadm_module.use_repo_digest = use_repo_digest
+
+ with with_host(cephadm_module, 'test', refresh_hosts=False):
+ cephadm_module.set_container_image('global', 'image')
+
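+            # with use_repo_digest, the configured image tag gets resolved to its repo digest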
+ if use_repo_digest:
+
+ CephadmServe(cephadm_module).convert_tags_to_repo_digest()
+
+ _, image, _ = cephadm_module.check_mon_command({
+ 'prefix': 'config get',
+ 'who': 'global',
+ 'key': 'container_image',
+ })
+ if use_repo_digest:
+ assert image == 'image@repo_digest'
+ else:
+ assert image == 'image'
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_ceph_volume_no_filter_for_batch(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+
+ error_message = """cephadm exited with an error code: 1, stderr:/usr/bin/podman:stderr usage: ceph-volume inventory [-h] [--format {plain,json,json-pretty}] [path]/usr/bin/podman:stderr ceph-volume inventory: error: unrecognized arguments: --filter-for-batch
+Traceback (most recent call last):
+ File "<stdin>", line 6112, in <module>
+ File "<stdin>", line 1299, in _infer_fsid
+ File "<stdin>", line 1382, in _infer_image
+ File "<stdin>", line 3612, in command_ceph_volume
+ File "<stdin>", line 1061, in call_throws"""
+
+ with with_host(cephadm_module, 'test'):
+ _run_cephadm.reset_mock()
+ _run_cephadm.side_effect = OrchestratorError(error_message)
+
+ s = CephadmServe(cephadm_module)._refresh_host_devices('test')
+ assert s == 'host test `cephadm ceph-volume` failed: ' + error_message
+
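+            # cephadm should retry the inventory without --filter-for-batch once ceph-volume rejects the flag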
+ assert _run_cephadm.mock_calls == [
+ mock.call('test', 'osd', 'ceph-volume',
+ ['--', 'inventory', '--format=json-pretty', '--filter-for-batch'], image='',
+ no_fsid=False),
+ mock.call('test', 'osd', 'ceph-volume',
+ ['--', 'inventory', '--format=json-pretty'], image='',
+ no_fsid=False),
+ ]
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_osd_activate_datadevice(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+ with with_host(cephadm_module, 'test', refresh_hosts=False):
+ with with_osd_daemon(cephadm_module, _run_cephadm, 'test', 1):
+ pass
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_osd_activate_datadevice_fail(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+ with with_host(cephadm_module, 'test', refresh_hosts=False):
+ cephadm_module.mock_store_set('_ceph_get', 'osd_map', {
+ 'osds': [
+ {
+ 'osd': 1,
+ 'up_from': 0,
+ 'uuid': 'uuid'
+ }
+ ]
+ })
+
+ ceph_volume_lvm_list = {
+ '1': [{
+ 'tags': {
+ 'ceph.cluster_fsid': cephadm_module._cluster_fsid,
+ 'ceph.osd_fsid': 'uuid'
+ },
+ 'type': 'data'
+ }]
+ }
+ _run_cephadm.reset_mock(return_value=True)
+
+ def _r_c(*args, **kwargs):
+ if 'ceph-volume' in args:
+ return (json.dumps(ceph_volume_lvm_list), '', 0)
+ else:
+ assert 'deploy' in args
+ raise OrchestratorError("let's fail somehow")
+ _run_cephadm.side_effect = _r_c
+ assert cephadm_module._osd_activate(
+ ['test']).stderr == "let's fail somehow"
+ with pytest.raises(AssertionError):
+ cephadm_module.assert_issued_mon_command({
+ 'prefix': 'auth rm',
+ 'entity': 'osd.1',
+ })
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_osd_activate_datadevice_dbdevice(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+ with with_host(cephadm_module, 'test', refresh_hosts=False):
+
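+            # mocked ceph-volume output: one raw OSD plus an lvm OSD with separate data and db volumes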
+ def _ceph_volume_list(s, host, entity, cmd, **kwargs):
+ logging.info(f'ceph-volume cmd: {cmd}')
+ if 'raw' in cmd:
+ return json.dumps({
+ "21a4209b-f51b-4225-81dc-d2dca5b8b2f5": {
+ "ceph_fsid": "64c84f19-fe1d-452a-a731-ab19dc144aa8",
+ "device": "/dev/loop0",
+ "osd_id": 21,
+ "osd_uuid": "21a4209b-f51b-4225-81dc-d2dca5b8b2f5",
+ "type": "bluestore"
+ },
+ }), '', 0
+ if 'lvm' in cmd:
+ return json.dumps({
+ '1': [{
+ 'tags': {
+ 'ceph.cluster_fsid': cephadm_module._cluster_fsid,
+ 'ceph.osd_fsid': 'uuid'
+ },
+ 'type': 'data'
+ }, {
+ 'tags': {
+ 'ceph.cluster_fsid': cephadm_module._cluster_fsid,
+ 'ceph.osd_fsid': 'uuid'
+ },
+ 'type': 'db'
+ }]
+ }), '', 0
+ return '{}', '', 0
+
+ with with_osd_daemon(cephadm_module, _run_cephadm, 'test', 1, ceph_volume_lvm_list=_ceph_volume_list):
+ pass
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_osd_count(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+ dg = DriveGroupSpec(service_id='', data_devices=DeviceSelection(all=True))
+ with with_host(cephadm_module, 'test', refresh_hosts=False):
+ with with_service(cephadm_module, dg, host='test'):
+ with with_osd_daemon(cephadm_module, _run_cephadm, 'test', 1):
+ assert wait(cephadm_module, cephadm_module.describe_service())[0].size == 1
+
+ @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+ def test_host_rm_last_admin(self, cephadm_module: CephadmOrchestrator):
+ with pytest.raises(OrchestratorError):
+ with with_host(cephadm_module, 'test', refresh_hosts=False, rm_with_force=False):
+ cephadm_module.inventory.add_label('test', '_admin')
+ pass
+ assert False
+ with with_host(cephadm_module, 'test1', refresh_hosts=False, rm_with_force=True):
+ with with_host(cephadm_module, 'test2', refresh_hosts=False, rm_with_force=False):
+ cephadm_module.inventory.add_label('test2', '_admin')
diff --git a/src/pybind/mgr/cephadm/tests/test_completion.py b/src/pybind/mgr/cephadm/tests/test_completion.py
new file mode 100644
index 000000000..327c12d2a
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_completion.py
@@ -0,0 +1,40 @@
+import pytest
+
+from ..module import forall_hosts
+
+
+class TestCompletion(object):
+
+ @pytest.mark.parametrize("input,expected", [
+ ([], []),
+ ([1], ["(1,)"]),
+ (["hallo"], ["('hallo',)"]),
+ ("hi", ["('h',)", "('i',)"]),
+ (list(range(5)), [str((x, )) for x in range(5)]),
+ ([(1, 2), (3, 4)], ["(1, 2)", "(3, 4)"]),
+ ])
+ def test_async_map(self, input, expected, cephadm_module):
+ @forall_hosts
+ def run_forall(*args):
+ return str(args)
+ assert run_forall(input) == expected
+
+ @pytest.mark.parametrize("input,expected", [
+ ([], []),
+ ([1], ["(1,)"]),
+ (["hallo"], ["('hallo',)"]),
+ ("hi", ["('h',)", "('i',)"]),
+ (list(range(5)), [str((x, )) for x in range(5)]),
+ ([(1, 2), (3, 4)], ["(1, 2)", "(3, 4)"]),
+ ])
+ def test_async_map_self(self, input, expected, cephadm_module):
+ class Run(object):
+ def __init__(self):
+ self.attr = 1
+
+ @forall_hosts
+ def run_forall(self, *args):
+ assert self.attr == 1
+ return str(args)
+
+ assert Run().run_forall(input) == expected
diff --git a/src/pybind/mgr/cephadm/tests/test_configchecks.py b/src/pybind/mgr/cephadm/tests/test_configchecks.py
new file mode 100644
index 000000000..3cae0a27d
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_configchecks.py
@@ -0,0 +1,668 @@
+import copy
+import json
+import logging
+import ipaddress
+import pytest
+import uuid
+
+from time import time as now
+
+from ..configchecks import CephadmConfigChecks
+from ..inventory import HostCache
+from ..upgrade import CephadmUpgrade, UpgradeState
+from orchestrator import DaemonDescription
+
+from typing import List, Dict, Any, Optional
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+
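+# Sample host facts record, deep-copied as the template for every generated test host.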
+host_sample = {
+ "arch": "x86_64",
+ "bios_date": "04/01/2014",
+ "bios_version": "F2",
+ "cpu_cores": 16,
+ "cpu_count": 2,
+ "cpu_load": {
+ "15min": 0.0,
+ "1min": 0.01,
+ "5min": 0.01
+ },
+ "cpu_model": "Intel® Xeon® Processor E5-2698 v3",
+ "cpu_threads": 64,
+ "flash_capacity": "4.0TB",
+ "flash_capacity_bytes": 4000797868032,
+ "flash_count": 2,
+ "flash_list": [
+ {
+ "description": "ATA CT2000MX500SSD1 (2.0TB)",
+ "dev_name": "sda",
+ "disk_size_bytes": 2000398934016,
+ "model": "CT2000MX500SSD1",
+ "rev": "023",
+ "vendor": "ATA",
+ "wwid": "t10.ATA CT2000MX500SSD1 193023156DE0"
+ },
+ {
+ "description": "ATA CT2000MX500SSD1 (2.0TB)",
+ "dev_name": "sdb",
+ "disk_size_bytes": 2000398934016,
+ "model": "CT2000MX500SSD1",
+ "rev": "023",
+ "vendor": "ATA",
+ "wwid": "t10.ATA CT2000MX500SSD1 193023156DE0"
+ },
+ ],
+ "hdd_capacity": "16.0TB",
+ "hdd_capacity_bytes": 16003148120064,
+ "hdd_count": 4,
+ "hdd_list": [
+ {
+ "description": "ST4000VN008-2DR1 (4.0TB)",
+ "dev_name": "sdc",
+ "disk_size_bytes": 4000787030016,
+ "model": "ST4000VN008-2DR1",
+ "rev": "SC60",
+ "vendor": "ATA",
+ "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ"
+ },
+ {
+ "description": "ST4000VN008-2DR1 (4.0TB)",
+ "dev_name": "sdd",
+ "disk_size_bytes": 4000787030016,
+ "model": "ST4000VN008-2DR1",
+ "rev": "SC60",
+ "vendor": "ATA",
+ "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ"
+ },
+ {
+ "description": "ST4000VN008-2DR1 (4.0TB)",
+ "dev_name": "sde",
+ "disk_size_bytes": 4000787030016,
+ "model": "ST4000VN008-2DR1",
+ "rev": "SC60",
+ "vendor": "ATA",
+ "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ"
+ },
+ {
+ "description": "ST4000VN008-2DR1 (4.0TB)",
+ "dev_name": "sdf",
+ "disk_size_bytes": 4000787030016,
+ "model": "ST4000VN008-2DR1",
+ "rev": "SC60",
+ "vendor": "ATA",
+ "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ"
+ },
+ ],
+ "hostname": "dummy",
+ "interfaces": {
+ "eth0": {
+ "driver": "e1000e",
+ "iftype": "physical",
+ "ipv4_address": "10.7.17.1/24",
+ "ipv6_address": "fe80::215:17ff:feab:50e2/64",
+ "lower_devs_list": [],
+ "mtu": 9000,
+ "nic_type": "ethernet",
+ "operstate": "up",
+ "speed": 1000,
+ "upper_devs_list": [],
+ },
+ "eth1": {
+ "driver": "e1000e",
+ "iftype": "physical",
+ "ipv4_address": "10.7.18.1/24",
+ "ipv6_address": "fe80::215:17ff:feab:50e2/64",
+ "lower_devs_list": [],
+ "mtu": 9000,
+ "nic_type": "ethernet",
+ "operstate": "up",
+ "speed": 1000,
+ "upper_devs_list": [],
+ },
+ "eth2": {
+ "driver": "r8169",
+ "iftype": "physical",
+ "ipv4_address": "10.7.19.1/24",
+ "ipv6_address": "fe80::76d4:35ff:fe58:9a79/64",
+ "lower_devs_list": [],
+ "mtu": 1500,
+ "nic_type": "ethernet",
+ "operstate": "up",
+ "speed": 1000,
+ "upper_devs_list": []
+ },
+ },
+ "kernel": "4.18.0-240.10.1.el8_3.x86_64",
+ "kernel_parameters": {
+ "net.ipv4.ip_nonlocal_bind": "0",
+ },
+ "kernel_security": {
+ "SELINUX": "enforcing",
+ "SELINUXTYPE": "targeted",
+ "description": "SELinux: Enabled(enforcing, targeted)",
+ "type": "SELinux"
+ },
+ "memory_available_kb": 19489212,
+ "memory_free_kb": 245164,
+ "memory_total_kb": 32900916,
+ "model": "StorageHeavy",
+ "nic_count": 3,
+ "operating_system": "Red Hat Enterprise Linux 8.3 (Ootpa)",
+ "subscribed": "Yes",
+ "system_uptime": 777600.0,
+ "timestamp": now(),
+ "vendor": "Ceph Servers Inc",
+}
+
+
+def role_list(n: int) -> List[str]:
+ if n == 1:
+ return ['mon', 'mgr', 'osd']
+ if n in [2, 3]:
+ return ['mon', 'mds', 'osd']
+
+ return ['osd']
+
+
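+# Build `count` hosts from host_sample, assign public/cluster addresses from the given
+# networks, and create DaemonDescriptions for each host according to role_list().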
+def generate_testdata(count: int = 10, public_network: str = '10.7.17.0/24', cluster_network: str = '10.7.18.0/24'):
+ # public network = eth0, cluster_network = eth1
+ assert count > 3
+ assert public_network
+ num_disks = host_sample['hdd_count']
+ hosts = {}
+ daemons = {}
+ daemon_to_host = {}
+ osd_num = 0
+ public_netmask = public_network.split('/')[1]
+ cluster_ip_list = []
+ cluster_netmask = ''
+
+ public_ip_list = [str(i) for i in list(ipaddress.ip_network(public_network).hosts())]
+ if cluster_network:
+ cluster_ip_list = [str(i) for i in list(ipaddress.ip_network(cluster_network).hosts())]
+ cluster_netmask = cluster_network.split('/')[1]
+
+ for n in range(1, count + 1, 1):
+
+ new_host = copy.deepcopy(host_sample)
+ hostname = f"node-{n}.ceph.com"
+
+ new_host['hostname'] = hostname
+ new_host['interfaces']['eth0']['ipv4_address'] = f"{public_ip_list.pop(0)}/{public_netmask}"
+ if cluster_ip_list:
+ new_host['interfaces']['eth1']['ipv4_address'] = f"{cluster_ip_list.pop(0)}/{cluster_netmask}"
+ else:
+ new_host['interfaces']['eth1']['ipv4_address'] = ''
+
+ hosts[hostname] = new_host
+ daemons[hostname] = {}
+ for r in role_list(n):
+ name = ''
+ if r == 'osd':
+                for _n in range(num_disks):
+ osd = DaemonDescription(
+ hostname=hostname, daemon_type='osd', daemon_id=osd_num)
+ name = f"osd.{osd_num}"
+ daemons[hostname][name] = osd
+ daemon_to_host[name] = hostname
+ osd_num += 1
+ else:
+ name = f"{r}.{hostname}"
+ daemons[hostname][name] = DaemonDescription(
+ hostname=hostname, daemon_type=r, daemon_id=hostname)
+ daemon_to_host[name] = hostname
+
+ logger.debug(f"daemon to host lookup - {json.dumps(daemon_to_host)}")
+ return hosts, daemons, daemon_to_host
+
+
+@pytest.fixture()
+def mgr():
+ """Provide a fake ceph mgr object preloaded with a configuration"""
+ mgr = FakeMgr()
+ mgr.cache.facts, mgr.cache.daemons, mgr.daemon_to_host = \
+ generate_testdata(public_network='10.9.64.0/24', cluster_network='')
+ mgr.module_option.update({
+ "config_checks_enabled": True,
+ })
+ yield mgr
+
+
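+# Minimal stand-in for the mgr module: provides just enough of the option, store and
+# metadata API for HostCache, CephadmUpgrade and CephadmConfigChecks used in these tests.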
+class FakeMgr:
+
+ def __init__(self):
+ self.datastore = {}
+ self.module_option = {}
+ self.health_checks = {}
+ self.default_version = 'quincy'
+ self.version_overrides = {}
+ self.daemon_to_host = {}
+
+ self.cache = HostCache(self)
+ self.upgrade = CephadmUpgrade(self)
+
+ def set_health_checks(self, checks: dict):
+ return
+
+ def get_module_option(self, keyname: str) -> Optional[str]:
+ return self.module_option.get(keyname, None)
+
+ def set_module_option(self, keyname: str, value: str) -> None:
+ return None
+
+ def get_store(self, keyname: str, default=None) -> Optional[str]:
+ return self.datastore.get(keyname, None)
+
+ def set_store(self, keyname: str, value: str) -> None:
+ self.datastore[keyname] = value
+ return None
+
+ def _ceph_get_server(self) -> None:
+ pass
+
+ def get_metadata(self, daemon_type: str, daemon_id: str) -> Dict[str, Any]:
+ key = f"{daemon_type}.{daemon_id}"
+ if key in self.version_overrides:
+ logger.debug(f"override applied for {key}")
+ version_str = self.version_overrides[key]
+ else:
+ version_str = self.default_version
+
+ return {"ceph_release": version_str, "hostname": self.daemon_to_host[key]}
+
+ def list_servers(self) -> List[Dict[str, List[Dict[str, str]]]]:
+ num_disks = host_sample['hdd_count']
+ osd_num = 0
+ service_map = []
+
+ for hostname in self.cache.facts:
+
+ host_num = int(hostname.split('.')[0].split('-')[1])
+ svc_list = []
+ for r in role_list(host_num):
+ if r == 'osd':
+ for _n in range(num_disks):
+ svc_list.append({
+ "type": "osd",
+ "id": osd_num,
+ })
+ osd_num += 1
+ else:
+ svc_list.append({
+ "type": r,
+ "id": hostname,
+ })
+
+ service_map.append({"services": svc_list})
+ logger.debug(f"services map - {json.dumps(service_map)}")
+ return service_map
+
+ def use_repo_digest(self) -> None:
+ return None
+
+
+class TestConfigCheck:
+
+ def test_to_json(self, mgr):
+ checker = CephadmConfigChecks(mgr)
+ out = checker.to_json()
+ assert out
+ assert len(out) == len(checker.health_checks)
+
+ def test_lookup_check(self, mgr):
+ checker = CephadmConfigChecks(mgr)
+ check = checker.lookup_check('osd_mtu_size')
+ logger.debug(json.dumps(check.to_json()))
+ assert check
+ assert check.healthcheck_name == "CEPHADM_CHECK_MTU"
+
+ def test_old_checks_removed(self, mgr):
+ mgr.datastore.update({
+ "config_checks": '{"bogus_one": "enabled", "bogus_two": "enabled", '
+ '"kernel_security": "enabled", "public_network": "enabled", '
+ '"kernel_version": "enabled", "network_missing": "enabled", '
+ '"osd_mtu_size": "enabled", "osd_linkspeed": "enabled", '
+ '"os_subscription": "enabled", "ceph_release": "enabled"}'
+ })
+ checker = CephadmConfigChecks(mgr)
+ raw = mgr.get_store('config_checks')
+ checks = json.loads(raw)
+ assert "bogus_one" not in checks
+ assert "bogus_two" not in checks
+ assert len(checks) == len(checker.health_checks)
+
+ def test_new_checks(self, mgr):
+ mgr.datastore.update({
+ "config_checks": '{"kernel_security": "enabled", "public_network": "enabled", '
+ '"osd_mtu_size": "enabled", "osd_linkspeed": "enabled"}'
+ })
+ checker = CephadmConfigChecks(mgr)
+ raw = mgr.get_store('config_checks')
+ checks = json.loads(raw)
+ assert len(checks) == len(checker.health_checks)
+
+ def test_no_issues(self, mgr):
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+ checker.run_checks()
+
+ assert not mgr.health_checks
+
+ def test_no_public_network(self, mgr):
+ bad_node = mgr.cache.facts['node-1.ceph.com']
+ bad_node['interfaces']['eth0']['ipv4_address'] = "192.168.1.20/24"
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+ checker.run_checks()
+ logger.debug(mgr.health_checks)
+ assert len(mgr.health_checks) == 1
+ assert 'CEPHADM_CHECK_PUBLIC_MEMBERSHIP' in mgr.health_checks
+ assert mgr.health_checks['CEPHADM_CHECK_PUBLIC_MEMBERSHIP']['detail'][0] == \
+ 'node-1.ceph.com does not have an interface on any public network'
+
+ def test_missing_networks(self, mgr):
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.66.0/24']
+ checker.run_checks()
+
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert len(mgr.health_checks) == 1
+ assert 'CEPHADM_CHECK_NETWORK_MISSING' in mgr.health_checks
+ assert mgr.health_checks['CEPHADM_CHECK_NETWORK_MISSING']['detail'][0] == \
+ "10.9.66.0/24 not found on any host in the cluster"
+
+ def test_bad_mtu_single(self, mgr):
+
+ bad_node = mgr.cache.facts['node-1.ceph.com']
+ bad_node['interfaces']['eth0']['mtu'] = 1500
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert "CEPHADM_CHECK_MTU" in mgr.health_checks and len(mgr.health_checks) == 1
+ assert mgr.health_checks['CEPHADM_CHECK_MTU']['detail'][0] == \
+ 'host node-1.ceph.com(eth0) is using MTU 1500 on 10.9.64.0/24, NICs on other hosts use 9000'
+
+ def test_bad_mtu_multiple(self, mgr):
+
+ for n in [1, 5]:
+ bad_node = mgr.cache.facts[f'node-{n}.ceph.com']
+ bad_node['interfaces']['eth0']['mtu'] = 1500
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert "CEPHADM_CHECK_MTU" in mgr.health_checks and len(mgr.health_checks) == 1
+ assert mgr.health_checks['CEPHADM_CHECK_MTU']['count'] == 2
+
+ def test_bad_linkspeed_single(self, mgr):
+
+ bad_node = mgr.cache.facts['node-1.ceph.com']
+ bad_node['interfaces']['eth0']['speed'] = 100
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert mgr.health_checks
+ assert "CEPHADM_CHECK_LINKSPEED" in mgr.health_checks and len(mgr.health_checks) == 1
+ assert mgr.health_checks['CEPHADM_CHECK_LINKSPEED']['detail'][0] == \
+ 'host node-1.ceph.com(eth0) has linkspeed of 100 on 10.9.64.0/24, NICs on other hosts use 1000'
+
+ def test_super_linkspeed_single(self, mgr):
+
+ bad_node = mgr.cache.facts['node-1.ceph.com']
+ bad_node['interfaces']['eth0']['speed'] = 10000
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert not mgr.health_checks
+
+ def test_release_mismatch_single(self, mgr):
+
+ mgr.version_overrides = {
+ "osd.1": "pacific",
+ }
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ assert mgr.health_checks
+ assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and len(mgr.health_checks) == 1
+ assert mgr.health_checks['CEPHADM_CHECK_CEPH_RELEASE']['detail'][0] == \
+ 'osd.1 is running pacific (majority of cluster is using quincy)'
+
+ def test_release_mismatch_multi(self, mgr):
+
+ mgr.version_overrides = {
+ "osd.1": "pacific",
+ "osd.5": "octopus",
+ }
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ assert mgr.health_checks
+ assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and len(mgr.health_checks) == 1
+ assert len(mgr.health_checks['CEPHADM_CHECK_CEPH_RELEASE']['detail']) == 2
+
+ def test_kernel_mismatch(self, mgr):
+
+ bad_host = mgr.cache.facts['node-1.ceph.com']
+ bad_host['kernel'] = "5.10.18.0-241.10.1.el8.x86_64"
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ assert len(mgr.health_checks) == 1
+ assert 'CEPHADM_CHECK_KERNEL_VERSION' in mgr.health_checks
+ assert mgr.health_checks['CEPHADM_CHECK_KERNEL_VERSION']['detail'][0] == \
+ "host node-1.ceph.com running kernel 5.10, majority of hosts(9) running 4.18"
+ assert mgr.health_checks['CEPHADM_CHECK_KERNEL_VERSION']['count'] == 1
+
+ def test_inconsistent_subscription(self, mgr):
+
+ bad_host = mgr.cache.facts['node-5.ceph.com']
+ bad_host['subscribed'] = "no"
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ assert len(mgr.health_checks) == 1
+ assert "CEPHADM_CHECK_SUBSCRIPTION" in mgr.health_checks
+ assert mgr.health_checks['CEPHADM_CHECK_SUBSCRIPTION']['detail'][0] == \
+ "node-5.ceph.com does not have an active subscription"
+
+ def test_kernel_security_inconsistent(self, mgr):
+
+ bad_node = mgr.cache.facts['node-3.ceph.com']
+ bad_node['kernel_security'] = {
+ "SELINUX": "permissive",
+ "SELINUXTYPE": "targeted",
+ "description": "SELinux: Enabled(permissive, targeted)",
+ "type": "SELinux"
+ }
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ assert len(mgr.health_checks) == 1
+ assert 'CEPHADM_CHECK_KERNEL_LSM' in mgr.health_checks
+ assert mgr.health_checks['CEPHADM_CHECK_KERNEL_LSM']['detail'][0] == \
+ "node-3.ceph.com has inconsistent KSM settings compared to the majority of hosts(9) in the cluster"
+
+ def test_release_and_bad_mtu(self, mgr):
+
+ mgr.version_overrides = {
+ "osd.1": "pacific",
+ }
+ bad_node = mgr.cache.facts['node-1.ceph.com']
+ bad_node['interfaces']['eth0']['mtu'] = 1500
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert mgr.health_checks
+ assert len(mgr.health_checks) == 2
+ assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and \
+ "CEPHADM_CHECK_MTU" in mgr.health_checks
+
+ def test_release_mtu_LSM(self, mgr):
+
+ mgr.version_overrides = {
+ "osd.1": "pacific",
+ }
+ bad_node1 = mgr.cache.facts['node-1.ceph.com']
+ bad_node1['interfaces']['eth0']['mtu'] = 1500
+ bad_node2 = mgr.cache.facts['node-3.ceph.com']
+ bad_node2['kernel_security'] = {
+ "SELINUX": "permissive",
+ "SELINUXTYPE": "targeted",
+ "description": "SELinux: Enabled(permissive, targeted)",
+ "type": "SELinux"
+ }
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert mgr.health_checks
+ assert len(mgr.health_checks) == 3
+ assert \
+ "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and \
+ "CEPHADM_CHECK_MTU" in mgr.health_checks and \
+ "CEPHADM_CHECK_KERNEL_LSM" in mgr.health_checks
+
+ def test_release_mtu_LSM_subscription(self, mgr):
+
+ mgr.version_overrides = {
+ "osd.1": "pacific",
+ }
+ bad_node1 = mgr.cache.facts['node-1.ceph.com']
+ bad_node1['interfaces']['eth0']['mtu'] = 1500
+ bad_node1['subscribed'] = "no"
+ bad_node2 = mgr.cache.facts['node-3.ceph.com']
+ bad_node2['kernel_security'] = {
+ "SELINUX": "permissive",
+ "SELINUXTYPE": "targeted",
+ "description": "SELinux: Enabled(permissive, targeted)",
+ "type": "SELinux"
+ }
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(json.dumps(mgr.health_checks))
+ logger.info(checker.subnet_lookup)
+ assert mgr.health_checks
+ assert len(mgr.health_checks) == 4
+ assert \
+ "CEPHADM_CHECK_CEPH_RELEASE" in mgr.health_checks and \
+ "CEPHADM_CHECK_MTU" in mgr.health_checks and \
+ "CEPHADM_CHECK_KERNEL_LSM" in mgr.health_checks and \
+ "CEPHADM_CHECK_SUBSCRIPTION" in mgr.health_checks
+
+ def test_skip_release_during_upgrade(self, mgr):
+ mgr.upgrade.upgrade_state = UpgradeState.from_json({
+ 'target_name': 'wah',
+ 'progress_id': str(uuid.uuid4()),
+ 'target_id': 'wah',
+ 'error': '',
+ 'paused': False,
+ })
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(f"{checker.skipped_checks_count} skipped check(s): {checker.skipped_checks}")
+ assert checker.skipped_checks_count == 1
+ assert 'ceph_release' in checker.skipped_checks
+
+ def test_skip_when_disabled(self, mgr):
+ mgr.module_option.update({
+ "config_checks_enabled": "false"
+ })
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(checker.active_checks)
+ logger.info(checker.defined_checks)
+ assert checker.active_checks_count == 0
+
+ def test_skip_mtu_checks(self, mgr):
+ mgr.datastore.update({
+ 'config_checks': '{"osd_mtu_size": "disabled"}'
+ })
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(checker.active_checks)
+ logger.info(checker.defined_checks)
+ assert 'osd_mtu_size' not in checker.active_checks
+ assert checker.defined_checks == 8 and checker.active_checks_count == 7
+
+ def test_skip_mtu_lsm_checks(self, mgr):
+ mgr.datastore.update({
+ 'config_checks': '{"osd_mtu_size": "disabled", "kernel_security": "disabled"}'
+ })
+
+ checker = CephadmConfigChecks(mgr)
+ checker.cluster_network_list = []
+ checker.public_network_list = ['10.9.64.0/24']
+
+ checker.run_checks()
+ logger.info(checker.active_checks)
+ logger.info(checker.defined_checks)
+ assert 'osd_mtu_size' not in checker.active_checks and \
+ 'kernel_security' not in checker.active_checks
+ assert checker.defined_checks == 8 and checker.active_checks_count == 6
+ assert not mgr.health_checks
diff --git a/src/pybind/mgr/cephadm/tests/test_facts.py b/src/pybind/mgr/cephadm/tests/test_facts.py
new file mode 100644
index 000000000..6c33f5368
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_facts.py
@@ -0,0 +1,10 @@
+from .. import CephadmOrchestrator
+
+from .fixtures import wait
+
+
+def test_facts(cephadm_module: CephadmOrchestrator):
+ facts = {'node-1.ceph.com': {'bios_version': 'F2', 'cpu_cores': 16}}
+ cephadm_module.cache.facts = facts
+ ret_facts = cephadm_module.get_facts('node-1.ceph.com')
+ assert wait(cephadm_module, ret_facts) == [{'bios_version': 'F2', 'cpu_cores': 16}]
diff --git a/src/pybind/mgr/cephadm/tests/test_migration.py b/src/pybind/mgr/cephadm/tests/test_migration.py
new file mode 100644
index 000000000..b95f54f7c
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_migration.py
@@ -0,0 +1,229 @@
+import json
+
+from ceph.deployment.service_spec import PlacementSpec, ServiceSpec, HostPlacementSpec
+from ceph.utils import datetime_to_str, datetime_now
+from cephadm import CephadmOrchestrator
+from cephadm.inventory import SPEC_STORE_PREFIX
+from cephadm.migrations import LAST_MIGRATION
+from cephadm.tests.fixtures import _run_cephadm, wait, with_host
+from cephadm.serve import CephadmServe
+from tests import mock
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_scheduler(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1', refresh_hosts=False):
+ with with_host(cephadm_module, 'host2', refresh_hosts=False):
+
+ # emulate the old scheduler:
+ c = cephadm_module.apply_rgw(
+ ServiceSpec('rgw', 'r.z', placement=PlacementSpec(host_pattern='*', count=2))
+ )
+ assert wait(cephadm_module, c) == 'Scheduled rgw.r.z update...'
+
+ # with pytest.raises(OrchestratorError, match="cephadm migration still ongoing. Please wait, until the migration is complete."):
+ CephadmServe(cephadm_module)._apply_all_services()
+
+ cephadm_module.migration_current = 0
+ cephadm_module.migration.migrate()
+            # migration must not advance yet: we still need daemon info for all hosts.
+ assert cephadm_module.migration_current == 0
+
+ CephadmServe(cephadm_module)._refresh_hosts_and_daemons()
+ cephadm_module.migration.migrate()
+
+ CephadmServe(cephadm_module)._apply_all_services()
+
+ out = {o.hostname for o in wait(cephadm_module, cephadm_module.list_daemons())}
+ assert out == {'host1', 'host2'}
+
+ c = cephadm_module.apply_rgw(
+ ServiceSpec('rgw', 'r.z', placement=PlacementSpec(host_pattern='host1', count=2))
+ )
+ assert wait(cephadm_module, c) == 'Scheduled rgw.r.z update...'
+
+            # Sorry for this hack, but I need to make sure Migration thinks
+            # we have already updated all daemons.
+ cephadm_module.cache.last_daemon_update['host1'] = datetime_now()
+ cephadm_module.cache.last_daemon_update['host2'] = datetime_now()
+
+ cephadm_module.migration_current = 0
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current >= 2
+
+ out = [o.spec.placement for o in wait(
+ cephadm_module, cephadm_module.describe_service())]
+ assert out == [PlacementSpec(count=2, hosts=[HostPlacementSpec(
+ hostname='host1', network='', name=''), HostPlacementSpec(hostname='host2', network='', name='')])]
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_service_id_mon_one(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.set_store(SPEC_STORE_PREFIX + 'mon.wrong', json.dumps({
+ 'spec': {
+ 'service_type': 'mon',
+ 'service_id': 'wrong',
+ 'placement': {
+ 'hosts': ['host1']
+ }
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, sort_keys=True),
+ )
+
+ cephadm_module.spec_store.load()
+
+ assert len(cephadm_module.spec_store.all_specs) == 1
+ assert cephadm_module.spec_store.all_specs['mon.wrong'].service_name() == 'mon'
+
+ cephadm_module.migration_current = 1
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current >= 2
+
+ assert len(cephadm_module.spec_store.all_specs) == 1
+ assert cephadm_module.spec_store.all_specs['mon'] == ServiceSpec(
+ service_type='mon',
+ unmanaged=True,
+ placement=PlacementSpec(hosts=['host1'])
+ )
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_service_id_mon_two(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.set_store(SPEC_STORE_PREFIX + 'mon', json.dumps({
+ 'spec': {
+ 'service_type': 'mon',
+ 'placement': {
+ 'count': 5,
+ }
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, sort_keys=True),
+ )
+ cephadm_module.set_store(SPEC_STORE_PREFIX + 'mon.wrong', json.dumps({
+ 'spec': {
+ 'service_type': 'mon',
+ 'service_id': 'wrong',
+ 'placement': {
+ 'hosts': ['host1']
+ }
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, sort_keys=True),
+ )
+
+ cephadm_module.spec_store.load()
+
+ assert len(cephadm_module.spec_store.all_specs) == 2
+ assert cephadm_module.spec_store.all_specs['mon.wrong'].service_name() == 'mon'
+ assert cephadm_module.spec_store.all_specs['mon'].service_name() == 'mon'
+
+ cephadm_module.migration_current = 1
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current >= 2
+
+ assert len(cephadm_module.spec_store.all_specs) == 1
+ assert cephadm_module.spec_store.all_specs['mon'] == ServiceSpec(
+ service_type='mon',
+ unmanaged=True,
+ placement=PlacementSpec(count=5)
+ )
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_service_id_mds_one(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.set_store(SPEC_STORE_PREFIX + 'mds', json.dumps({
+ 'spec': {
+ 'service_type': 'mds',
+ 'placement': {
+ 'hosts': ['host1']
+ }
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, sort_keys=True),
+ )
+
+ cephadm_module.spec_store.load()
+
+ # there is nothing to migrate, as the spec is gone now.
+ assert len(cephadm_module.spec_store.all_specs) == 0
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_nfs_initial(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.set_store(
+ SPEC_STORE_PREFIX + 'mds',
+ json.dumps({
+ 'spec': {
+ 'service_type': 'nfs',
+ 'service_id': 'foo',
+ 'placement': {
+ 'hosts': ['host1']
+ },
+ 'spec': {
+ 'pool': 'mypool',
+ 'namespace': 'foons',
+ },
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, sort_keys=True),
+ )
+ cephadm_module.migration_current = 1
+ cephadm_module.spec_store.load()
+
+ ls = json.loads(cephadm_module.get_store('nfs_migration_queue'))
+ assert ls == [['foo', 'mypool', 'foons']]
+
+ cephadm_module.migration.migrate(True)
+ assert cephadm_module.migration_current == 2
+
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current == LAST_MIGRATION
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_nfs_initial_octopus(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.set_store(
+ SPEC_STORE_PREFIX + 'mds',
+ json.dumps({
+ 'spec': {
+ 'service_type': 'nfs',
+ 'service_id': 'ganesha-foo',
+ 'placement': {
+ 'hosts': ['host1']
+ },
+ 'spec': {
+ 'pool': 'mypool',
+ 'namespace': 'foons',
+ },
+ },
+ 'created': datetime_to_str(datetime_now()),
+ }, sort_keys=True),
+ )
+ cephadm_module.migration_current = 1
+ cephadm_module.spec_store.load()
+
+ ls = json.loads(cephadm_module.get_store('nfs_migration_queue'))
+ assert ls == [['ganesha-foo', 'mypool', 'foons']]
+
+ cephadm_module.migration.migrate(True)
+ assert cephadm_module.migration_current == 2
+
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current == LAST_MIGRATION
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
+def test_migrate_admin_client_keyring(cephadm_module: CephadmOrchestrator):
+ assert 'client.admin' not in cephadm_module.keys.keys
+
+ cephadm_module.migration_current = 3
+ cephadm_module.migration.migrate()
+ assert cephadm_module.migration_current == LAST_MIGRATION
+
+ assert cephadm_module.keys.keys['client.admin'].placement.label == '_admin'
diff --git a/src/pybind/mgr/cephadm/tests/test_osd_removal.py b/src/pybind/mgr/cephadm/tests/test_osd_removal.py
new file mode 100644
index 000000000..6685fcb2a
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_osd_removal.py
@@ -0,0 +1,298 @@
+import json
+
+from cephadm.services.osd import OSDRemovalQueue, OSD
+import pytest
+from tests import mock
+from .fixtures import with_cephadm_module
+from datetime import datetime
+
+
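+# Lightweight OSD stand-in that exposes only the osd_id attribute used by the RemoveUtil calls under test.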
+class MockOSD:
+
+ def __init__(self, osd_id):
+ self.osd_id = osd_id
+
+
+class TestOSDRemoval:
+
+ @pytest.mark.parametrize(
+ "osd_id, osd_df, expected",
+ [
+ # missing 'nodes' key
+ (1, dict(nodes=[]), -1),
+ # missing 'pgs' key
+ (1, dict(nodes=[dict(id=1)]), -1),
+ # id != osd_id
+ (1, dict(nodes=[dict(id=999, pgs=1)]), -1),
+ # valid
+ (1, dict(nodes=[dict(id=1, pgs=1)]), 1),
+ ]
+ )
+ def test_get_pg_count(self, rm_util, osd_id, osd_df, expected):
+ with mock.patch("cephadm.services.osd.RemoveUtil.osd_df", return_value=osd_df):
+ assert rm_util.get_pg_count(osd_id) == expected
+
+ @pytest.mark.parametrize(
+ "osds, ok_to_stop, expected",
+ [
+ # no osd_ids provided
+ ([], [False], []),
+ # all osds are ok_to_stop
+ ([1, 2], [True], [1, 2]),
+ # osds are ok_to_stop after the second iteration
+ ([1, 2], [False, True], [2]),
+            # osds are never ok_to_stop (taking the sample size `len(osd_ids)` into account),
+            # so an empty list is expected
+ ([1, 2], [False, False], []),
+ ]
+ )
+ def test_find_stop_threshold(self, rm_util, osds, ok_to_stop, expected):
+ with mock.patch("cephadm.services.osd.RemoveUtil.ok_to_stop", side_effect=ok_to_stop):
+ assert rm_util.find_osd_stop_threshold(osds) == expected
+
+ def test_process_removal_queue(self, rm_util):
+ # TODO: !
+ # rm_util.process_removal_queue()
+ pass
+
+ @pytest.mark.parametrize(
+ "max_osd_draining_count, draining_osds, idling_osds, ok_to_stop, expected",
+ [
+ # drain one at a time, one already draining
+ (1, [1], [1], [True], 0),
+ # drain one at a time, none draining yet
+ (1, [], [1, 2, 3], [True, True, True], 1),
+ # drain one at a time, one already draining, none ok-to-stop
+ (1, [1], [1], [False], 0),
+ # drain one at a time, none draining, one ok-to-stop
+ (1, [], [1, 2, 3], [False, False, True], 1),
+ # drain three at a time, one already draining, all ok-to-stop
+ (3, [1], [1, 2, 3], [True, True, True], 2),
+ # drain two at a time, none already draining, none ok-to-stop
+ (2, [], [1, 2, 3], [False, False, False], 0),
+ # drain two at a time, none already draining, none idling
+ (2, [], [], [], 0),
+ ]
+ )
+ def test_ready_to_drain_osds(self, max_osd_draining_count, draining_osds, idling_osds, ok_to_stop, expected):
+ with with_cephadm_module({'max_osd_draining_count': max_osd_draining_count}) as m:
+ with mock.patch("cephadm.services.osd.OSDRemovalQueue.draining_osds", return_value=draining_osds):
+ with mock.patch("cephadm.services.osd.OSDRemovalQueue.idling_osds", return_value=idling_osds):
+ with mock.patch("cephadm.services.osd.RemoveUtil.ok_to_stop", side_effect=ok_to_stop):
+ removal_queue = OSDRemovalQueue(m)
+ assert len(removal_queue._ready_to_drain_osds()) == expected
+
+ def test_ok_to_stop(self, rm_util):
+ rm_util.ok_to_stop([MockOSD(1)])
+ rm_util._run_mon_cmd.assert_called_with({'prefix': 'osd ok-to-stop', 'ids': ['1']},
+ error_ok=True)
+
+ def test_safe_to_destroy(self, rm_util):
+ rm_util.safe_to_destroy([1])
+ rm_util._run_mon_cmd.assert_called_with({'prefix': 'osd safe-to-destroy',
+ 'ids': ['1']}, error_ok=True)
+
+ def test_destroy_osd(self, rm_util):
+ rm_util.destroy_osd(1)
+ rm_util._run_mon_cmd.assert_called_with(
+ {'prefix': 'osd destroy-actual', 'id': 1, 'yes_i_really_mean_it': True})
+
+ def test_purge_osd(self, rm_util):
+ rm_util.purge_osd(1)
+ rm_util._run_mon_cmd.assert_called_with(
+ {'prefix': 'osd purge-actual', 'id': 1, 'yes_i_really_mean_it': True})
+
+ def test_load(self, cephadm_module, rm_util):
+ data = json.dumps([
+ {
+ "osd_id": 35,
+ "started": True,
+ "draining": True,
+ "stopped": False,
+ "replace": False,
+ "force": False,
+ "zap": False,
+ "nodename": "node2",
+ "drain_started_at": "2020-09-14T11:41:53.960463",
+ "drain_stopped_at": None,
+ "drain_done_at": None,
+ "process_started_at": "2020-09-14T11:41:52.245832"
+ }
+ ])
+ cephadm_module.set_store('osd_remove_queue', data)
+ cephadm_module.to_remove_osds.load_from_store()
+
+ expected = OSDRemovalQueue(cephadm_module)
+ expected.osds.add(OSD(osd_id=35, remove_util=rm_util, draining=True))
+ assert cephadm_module.to_remove_osds == expected
+
+
+class TestOSD:
+
+ def test_start(self, osd_obj):
+ assert osd_obj.started is False
+ osd_obj.start()
+ assert osd_obj.started is True
+ assert osd_obj.stopped is False
+
+ def test_start_draining_purge(self, osd_obj):
+ assert osd_obj.draining is False
+ assert osd_obj.drain_started_at is None
+ ret = osd_obj.start_draining()
+ osd_obj.rm_util.reweight_osd.assert_called_with(osd_obj, 0.0)
+ assert isinstance(osd_obj.drain_started_at, datetime)
+ assert osd_obj.draining is True
+ assert osd_obj.replace is False
+ assert ret is True
+
+ def test_start_draining_replace(self, osd_obj):
+ assert osd_obj.draining is False
+ assert osd_obj.drain_started_at is None
+ osd_obj.replace = True
+ ret = osd_obj.start_draining()
+ osd_obj.rm_util.set_osd_flag.assert_called_with([osd_obj], 'out')
+ assert isinstance(osd_obj.drain_started_at, datetime)
+ assert osd_obj.draining is True
+ assert osd_obj.replace is True
+ assert ret is True
+
+ def test_start_draining_stopped(self, osd_obj):
+ osd_obj.stopped = True
+ ret = osd_obj.start_draining()
+ assert osd_obj.drain_started_at is None
+ assert ret is False
+ assert osd_obj.draining is False
+
+ def test_stop_draining_replace(self, osd_obj):
+ osd_obj.replace = True
+ ret = osd_obj.stop_draining()
+ osd_obj.rm_util.set_osd_flag.assert_called_with([osd_obj], 'in')
+ assert isinstance(osd_obj.drain_stopped_at, datetime)
+ assert osd_obj.draining is False
+ assert ret is True
+
+ def test_stop_draining_purge(self, osd_obj):
+ osd_obj.original_weight = 1.0
+ ret = osd_obj.stop_draining()
+ osd_obj.rm_util.reweight_osd.assert_called_with(osd_obj, 1.0)
+ assert isinstance(osd_obj.drain_stopped_at, datetime)
+ assert osd_obj.draining is False
+ assert ret is True
+
+ @mock.patch('cephadm.services.osd.OSD.stop_draining')
+ def test_stop(self, stop_draining_mock, osd_obj):
+ osd_obj.stop()
+ assert osd_obj.started is False
+ assert osd_obj.stopped is True
+ stop_draining_mock.assert_called_once()
+
+ @pytest.mark.parametrize(
+ "draining, empty, expected",
+ [
+ # must be !draining! and !not empty! to yield True
+ (True, not True, True),
+ # not draining and not empty
+ (False, not True, False),
+ # not draining and empty
+ (False, True, False),
+ # draining and empty
+ (True, True, False),
+ ]
+ )
+ def test_is_draining(self, osd_obj, draining, empty, expected):
+ with mock.patch("cephadm.services.osd.OSD.is_empty", new_callable=mock.PropertyMock(return_value=empty)):
+ osd_obj.draining = draining
+ assert osd_obj.is_draining is expected
+
+ @mock.patch("cephadm.services.osd.RemoveUtil.ok_to_stop")
+ def test_is_ok_to_stop(self, _, osd_obj):
+ osd_obj.is_ok_to_stop
+ osd_obj.rm_util.ok_to_stop.assert_called_once()
+
+ @pytest.mark.parametrize(
+ "pg_count, expected",
+ [
+ (0, True),
+ (1, False),
+ (9999, False),
+ (-1, False),
+ ]
+ )
+ def test_is_empty(self, osd_obj, pg_count, expected):
+ with mock.patch("cephadm.services.osd.OSD.get_pg_count", return_value=pg_count):
+ assert osd_obj.is_empty is expected
+
+ @mock.patch("cephadm.services.osd.RemoveUtil.safe_to_destroy")
+ def test_safe_to_destroy(self, _, osd_obj):
+ osd_obj.safe_to_destroy()
+ osd_obj.rm_util.safe_to_destroy.assert_called_once()
+
+ @mock.patch("cephadm.services.osd.RemoveUtil.set_osd_flag")
+ def test_down(self, _, osd_obj):
+ osd_obj.down()
+ osd_obj.rm_util.set_osd_flag.assert_called_with([osd_obj], 'down')
+
+ @mock.patch("cephadm.services.osd.RemoveUtil.destroy_osd")
+ def test_destroy_osd(self, _, osd_obj):
+ osd_obj.destroy()
+ osd_obj.rm_util.destroy_osd.assert_called_once()
+
+ @mock.patch("cephadm.services.osd.RemoveUtil.purge_osd")
+ def test_purge(self, _, osd_obj):
+ osd_obj.purge()
+ osd_obj.rm_util.purge_osd.assert_called_once()
+
+ @mock.patch("cephadm.services.osd.RemoveUtil.get_pg_count")
+ def test_pg_count(self, _, osd_obj):
+ osd_obj.get_pg_count()
+ osd_obj.rm_util.get_pg_count.assert_called_once()
+
+ def test_drain_status_human_not_started(self, osd_obj):
+ assert osd_obj.drain_status_human() == 'not started'
+
+ def test_drain_status_human_started(self, osd_obj):
+ osd_obj.started = True
+ assert osd_obj.drain_status_human() == 'started'
+
+ def test_drain_status_human_draining(self, osd_obj):
+ osd_obj.started = True
+ osd_obj.draining = True
+ assert osd_obj.drain_status_human() == 'draining'
+
+ def test_drain_status_human_done(self, osd_obj):
+ osd_obj.started = True
+ osd_obj.draining = False
+ osd_obj.drain_done_at = datetime.utcnow()
+ assert osd_obj.drain_status_human() == 'done, waiting for purge'
+
+
+class TestOSDRemovalQueue:
+
+ def test_queue_size(self, osd_obj):
+ q = OSDRemovalQueue(mock.Mock())
+ assert q.queue_size() == 0
+ q.osds.add(osd_obj)
+ assert q.queue_size() == 1
+
+ @mock.patch("cephadm.services.osd.OSD.start")
+ @mock.patch("cephadm.services.osd.OSD.exists")
+ def test_enqueue(self, exist, start, osd_obj):
+ q = OSDRemovalQueue(mock.Mock())
+ q.enqueue(osd_obj)
+ osd_obj.start.assert_called_once()
+
+ @mock.patch("cephadm.services.osd.OSD.stop")
+ @mock.patch("cephadm.services.osd.OSD.exists")
+ def test_rm_raise(self, exist, stop, osd_obj):
+ q = OSDRemovalQueue(mock.Mock())
+ with pytest.raises(KeyError):
+ q.rm(osd_obj)
+ osd_obj.stop.assert_called_once()
+
+ @mock.patch("cephadm.services.osd.OSD.stop")
+ @mock.patch("cephadm.services.osd.OSD.exists")
+ def test_rm(self, exist, stop, osd_obj):
+ q = OSDRemovalQueue(mock.Mock())
+ q.osds.add(osd_obj)
+ q.rm(osd_obj)
+ osd_obj.stop.assert_called_once()
diff --git a/src/pybind/mgr/cephadm/tests/test_scheduling.py b/src/pybind/mgr/cephadm/tests/test_scheduling.py
new file mode 100644
index 000000000..2454dc0d1
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_scheduling.py
@@ -0,0 +1,1591 @@
+# Disable autopep8 for this file:
+
+# fmt: off
+
+from typing import NamedTuple, List, Dict, Optional
+import pytest
+
+from ceph.deployment.hostspec import HostSpec
+from ceph.deployment.service_spec import ServiceSpec, PlacementSpec, IngressSpec
+from ceph.deployment.hostspec import SpecValidationError
+
+from cephadm.module import HostAssignment
+from cephadm.schedule import DaemonPlacement
+from orchestrator import DaemonDescription, OrchestratorValidationError, OrchestratorError
+
+
+def wrapper(func):
+    # reverses the order of arguments: the assertion helper is built with its arguments first and the expected value is applied last
+ def inner(*args):
+ def inner2(expected):
+ func(expected, *args)
+ return inner2
+ return inner
+
+
+@wrapper
+def none(expected):
+ assert expected == []
+
+
+@wrapper
+def one_of(expected, *hosts):
+ if not isinstance(expected, list):
+ assert False, str(expected)
+ assert len(expected) == 1, f'one_of failed len({expected}) != 1'
+ assert expected[0] in hosts
+
+
+@wrapper
+def two_of(expected, *hosts):
+ if not isinstance(expected, list):
+ assert False, str(expected)
+    assert len(expected) == 2, f'two_of failed len({expected}) != 2'
+ matches = 0
+ for h in hosts:
+ matches += int(h in expected)
+ if matches != 2:
+ assert False, f'two of {hosts} not in {expected}'
+
+
+@wrapper
+def exactly(expected, *hosts):
+ assert expected == list(hosts)
+
+
+@wrapper
+def error(expected, kind, match):
+ assert isinstance(expected, kind), (str(expected), match)
+ assert str(expected) == match, (str(expected), match)
+
+
+@wrapper
+def _or(expected, *inners):
+ def catch(inner):
+ try:
+ inner(expected)
+ except AssertionError as e:
+ return e
+ result = [catch(i) for i in inners]
+ if None not in result:
+ assert False, f"_or failed: {expected}"
+
+
+def _always_true(_):
+ pass
+
+
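+# Split a test key such as "123 12 1 h" into its tokens, ignoring repeated spaces.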
+def k(s):
+ return [e for e in s.split(' ') if e]
+
+
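+# Return the first expected-result assertion whose key pattern matches `key` ('*' matches any token).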
+def get_result(key, results):
+ def match(one):
+ for o, k in zip(one, key):
+ if o != k and o != '*':
+ return False
+ return True
+ return [v for k, v in results if match(k)][0]
+
+
+def mk_spec_and_host(spec_section, hosts, explicit_key, explicit, count):
+
+ if spec_section == 'hosts':
+ mk_spec = lambda: ServiceSpec('mgr', placement=PlacementSpec( # noqa: E731
+ hosts=explicit,
+ count=count,
+ ))
+ elif spec_section == 'label':
+ mk_spec = lambda: ServiceSpec('mgr', placement=PlacementSpec( # noqa: E731
+ label='mylabel',
+ count=count,
+ ))
+ elif spec_section == 'host_pattern':
+ pattern = {
+ 'e': 'notfound',
+ '1': '1',
+ '12': '[1-2]',
+ '123': '*',
+ }[explicit_key]
+ mk_spec = lambda: ServiceSpec('mgr', placement=PlacementSpec( # noqa: E731
+ host_pattern=pattern,
+ count=count,
+ ))
+ else:
+ assert False
+
+ hosts = [
+ HostSpec(h, labels=['mylabel']) if h in explicit else HostSpec(h)
+ for h in hosts
+ ]
+
+ return mk_spec, hosts
+
+
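+# Run HostAssignment.place() for the given spec/hosts/daemons and check the outcome against
+# the expected-result table; the IndexError branch builds a hint for a missing table entry.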
+def run_scheduler_test(results, mk_spec, hosts, daemons, key_elems):
+ key = ' '.join('N' if e is None else str(e) for e in key_elems)
+ try:
+ assert_res = get_result(k(key), results)
+ except IndexError:
+ try:
+ spec = mk_spec()
+ host_res, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=hosts,
+ unreachable_hosts=[],
+ daemons=daemons,
+ ).place()
+ if isinstance(host_res, list):
+ e = ', '.join(repr(h.hostname) for h in host_res)
+ assert False, f'`(k("{key}"), exactly({e})),` not found'
+ assert False, f'`(k("{key}"), ...),` not found'
+ except OrchestratorError as e:
+ assert False, f'`(k("{key}"), error({type(e).__name__}, {repr(str(e))})),` not found'
+
+ for _ in range(10): # scheduler has a random component
+ try:
+ spec = mk_spec()
+ host_res, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=hosts,
+ unreachable_hosts=[],
+ daemons=daemons
+ ).place()
+
+ assert_res(sorted([h.hostname for h in host_res]))
+ except Exception as e:
+ assert_res(e)
+
+
+@pytest.mark.parametrize("dp,n,result",
+ [ # noqa: E128
+ (
+ DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80]),
+ 0,
+ DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80]),
+ ),
+ (
+ DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80]),
+ 2,
+ DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[82]),
+ ),
+ (
+ DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[80, 90]),
+ 2,
+ DaemonPlacement(daemon_type='mgr', hostname='host1', ports=[82, 92]),
+ ),
+ ])
+def test_daemon_placement_renumber(dp, n, result):
+ assert dp.renumber_ports(n) == result
+
+
+@pytest.mark.parametrize(
+ 'dp,dd,result',
+ [
+ (
+ DaemonPlacement(daemon_type='mgr', hostname='host1'),
+ DaemonDescription('mgr', 'a', 'host1'),
+ True
+ ),
+ (
+ DaemonPlacement(daemon_type='mgr', hostname='host1', name='a'),
+ DaemonDescription('mgr', 'a', 'host1'),
+ True
+ ),
+ (
+ DaemonPlacement(daemon_type='mon', hostname='host1', name='a'),
+ DaemonDescription('mgr', 'a', 'host1'),
+ False
+ ),
+ (
+ DaemonPlacement(daemon_type='mgr', hostname='host1', name='a'),
+ DaemonDescription('mgr', 'b', 'host1'),
+ False
+ ),
+ ])
+def test_daemon_placement_match(dp, dd, result):
+ assert dp.matches_daemon(dd) == result
+
+
+# * first match from the top wins
+# * where e=[], *=any
+#
+# + list of known hosts available for scheduling (host_key)
+# |   + hosts used for explicit placement (explicit_key)
+# | | + count
+# | | | + section (host, label, pattern)
+# | | | | + expected result
+# | | | | |
+test_explicit_scheduler_results = [
+ (k("* * 0 *"), error(SpecValidationError, 'num/count must be >= 1')),
+ (k("* e N l"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr>: No matching hosts for label mylabel')),
+ (k("* e N p"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr>: No matching hosts')),
+ (k("* e N h"), error(OrchestratorValidationError, 'placement spec is empty: no hosts, no label, no pattern, no count')),
+ (k("* e * *"), none),
+ (k("1 12 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 2: Unknown hosts")),
+ (k("1 123 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 2, 3: Unknown hosts")),
+ (k("1 * * *"), exactly('1')),
+ (k("12 1 * *"), exactly('1')),
+ (k("12 12 1 *"), one_of('1', '2')),
+ (k("12 12 * *"), exactly('1', '2')),
+ (k("12 123 * h"), error(OrchestratorValidationError, "Cannot place <ServiceSpec for service_name=mgr> on 3: Unknown hosts")),
+ (k("12 123 1 *"), one_of('1', '2', '3')),
+ (k("12 123 * *"), two_of('1', '2', '3')),
+ (k("123 1 * *"), exactly('1')),
+ (k("123 12 1 *"), one_of('1', '2')),
+ (k("123 12 * *"), exactly('1', '2')),
+ (k("123 123 1 *"), one_of('1', '2', '3')),
+ (k("123 123 2 *"), two_of('1', '2', '3')),
+ (k("123 123 * *"), exactly('1', '2', '3')),
+]
+
+
+@pytest.mark.parametrize("spec_section_key,spec_section",
+ [ # noqa: E128
+ ('h', 'hosts'),
+ ('l', 'label'),
+ ('p', 'host_pattern'),
+ ])
+@pytest.mark.parametrize("count",
+ [ # noqa: E128
+ None,
+ 0,
+ 1,
+ 2,
+ 3,
+ ])
+@pytest.mark.parametrize("explicit_key, explicit",
+ [ # noqa: E128
+ ('e', []),
+ ('1', ['1']),
+ ('12', ['1', '2']),
+ ('123', ['1', '2', '3']),
+ ])
+@pytest.mark.parametrize("host_key, hosts",
+ [ # noqa: E128
+ ('1', ['1']),
+ ('12', ['1', '2']),
+ ('123', ['1', '2', '3']),
+ ])
+def test_explicit_scheduler(host_key, hosts,
+ explicit_key, explicit,
+ count,
+ spec_section_key, spec_section):
+
+ mk_spec, hosts = mk_spec_and_host(spec_section, hosts, explicit_key, explicit, count)
+ run_scheduler_test(
+ results=test_explicit_scheduler_results,
+ mk_spec=mk_spec,
+ hosts=hosts,
+ daemons=[],
+ key_elems=(host_key, explicit_key, count, spec_section_key)
+ )
+
+
+# * first match from the top wins
+# * where e=[], *=any
+#
+# + list of known hosts available for scheduling (host_key)
+# |   + hosts used for explicit placement (explicit_key)
+# | | + count
+# | | | + existing daemons
+# | | | | + section (host, label, pattern)
+# | | | | | + expected result
+# | | | | | |
+test_scheduler_daemons_results = [
+ (k("* 1 * * *"), exactly('1')),
+ (k("1 123 * * h"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr> on 2, 3: Unknown hosts')),
+ (k("1 123 * * *"), exactly('1')),
+ (k("12 123 * * h"), error(OrchestratorValidationError, 'Cannot place <ServiceSpec for service_name=mgr> on 3: Unknown hosts')),
+ (k("12 123 N * *"), exactly('1', '2')),
+ (k("12 123 1 * *"), one_of('1', '2')),
+ (k("12 123 2 * *"), exactly('1', '2')),
+ (k("12 123 3 * *"), exactly('1', '2')),
+ (k("123 123 N * *"), exactly('1', '2', '3')),
+ (k("123 123 1 e *"), one_of('1', '2', '3')),
+ (k("123 123 1 1 *"), exactly('1')),
+ (k("123 123 1 3 *"), exactly('3')),
+ (k("123 123 1 12 *"), one_of('1', '2')),
+ (k("123 123 1 112 *"), one_of('1', '2')),
+ (k("123 123 1 23 *"), one_of('2', '3')),
+ (k("123 123 1 123 *"), one_of('1', '2', '3')),
+ (k("123 123 2 e *"), two_of('1', '2', '3')),
+ (k("123 123 2 1 *"), _or(exactly('1', '2'), exactly('1', '3'))),
+ (k("123 123 2 3 *"), _or(exactly('1', '3'), exactly('2', '3'))),
+ (k("123 123 2 12 *"), exactly('1', '2')),
+ (k("123 123 2 112 *"), exactly('1', '2')),
+ (k("123 123 2 23 *"), exactly('2', '3')),
+ (k("123 123 2 123 *"), two_of('1', '2', '3')),
+ (k("123 123 3 * *"), exactly('1', '2', '3')),
+]
+
+
+@pytest.mark.parametrize("spec_section_key,spec_section",
+ [ # noqa: E128
+ ('h', 'hosts'),
+ ('l', 'label'),
+ ('p', 'host_pattern'),
+ ])
+@pytest.mark.parametrize("daemons_key, daemons",
+ [ # noqa: E128
+ ('e', []),
+ ('1', ['1']),
+ ('3', ['3']),
+ ('12', ['1', '2']),
+ ('112', ['1', '1', '2']), # deal with existing co-located daemons
+ ('23', ['2', '3']),
+ ('123', ['1', '2', '3']),
+ ])
+@pytest.mark.parametrize("count",
+ [ # noqa: E128
+ None,
+ 1,
+ 2,
+ 3,
+ ])
+@pytest.mark.parametrize("explicit_key, explicit",
+ [ # noqa: E128
+ ('1', ['1']),
+ ('123', ['1', '2', '3']),
+ ])
+@pytest.mark.parametrize("host_key, hosts",
+ [ # noqa: E128
+ ('1', ['1']),
+ ('12', ['1', '2']),
+ ('123', ['1', '2', '3']),
+ ])
+def test_scheduler_daemons(host_key, hosts,
+ explicit_key, explicit,
+ count,
+ daemons_key, daemons,
+ spec_section_key, spec_section):
+ mk_spec, hosts = mk_spec_and_host(spec_section, hosts, explicit_key, explicit, count)
+ dds = [
+ DaemonDescription('mgr', d, d)
+ for d in daemons
+ ]
+ run_scheduler_test(
+ results=test_scheduler_daemons_results,
+ mk_spec=mk_spec,
+ hosts=hosts,
+ daemons=dds,
+ key_elems=(host_key, explicit_key, count, daemons_key, spec_section_key)
+ )
+
+
+# =========================
+
+
+class NodeAssignmentTest(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+ daemons: List[DaemonDescription]
+ rank_map: Optional[Dict[int, Dict[int, Optional[str]]]]
+ post_rank_map: Optional[Dict[int, Dict[int, Optional[str]]]]
+ expected: List[str]
+ expected_add: List[str]
+ expected_remove: List[DaemonDescription]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,daemons,rank_map,post_rank_map,expected,expected_add,expected_remove",
+ [ # noqa: E128
+ # just hosts
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(hosts=['smithi060']),
+ ['smithi060'],
+ [],
+ None, None,
+ ['mgr:smithi060'], ['mgr:smithi060'], []
+ ),
+ # all_hosts
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(host_pattern='*'),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ ],
+ None, None,
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ ['mgr:host3'],
+ []
+ ),
+ # all_hosts + count_per_host
+ NodeAssignmentTest(
+ 'mds',
+ PlacementSpec(host_pattern='*', count_per_host=2),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mds', 'a', 'host1'),
+ DaemonDescription('mds', 'b', 'host2'),
+ ],
+ None, None,
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+ ['mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+ []
+ ),
+    # count that is bigger than the number of hosts. Truncate to len(hosts)
+    # mgrs should not be co-located with each other.
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=4),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ []
+ ),
+    # count that is bigger than the number of hosts; wrap around.
+ NodeAssignmentTest(
+ 'mds',
+ PlacementSpec(count=6),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+ []
+ ),
+ # count + partial host list
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=3, hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ ],
+ None, None,
+ ['mgr:host3'],
+ ['mgr:host3'],
+ ['mgr.a', 'mgr.b']
+ ),
+ # count + partial host list (with colo)
+ NodeAssignmentTest(
+ 'mds',
+ PlacementSpec(count=3, hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mds', 'a', 'host1'),
+ DaemonDescription('mds', 'b', 'host2'),
+ ],
+ None, None,
+ ['mds:host3', 'mds:host3', 'mds:host3'],
+ ['mds:host3', 'mds:host3', 'mds:host3'],
+ ['mds.a', 'mds.b']
+ ),
+ # count 1 + partial host list
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1, hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ ],
+ None, None,
+ ['mgr:host3'],
+ ['mgr:host3'],
+ ['mgr.a', 'mgr.b']
+ ),
+ # count + partial host list + existing
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2, hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ ],
+ None, None,
+ ['mgr:host3'],
+ ['mgr:host3'],
+ ['mgr.a']
+ ),
+ # count + partial host list + existing (deterministic)
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2, hosts=['host1']),
+ 'host1 host2'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ ],
+ None, None,
+ ['mgr:host1'],
+ [],
+ []
+ ),
+ # count + partial host list + existing (deterministic)
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2, hosts=['host1']),
+ 'host1 host2'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host2'),
+ ],
+ None, None,
+ ['mgr:host1'],
+ ['mgr:host1'],
+ ['mgr.a']
+ ),
+ # label only
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(label='foo'),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ []
+ ),
+ # label + count (truncate to host list)
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=4, label='foo'),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ ['mgr:host1', 'mgr:host2', 'mgr:host3'],
+ []
+ ),
+ # label + count (with colo)
+ NodeAssignmentTest(
+ 'mds',
+ PlacementSpec(count=6, label='foo'),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'],
+ []
+ ),
+    # label only + count_per_host
+ NodeAssignmentTest(
+ 'mds',
+ PlacementSpec(label='foo', count_per_host=3),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3',
+ 'mds:host1', 'mds:host2', 'mds:host3'],
+ ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3',
+ 'mds:host1', 'mds:host2', 'mds:host3'],
+ []
+ ),
+ # host_pattern
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(host_pattern='mgr*'),
+ 'mgrhost1 mgrhost2 datahost'.split(),
+ [],
+ None, None,
+ ['mgr:mgrhost1', 'mgr:mgrhost2'],
+ ['mgr:mgrhost1', 'mgr:mgrhost2'],
+ []
+ ),
+ # host_pattern + count_per_host
+ NodeAssignmentTest(
+ 'mds',
+ PlacementSpec(host_pattern='mds*', count_per_host=3),
+ 'mdshost1 mdshost2 datahost'.split(),
+ [],
+ None, None,
+ ['mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2'],
+ ['mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2'],
+ []
+ ),
+ # label + count_per_host + ports
+ NodeAssignmentTest(
+ 'rgw',
+ PlacementSpec(count=6, label='foo'),
+ 'host1 host2 host3'.split(),
+ [],
+ None, None,
+ ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)',
+ 'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'],
+ ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)',
+ 'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'],
+ []
+ ),
+    # label + count_per_host + ports (+ existing)
+ NodeAssignmentTest(
+ 'rgw',
+ PlacementSpec(count=6, label='foo'),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('rgw', 'a', 'host1', ports=[81]),
+ DaemonDescription('rgw', 'b', 'host2', ports=[80]),
+ DaemonDescription('rgw', 'c', 'host1', ports=[82]),
+ ],
+ None, None,
+ ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)',
+ 'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'],
+ ['rgw:host1(*:80)', 'rgw:host3(*:80)',
+ 'rgw:host2(*:81)', 'rgw:host3(*:81)'],
+ ['rgw.c']
+ ),
+ # cephadm.py teuth case
+ NodeAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=3, hosts=['host1=y', 'host2=x']),
+ 'host1 host2'.split(),
+ [
+ DaemonDescription('mgr', 'y', 'host1'),
+ DaemonDescription('mgr', 'x', 'host2'),
+ ],
+ None, None,
+ ['mgr:host1(name=y)', 'mgr:host2(name=x)'],
+ [], []
+ ),
+
+    # note: host -> rank mapping is pseudo-random based on svc name, so these
+ # host/rank pairs may seem random but they match the nfs.mynfs seed used by
+ # the test.
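+    # for reference, rank_map entries take the form {rank: {generation: daemon_id or None}};
+    # e.g. {0: {1: '0.1'}} pairs with DaemonDescription('nfs', '0.1', ..., rank=0, rank_generation=1).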
+
+ # ranked, fresh
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ [],
+ {},
+ {0: {0: None}, 1: {0: None}, 2: {0: None}},
+ ['nfs:host3(rank=0.0)', 'nfs:host2(rank=1.0)', 'nfs:host1(rank=2.0)'],
+ ['nfs:host3(rank=0.0)', 'nfs:host2(rank=1.0)', 'nfs:host1(rank=2.0)'],
+ []
+ ),
+ # 21: ranked, exist
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1),
+ ],
+ {0: {1: '0.1'}},
+ {0: {1: '0.1'}, 1: {0: None}, 2: {0: None}},
+ ['nfs:host1(rank=0.1)', 'nfs:host3(rank=1.0)', 'nfs:host2(rank=2.0)'],
+ ['nfs:host3(rank=1.0)', 'nfs:host2(rank=2.0)'],
+ []
+ ),
+ # ranked, exist, different ranks
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1),
+ DaemonDescription('nfs', '1.1', 'host2', rank=1, rank_generation=1),
+ ],
+ {0: {1: '0.1'}, 1: {1: '1.1'}},
+ {0: {1: '0.1'}, 1: {1: '1.1'}, 2: {0: None}},
+ ['nfs:host1(rank=0.1)', 'nfs:host2(rank=1.1)', 'nfs:host3(rank=2.0)'],
+ ['nfs:host3(rank=2.0)'],
+ []
+ ),
+ # ranked, exist, different ranks (2)
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1),
+ DaemonDescription('nfs', '1.1', 'host3', rank=1, rank_generation=1),
+ ],
+ {0: {1: '0.1'}, 1: {1: '1.1'}},
+ {0: {1: '0.1'}, 1: {1: '1.1'}, 2: {0: None}},
+ ['nfs:host1(rank=0.1)', 'nfs:host3(rank=1.1)', 'nfs:host2(rank=2.0)'],
+ ['nfs:host2(rank=2.0)'],
+ []
+ ),
+ # ranked, exist, extra ranks
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5),
+ DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5),
+ DaemonDescription('nfs', '4.5', 'host2', rank=4, rank_generation=5),
+ ],
+ {0: {5: '0.5'}, 1: {5: '1.5'}},
+ {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {0: None}},
+ ['nfs:host1(rank=0.5)', 'nfs:host2(rank=1.5)', 'nfs:host3(rank=2.0)'],
+ ['nfs:host3(rank=2.0)'],
+ ['nfs.4.5']
+ ),
+ # 25: ranked, exist, extra ranks (scale down: kill off high rank)
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=2),
+ 'host3 host2 host1'.split(),
+ [
+ DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5),
+ DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5),
+ DaemonDescription('nfs', '2.5', 'host3', rank=2, rank_generation=5),
+ ],
+ {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}},
+ {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}},
+ ['nfs:host1(rank=0.5)', 'nfs:host2(rank=1.5)'],
+ [],
+ ['nfs.2.5']
+ ),
+ # ranked, exist, extra ranks (scale down hosts)
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=2),
+ 'host1 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5),
+ DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5),
+ DaemonDescription('nfs', '2.5', 'host3', rank=4, rank_generation=5),
+ ],
+ {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}},
+ {0: {5: '0.5'}, 1: {5: '1.5', 6: None}, 2: {5: '2.5'}},
+ ['nfs:host1(rank=0.5)', 'nfs:host3(rank=1.6)'],
+ ['nfs:host3(rank=1.6)'],
+ ['nfs.2.5', 'nfs.1.5']
+ ),
+ # ranked, exist, duplicate rank
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.0', 'host1', rank=0, rank_generation=0),
+ DaemonDescription('nfs', '1.1', 'host2', rank=1, rank_generation=1),
+ DaemonDescription('nfs', '1.2', 'host3', rank=1, rank_generation=2),
+ ],
+ {0: {0: '0.0'}, 1: {2: '1.2'}},
+ {0: {0: '0.0'}, 1: {2: '1.2'}, 2: {0: None}},
+ ['nfs:host1(rank=0.0)', 'nfs:host3(rank=1.2)', 'nfs:host2(rank=2.0)'],
+ ['nfs:host2(rank=2.0)'],
+ ['nfs.1.1']
+ ),
+ # 28: ranked, all gens stale (failure during update cycle)
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2),
+ DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2),
+ ],
+ {0: {2: '0.2'}, 1: {2: '1.2', 3: '1.3'}},
+ {0: {2: '0.2'}, 1: {2: '1.2', 3: '1.3', 4: None}},
+ ['nfs:host1(rank=0.2)', 'nfs:host3(rank=1.4)'],
+ ['nfs:host3(rank=1.4)'],
+ ['nfs.1.2']
+ ),
+ # ranked, not enough hosts
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(count=4),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2),
+ DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2),
+ ],
+ {0: {2: '0.2'}, 1: {2: '1.2'}},
+ {0: {2: '0.2'}, 1: {2: '1.2'}, 2: {0: None}},
+ ['nfs:host1(rank=0.2)', 'nfs:host2(rank=1.2)', 'nfs:host3(rank=2.0)'],
+ ['nfs:host3(rank=2.0)'],
+ []
+ ),
+ # ranked, scale down
+ NodeAssignmentTest(
+ 'nfs',
+ PlacementSpec(hosts=['host2']),
+ 'host1 host2'.split(),
+ [
+ DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2),
+ DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2),
+ DaemonDescription('nfs', '2.2', 'host3', rank=2, rank_generation=2),
+ ],
+ {0: {2: '0.2'}, 1: {2: '1.2'}, 2: {2: '2.2'}},
+ {0: {2: '0.2', 3: None}, 1: {2: '1.2'}, 2: {2: '2.2'}},
+ ['nfs:host2(rank=0.3)'],
+ ['nfs:host2(rank=0.3)'],
+ ['nfs.0.2', 'nfs.1.2', 'nfs.2.2']
+ ),
+
+ ])
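+# rank_map is passed into HostAssignment and mutated in place by place();
+# post_rank_map is the state it is expected to end up in (asserted below).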
+def test_node_assignment(service_type, placement, hosts, daemons, rank_map, post_rank_map,
+ expected, expected_add, expected_remove):
+ spec = None
+ service_id = None
+ allow_colo = False
+ if service_type == 'rgw':
+ service_id = 'realm.zone'
+ allow_colo = True
+ elif service_type == 'mds':
+ service_id = 'myfs'
+ allow_colo = True
+ elif service_type == 'nfs':
+ service_id = 'mynfs'
+ spec = ServiceSpec(service_type=service_type,
+ service_id=service_id,
+ placement=placement)
+
+ if not spec:
+ spec = ServiceSpec(service_type=service_type,
+ service_id=service_id,
+ placement=placement)
+
+ all_slots, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=[HostSpec(h, labels=['foo']) for h in hosts],
+ unreachable_hosts=[],
+ daemons=daemons,
+ allow_colo=allow_colo,
+ rank_map=rank_map,
+ ).place()
+
+ assert rank_map == post_rank_map
+
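+    # '*' entries in the expected lists act as wildcards: each consumes one
+    # arbitrary leftover slot rather than requiring an exact match.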
+ got = [str(p) for p in all_slots]
+ num_wildcard = 0
+ for i in expected:
+ if i == '*':
+ num_wildcard += 1
+ else:
+ assert i in got
+ got.remove(i)
+ assert num_wildcard == len(got)
+
+ got = [str(p) for p in to_add]
+ num_wildcard = 0
+ for i in expected_add:
+ if i == '*':
+ num_wildcard += 1
+ else:
+ assert i in got
+ got.remove(i)
+ assert num_wildcard == len(got)
+
+ assert sorted([d.name() for d in to_remove]) == sorted(expected_remove)
+
+
+class NodeAssignmentTest5(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ available_hosts: List[str]
+    expected_candidates: List[str]
+
+
+@pytest.mark.parametrize("service_type, placement, available_hosts, expected_candidates",
+ [ # noqa: E128
+ NodeAssignmentTest5(
+ 'alertmanager',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host3 host1 host4 host2'.split(),
+ ),
+ NodeAssignmentTest5(
+ 'prometheus',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host3 host2 host4 host1'.split(),
+ ),
+ NodeAssignmentTest5(
+ 'grafana',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host1 host2 host4 host3'.split(),
+ ),
+ NodeAssignmentTest5(
+ 'mgr',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host4 host2 host1 host3'.split(),
+ ),
+ NodeAssignmentTest5(
+ 'mon',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host1 host3 host4 host2'.split(),
+ ),
+ NodeAssignmentTest5(
+ 'rgw',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host1 host3 host2 host4'.split(),
+ ),
+ NodeAssignmentTest5(
+ 'cephfs-mirror',
+ PlacementSpec(hosts='host1 host2 host3 host4'.split()),
+ 'host1 host2 host3 host4'.split(),
+ 'host4 host3 host1 host2'.split(),
+ ),
+ ])
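+# The expected candidate orderings assume get_candidates() shuffles hosts
+# deterministically (presumably seeded by the service name), so the same host
+# list yields a different but stable order for each service type.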
+def test_node_assignment_random_shuffle(service_type, placement, available_hosts, expected_candidates):
+ spec = None
+ service_id = None
+ allow_colo = False
+ spec = ServiceSpec(service_type=service_type,
+ service_id=service_id,
+ placement=placement)
+
+ candidates = HostAssignment(
+ spec=spec,
+ hosts=[HostSpec(h, labels=['foo']) for h in available_hosts],
+ unreachable_hosts=[],
+ daemons=[],
+ allow_colo=allow_colo,
+ ).get_candidates()
+
+ candidates_hosts = [h.hostname for h in candidates]
+ assert candidates_hosts == expected_candidates
+
+
+class NodeAssignmentTest2(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+ daemons: List[DaemonDescription]
+ expected_len: int
+ in_set: List[str]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected_len,in_set",
+ [ # noqa: E128
+ # just count
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [],
+ 1,
+ ['host1', 'host2', 'host3'],
+ ),
+
+ # hosts + (smaller) count
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=1, hosts='host1 host2'.split()),
+ 'host1 host2'.split(),
+ [],
+ 1,
+ ['host1', 'host2'],
+ ),
+ # hosts + (smaller) count, existing
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=1, hosts='host1 host2 host3'.split()),
+ 'host1 host2 host3'.split(),
+ [DaemonDescription('mgr', 'mgr.a', 'host1')],
+ 1,
+ ['host1', 'host2', 'host3'],
+ ),
+ # hosts + (smaller) count, (more) existing
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=1, hosts='host1 host2 host3'.split()),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ ],
+ 1,
+ ['host1', 'host2']
+ ),
+ # count + partial host list
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=2, hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ [],
+ 1,
+ ['host1', 'host2', 'host3']
+ ),
+ # label + count
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=1, label='foo'),
+ 'host1 host2 host3'.split(),
+ [],
+ 1,
+ ['host1', 'host2', 'host3']
+ ),
+ ])
+def test_node_assignment2(service_type, placement, hosts,
+ daemons, expected_len, in_set):
+ hosts, to_add, to_remove = HostAssignment(
+ spec=ServiceSpec(service_type, placement=placement),
+ hosts=[HostSpec(h, labels=['foo']) for h in hosts],
+ unreachable_hosts=[],
+ daemons=daemons,
+ ).place()
+ assert len(hosts) == expected_len
+ for h in [h.hostname for h in hosts]:
+ assert h in in_set
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected_len,must_have",
+ [ # noqa: E128
+                             # count larger than host list
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=3, hosts='host3'.split()),
+ 'host1 host2 host3'.split(),
+ [],
+ 1,
+ ['host3']
+ ),
+ # count + partial host list
+ NodeAssignmentTest2(
+ 'mgr',
+ PlacementSpec(count=2, hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ [],
+ 1,
+ ['host3']
+ ),
+ ])
+def test_node_assignment3(service_type, placement, hosts,
+ daemons, expected_len, must_have):
+ hosts, to_add, to_remove = HostAssignment(
+ spec=ServiceSpec(service_type, placement=placement),
+ hosts=[HostSpec(h) for h in hosts],
+ unreachable_hosts=[],
+ daemons=daemons,
+ ).place()
+ assert len(hosts) == expected_len
+ for h in must_have:
+ assert h in [h.hostname for h in hosts]
+
+
+class NodeAssignmentTest4(NamedTuple):
+ spec: ServiceSpec
+ networks: Dict[str, Dict[str, Dict[str, List[str]]]]
+ daemons: List[DaemonDescription]
+ expected: List[str]
+ expected_add: List[str]
+ expected_remove: List[DaemonDescription]
+
+
+@pytest.mark.parametrize("spec,networks,daemons,expected,expected_add,expected_remove",
+ [ # noqa: E128
+ NodeAssignmentTest4(
+ ServiceSpec(
+ service_type='rgw',
+ service_id='foo',
+ placement=PlacementSpec(count=6, label='foo'),
+ networks=['10.0.0.0/8'],
+ ),
+ {
+ 'host1': {'10.0.0.0/8': {'eth0': ['10.0.0.1']}},
+ 'host2': {'10.0.0.0/8': {'eth0': ['10.0.0.2']}},
+ 'host3': {'192.168.0.0/16': {'eth0': ['192.168.0.1']}},
+ },
+ [],
+ ['rgw:host1(10.0.0.1:80)', 'rgw:host2(10.0.0.2:80)',
+ 'rgw:host1(10.0.0.1:81)', 'rgw:host2(10.0.0.2:81)',
+ 'rgw:host1(10.0.0.1:82)', 'rgw:host2(10.0.0.2:82)'],
+ ['rgw:host1(10.0.0.1:80)', 'rgw:host2(10.0.0.2:80)',
+ 'rgw:host1(10.0.0.1:81)', 'rgw:host2(10.0.0.2:81)',
+ 'rgw:host1(10.0.0.1:82)', 'rgw:host2(10.0.0.2:82)'],
+ []
+ ),
+ NodeAssignmentTest4(
+ IngressSpec(
+ service_type='ingress',
+ service_id='rgw.foo',
+ frontend_port=443,
+ monitor_port=8888,
+ virtual_ip='10.0.0.20/8',
+ backend_service='rgw.foo',
+ placement=PlacementSpec(label='foo'),
+ networks=['10.0.0.0/8'],
+ ),
+ {
+ 'host1': {'10.0.0.0/8': {'eth0': ['10.0.0.1']}},
+ 'host2': {'10.0.0.0/8': {'eth1': ['10.0.0.2']}},
+ 'host3': {'192.168.0.0/16': {'eth2': ['192.168.0.1']}},
+ },
+ [],
+ ['haproxy:host1(10.0.0.1:443,8888)', 'haproxy:host2(10.0.0.2:443,8888)',
+ 'keepalived:host1', 'keepalived:host2'],
+ ['haproxy:host1(10.0.0.1:443,8888)', 'haproxy:host2(10.0.0.2:443,8888)',
+ 'keepalived:host1', 'keepalived:host2'],
+ []
+ ),
+ NodeAssignmentTest4(
+ IngressSpec(
+ service_type='ingress',
+ service_id='rgw.foo',
+ frontend_port=443,
+ monitor_port=8888,
+ virtual_ip='10.0.0.20/8',
+ backend_service='rgw.foo',
+ placement=PlacementSpec(label='foo'),
+ networks=['10.0.0.0/8'],
+ ),
+ {
+ 'host1': {'10.0.0.0/8': {'eth0': ['10.0.0.1']}},
+ 'host2': {'10.0.0.0/8': {'eth1': ['10.0.0.2']}},
+ 'host3': {'192.168.0.0/16': {'eth2': ['192.168.0.1']}},
+ },
+ [
+ DaemonDescription('haproxy', 'a', 'host1', ip='10.0.0.1',
+ ports=[443, 8888]),
+ DaemonDescription('keepalived', 'b', 'host2'),
+ DaemonDescription('keepalived', 'c', 'host3'),
+ ],
+ ['haproxy:host1(10.0.0.1:443,8888)', 'haproxy:host2(10.0.0.2:443,8888)',
+ 'keepalived:host1', 'keepalived:host2'],
+ ['haproxy:host2(10.0.0.2:443,8888)',
+ 'keepalived:host1'],
+ ['keepalived.c']
+ ),
+ ])
+def test_node_assignment4(spec, networks, daemons,
+ expected, expected_add, expected_remove):
+ all_slots, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=[HostSpec(h, labels=['foo']) for h in networks.keys()],
+ unreachable_hosts=[],
+ daemons=daemons,
+ allow_colo=True,
+ networks=networks,
+ primary_daemon_type='haproxy' if spec.service_type == 'ingress' else spec.service_type,
+ per_host_daemon_type='keepalived' if spec.service_type == 'ingress' else None,
+ ).place()
+
+ got = [str(p) for p in all_slots]
+ num_wildcard = 0
+ for i in expected:
+ if i == '*':
+ num_wildcard += 1
+ else:
+ assert i in got
+ got.remove(i)
+ assert num_wildcard == len(got)
+
+ got = [str(p) for p in to_add]
+ num_wildcard = 0
+ for i in expected_add:
+ if i == '*':
+ num_wildcard += 1
+ else:
+ assert i in got
+ got.remove(i)
+ assert num_wildcard == len(got)
+
+ assert sorted([d.name() for d in to_remove]) == sorted(expected_remove)
+
+
+@pytest.mark.parametrize("placement",
+ [ # noqa: E128
+ ('1 *'),
+ ('* label:foo'),
+ ('* host1 host2'),
+ ('hostname12hostname12hostname12hostname12hostname12hostname12hostname12'), # > 63 chars
+ ])
+def test_bad_placements(placement):
+ try:
+ PlacementSpec.from_string(placement.split(' '))
+ assert False
+ except SpecValidationError:
+ pass
+
+
+class NodeAssignmentTestBadSpec(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+ daemons: List[DaemonDescription]
+ expected: str
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected",
+ [ # noqa: E128
+ # unknown host
+ NodeAssignmentTestBadSpec(
+ 'mgr',
+ PlacementSpec(hosts=['unknownhost']),
+ ['knownhost'],
+ [],
+ "Cannot place <ServiceSpec for service_name=mgr> on unknownhost: Unknown hosts"
+ ),
+ # unknown host pattern
+ NodeAssignmentTestBadSpec(
+ 'mgr',
+ PlacementSpec(host_pattern='unknownhost'),
+ ['knownhost'],
+ [],
+ "Cannot place <ServiceSpec for service_name=mgr>: No matching hosts"
+ ),
+ # unknown label
+ NodeAssignmentTestBadSpec(
+ 'mgr',
+ PlacementSpec(label='unknownlabel'),
+ [],
+ [],
+ "Cannot place <ServiceSpec for service_name=mgr>: No matching hosts for label unknownlabel"
+ ),
+ ])
+def test_bad_specs(service_type, placement, hosts, daemons, expected):
+ with pytest.raises(OrchestratorValidationError) as e:
+ hosts, to_add, to_remove = HostAssignment(
+ spec=ServiceSpec(service_type, placement=placement),
+ hosts=[HostSpec(h) for h in hosts],
+ unreachable_hosts=[],
+ daemons=daemons,
+ ).place()
+ assert str(e.value) == expected
+
+
+class ActiveAssignmentTest(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+ daemons: List[DaemonDescription]
+ expected: List[List[str]]
+ expected_add: List[List[str]]
+ expected_remove: List[List[str]]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected,expected_add,expected_remove",
+ [
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1', is_active=True),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3'),
+ ],
+ [['host1', 'host2'], ['host1', 'host3']],
+ [[]],
+ [['mgr.b'], ['mgr.c']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host1', 'host3'], ['host2', 'host3']],
+ [[]],
+ [['mgr.a'], ['mgr.b']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2', is_active=True),
+ DaemonDescription('mgr', 'c', 'host3'),
+ ],
+ [['host2']],
+ [[]],
+ [['mgr.a', 'mgr.c']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host3']],
+ [[]],
+ [['mgr.a', 'mgr.b']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1', is_active=True),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host1'], ['host3']],
+ [[]],
+ [['mgr.a', 'mgr.b'], ['mgr.b', 'mgr.c']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2', is_active=True),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host2', 'host3']],
+ [[]],
+ [['mgr.a']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1', is_active=True),
+ DaemonDescription('mgr', 'b', 'host2', is_active=True),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host1'], ['host2'], ['host3']],
+ [[]],
+ [['mgr.a', 'mgr.b'], ['mgr.b', 'mgr.c'], ['mgr.a', 'mgr.c']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1', is_active=True),
+ DaemonDescription('mgr', 'a2', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3'),
+ ],
+ [['host1']],
+ [[]],
+ [['mgr.a2', 'mgr.b', 'mgr.c']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1', is_active=True),
+ DaemonDescription('mgr', 'a2', 'host1', is_active=True),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3'),
+ ],
+ [['host1']],
+ [[]],
+ [['mgr.a', 'mgr.b', 'mgr.c'], ['mgr.a2', 'mgr.b', 'mgr.c']]
+ ),
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1', is_active=True),
+ DaemonDescription('mgr', 'a2', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host1', 'host3']],
+ [[]],
+ [['mgr.a2', 'mgr.b']]
+ ),
+ # Explicit placement should override preference for active daemon
+ ActiveAssignmentTest(
+ 'mgr',
+ PlacementSpec(count=1, hosts=['host1']),
+ 'host1 host2 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [['host1']],
+ [[]],
+ [['mgr.b', 'mgr.c']]
+ ),
+
+ ])
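+# expected/expected_add/expected_remove each list every acceptable outcome; the
+# asserts below only check membership, since ties between equally valid
+# placements (e.g. two active daemons) can be resolved either way.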
+def test_active_assignment(service_type, placement, hosts, daemons, expected, expected_add, expected_remove):
+
+ spec = ServiceSpec(service_type=service_type,
+ service_id=None,
+ placement=placement)
+
+ hosts, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=[HostSpec(h) for h in hosts],
+ unreachable_hosts=[],
+ daemons=daemons,
+ ).place()
+ assert sorted([h.hostname for h in hosts]) in expected
+ assert sorted([h.hostname for h in to_add]) in expected_add
+ assert sorted([h.name() for h in to_remove]) in expected_remove
+
+
+class UnreachableHostsTest(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+    unreachable_hosts: List[str]
+ daemons: List[DaemonDescription]
+ expected_add: List[List[str]]
+ expected_remove: List[List[str]]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,unreachable_hosts,daemons,expected_add,expected_remove",
+ [
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ ['host2'],
+ [],
+ [['host1', 'host3']],
+ [[]],
+ ),
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ ['host1'],
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [[]],
+ [['mgr.b']],
+ ),
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=3),
+ 'host1 host2 host3 host4'.split(),
+ ['host1'],
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [[]],
+ [[]],
+ ),
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3 host4'.split(),
+ 'host1 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [[]],
+ [['mgr.b']],
+ ),
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=3),
+ 'host1 host2 host3 host4'.split(),
+ ['host2'],
+ [],
+ [['host1', 'host3', 'host4']],
+ [[]],
+ ),
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=3),
+ 'host1 host2 host3 host4'.split(),
+ 'host1 host4'.split(),
+ [],
+ [['host2', 'host3']],
+ [[]],
+ ),
+
+ ])
+def test_unreachable_host(service_type, placement, hosts, unreachable_hosts, daemons, expected_add, expected_remove):
+
+ spec = ServiceSpec(service_type=service_type,
+ service_id=None,
+ placement=placement)
+
+ hosts, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=[HostSpec(h) for h in hosts],
+ unreachable_hosts=[HostSpec(h) for h in unreachable_hosts],
+ daemons=daemons,
+ ).place()
+ assert sorted([h.hostname for h in to_add]) in expected_add
+ assert sorted([h.name() for h in to_remove]) in expected_remove
+
+
+class RescheduleFromOfflineTest(NamedTuple):
+ service_type: str
+ placement: PlacementSpec
+ hosts: List[str]
+ maintenance_hosts: List[str]
+ offline_hosts: List[str]
+ daemons: List[DaemonDescription]
+ expected_add: List[List[str]]
+ expected_remove: List[List[str]]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,maintenance_hosts,offline_hosts,daemons,expected_add,expected_remove",
+ [
+ RescheduleFromOfflineTest(
+ 'nfs',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [],
+ ['host2'],
+ [
+ DaemonDescription('nfs', 'a', 'host1'),
+ DaemonDescription('nfs', 'b', 'host2'),
+ ],
+ [['host3']],
+ [[]],
+ ),
+ RescheduleFromOfflineTest(
+ 'nfs',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ ['host2'],
+ [],
+ [
+ DaemonDescription('nfs', 'a', 'host1'),
+ DaemonDescription('nfs', 'b', 'host2'),
+ ],
+ [[]],
+ [[]],
+ ),
+ RescheduleFromOfflineTest(
+ 'mon',
+ PlacementSpec(count=2),
+ 'host1 host2 host3'.split(),
+ [],
+ ['host2'],
+ [
+ DaemonDescription('mon', 'a', 'host1'),
+ DaemonDescription('mon', 'b', 'host2'),
+ ],
+ [[]],
+ [[]],
+ ),
+ ])
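+# These cases contrast offline and maintenance hosts: an nfs daemon on an
+# offline host is rescheduled to another host, a host in maintenance keeps its
+# daemon, and mon daemons are not rescheduled away from an offline host.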
+def test_remove_from_offline(service_type, placement, hosts, maintenance_hosts, offline_hosts, daemons, expected_add, expected_remove):
+
+ spec = ServiceSpec(service_type=service_type,
+ service_id='test',
+ placement=placement)
+
+ host_specs = [HostSpec(h) for h in hosts]
+ for h in host_specs:
+ if h.hostname in offline_hosts:
+ h.status = 'offline'
+ if h.hostname in maintenance_hosts:
+ h.status = 'maintenance'
+
+ hosts, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=host_specs,
+ unreachable_hosts=[h for h in host_specs if h.status],
+ daemons=daemons,
+ ).place()
+ assert sorted([h.hostname for h in to_add]) in expected_add
+ assert sorted([h.name() for h in to_remove]) in expected_remove
diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py
new file mode 100644
index 000000000..57cd12456
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_services.py
@@ -0,0 +1,1031 @@
+from textwrap import dedent
+import json
+import yaml
+
+import pytest
+
+from unittest.mock import MagicMock, call, patch, ANY
+
+from cephadm.serve import CephadmServe
+from cephadm.services.cephadmservice import MonService, MgrService, MdsService, RgwService, \
+ RbdMirrorService, CrashService, CephadmDaemonDeploySpec
+from cephadm.services.iscsi import IscsiService
+from cephadm.services.nfs import NFSService
+from cephadm.services.osd import OSDService
+from cephadm.services.monitoring import GrafanaService, AlertmanagerService, PrometheusService, \
+ NodeExporterService
+from cephadm.services.exporter import CephadmExporter
+from cephadm.module import CephadmOrchestrator
+from ceph.deployment.service_spec import IscsiServiceSpec, MonitoringSpec, AlertManagerSpec, \
+ ServiceSpec, RGWSpec, GrafanaSpec, SNMPGatewaySpec, IngressSpec, PlacementSpec
+from cephadm.tests.fixtures import with_host, with_service, _run_cephadm
+
+from orchestrator import OrchestratorError
+from orchestrator._interface import DaemonDescription
+
+
+class FakeInventory:
+ def get_addr(self, name: str) -> str:
+ return '1.2.3.4'
+
+
+class FakeMgr:
+ def __init__(self):
+ self.config = ''
+ self.check_mon_command = MagicMock(side_effect=self._check_mon_command)
+ self.mon_command = MagicMock(side_effect=self._check_mon_command)
+ self.template = MagicMock()
+ self.log = MagicMock()
+ self.inventory = FakeInventory()
+
+ def _check_mon_command(self, cmd_dict, inbuf=None):
+ prefix = cmd_dict.get('prefix')
+ if prefix == 'get-cmd':
+ return 0, self.config, ''
+ if prefix == 'set-cmd':
+ self.config = cmd_dict.get('value')
+ return 0, 'value set', ''
+ return -1, '', 'error'
+
+ def get_minimal_ceph_conf(self) -> str:
+ return ''
+
+ def get_mgr_ip(self) -> str:
+ return '1.2.3.4'
+
+
+class TestCephadmService:
+ def test_set_service_url_on_dashboard(self):
+ # pylint: disable=protected-access
+ mgr = FakeMgr()
+ service_url = 'http://svc:1000'
+ service = GrafanaService(mgr)
+ service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url)
+ assert mgr.config == service_url
+
+ # set-cmd should not be called if value doesn't change
+ mgr.check_mon_command.reset_mock()
+ service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url)
+ mgr.check_mon_command.assert_called_once_with({'prefix': 'get-cmd'})
+
+ def _get_services(self, mgr):
+ # services:
+ osd_service = OSDService(mgr)
+ nfs_service = NFSService(mgr)
+ mon_service = MonService(mgr)
+ mgr_service = MgrService(mgr)
+ mds_service = MdsService(mgr)
+ rgw_service = RgwService(mgr)
+ rbd_mirror_service = RbdMirrorService(mgr)
+ grafana_service = GrafanaService(mgr)
+ alertmanager_service = AlertmanagerService(mgr)
+ prometheus_service = PrometheusService(mgr)
+ node_exporter_service = NodeExporterService(mgr)
+ crash_service = CrashService(mgr)
+ iscsi_service = IscsiService(mgr)
+ cephadm_exporter_service = CephadmExporter(mgr)
+ cephadm_services = {
+ 'mon': mon_service,
+ 'mgr': mgr_service,
+ 'osd': osd_service,
+ 'mds': mds_service,
+ 'rgw': rgw_service,
+ 'rbd-mirror': rbd_mirror_service,
+ 'nfs': nfs_service,
+ 'grafana': grafana_service,
+ 'alertmanager': alertmanager_service,
+ 'prometheus': prometheus_service,
+ 'node-exporter': node_exporter_service,
+ 'crash': crash_service,
+ 'iscsi': iscsi_service,
+ 'cephadm-exporter': cephadm_exporter_service,
+ }
+ return cephadm_services
+
+ def test_get_auth_entity(self):
+ mgr = FakeMgr()
+ cephadm_services = self._get_services(mgr)
+
+ for daemon_type in ['rgw', 'rbd-mirror', 'nfs', "iscsi"]:
+ assert "client.%s.id1" % (daemon_type) == \
+ cephadm_services[daemon_type].get_auth_entity("id1", "host")
+ assert "client.%s.id1" % (daemon_type) == \
+ cephadm_services[daemon_type].get_auth_entity("id1", "")
+ assert "client.%s.id1" % (daemon_type) == \
+ cephadm_services[daemon_type].get_auth_entity("id1")
+
+ assert "client.crash.host" == \
+ cephadm_services["crash"].get_auth_entity("id1", "host")
+ with pytest.raises(OrchestratorError):
+ cephadm_services["crash"].get_auth_entity("id1", "")
+ cephadm_services["crash"].get_auth_entity("id1")
+
+ assert "mon." == cephadm_services["mon"].get_auth_entity("id1", "host")
+ assert "mon." == cephadm_services["mon"].get_auth_entity("id1", "")
+ assert "mon." == cephadm_services["mon"].get_auth_entity("id1")
+
+ assert "mgr.id1" == cephadm_services["mgr"].get_auth_entity("id1", "host")
+ assert "mgr.id1" == cephadm_services["mgr"].get_auth_entity("id1", "")
+ assert "mgr.id1" == cephadm_services["mgr"].get_auth_entity("id1")
+
+ for daemon_type in ["osd", "mds"]:
+ assert "%s.id1" % daemon_type == \
+ cephadm_services[daemon_type].get_auth_entity("id1", "host")
+ assert "%s.id1" % daemon_type == \
+ cephadm_services[daemon_type].get_auth_entity("id1", "")
+ assert "%s.id1" % daemon_type == \
+ cephadm_services[daemon_type].get_auth_entity("id1")
+
+ # services based on CephadmService shouldn't have get_auth_entity
+ with pytest.raises(AttributeError):
+ for daemon_type in ['grafana', 'alertmanager', 'prometheus', 'node-exporter', 'cephadm-exporter']:
+ cephadm_services[daemon_type].get_auth_entity("id1", "host")
+ cephadm_services[daemon_type].get_auth_entity("id1", "")
+ cephadm_services[daemon_type].get_auth_entity("id1")
+
+
+class TestISCSIService:
+
+ mgr = FakeMgr()
+ iscsi_service = IscsiService(mgr)
+
+ iscsi_spec = IscsiServiceSpec(service_type='iscsi', service_id="a")
+ iscsi_spec.daemon_type = "iscsi"
+ iscsi_spec.daemon_id = "a"
+ iscsi_spec.spec = MagicMock()
+ iscsi_spec.spec.daemon_type = "iscsi"
+ iscsi_spec.spec.ssl_cert = ''
+ iscsi_spec.api_user = "user"
+ iscsi_spec.api_password = "password"
+ iscsi_spec.api_port = 5000
+ iscsi_spec.api_secure = False
+ iscsi_spec.ssl_cert = "cert"
+ iscsi_spec.ssl_key = "key"
+
+ mgr.spec_store = MagicMock()
+ mgr.spec_store.all_specs.get.return_value = iscsi_spec
+
+ def test_iscsi_client_caps(self):
+
+ iscsi_daemon_spec = CephadmDaemonDeploySpec(
+ host='host', daemon_id='a', service_name=self.iscsi_spec.service_name())
+
+ self.iscsi_service.prepare_create(iscsi_daemon_spec)
+
+ expected_caps = ['mon',
+ 'profile rbd, allow command "osd blocklist", allow command "config-key get" with "key" prefix "iscsi/"',
+ 'mgr', 'allow command "service status"',
+ 'osd', 'allow rwx']
+
+ expected_call = call({'prefix': 'auth get-or-create',
+ 'entity': 'client.iscsi.a',
+ 'caps': expected_caps})
+ expected_call2 = call({'prefix': 'auth caps',
+ 'entity': 'client.iscsi.a',
+ 'caps': expected_caps})
+
+ assert expected_call in self.mgr.mon_command.mock_calls
+ assert expected_call2 in self.mgr.mon_command.mock_calls
+
+ @patch('cephadm.utils.resolve_ip')
+ def test_iscsi_dashboard_config(self, mock_resolve_ip):
+
+ self.mgr.check_mon_command = MagicMock()
+ self.mgr.check_mon_command.return_value = ('', '{"gateways": {}}', '')
+
+ # Case 1: use IPV4 address
+ id1 = DaemonDescription(daemon_type='iscsi', hostname="testhost1",
+ daemon_id="a", ip='192.168.1.1')
+ daemon_list = [id1]
+ mock_resolve_ip.return_value = '192.168.1.1'
+
+ self.iscsi_service.config_dashboard(daemon_list)
+
+ dashboard_expected_call = call({'prefix': 'dashboard iscsi-gateway-add',
+ 'name': 'testhost1'},
+ 'http://user:password@192.168.1.1:5000')
+
+ assert dashboard_expected_call in self.mgr.check_mon_command.mock_calls
+
+ # Case 2: use IPV6 address
+ self.mgr.check_mon_command.reset_mock()
+
+ id1 = DaemonDescription(daemon_type='iscsi', hostname="testhost1",
+ daemon_id="a", ip='FEDC:BA98:7654:3210:FEDC:BA98:7654:3210')
+ mock_resolve_ip.return_value = 'FEDC:BA98:7654:3210:FEDC:BA98:7654:3210'
+
+ self.iscsi_service.config_dashboard(daemon_list)
+
+ dashboard_expected_call = call({'prefix': 'dashboard iscsi-gateway-add',
+ 'name': 'testhost1'},
+ 'http://user:password@[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:5000')
+
+ assert dashboard_expected_call in self.mgr.check_mon_command.mock_calls
+
+        # Case 3: use IPV6 address, secure protocol
+ self.mgr.check_mon_command.reset_mock()
+
+ self.iscsi_spec.api_secure = True
+
+ self.iscsi_service.config_dashboard(daemon_list)
+
+ dashboard_expected_call = call({'prefix': 'dashboard iscsi-gateway-add',
+ 'name': 'testhost1'},
+ 'https://user:password@[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:5000')
+
+ assert dashboard_expected_call in self.mgr.check_mon_command.mock_calls
+
+
+class TestMonitoring:
+ def _get_config(self, url: str) -> str:
+ return f"""
+ # This file is generated by cephadm.
+ # See https://prometheus.io/docs/alerting/configuration/ for documentation.
+
+ global:
+ resolve_timeout: 5m
+ http_config:
+ tls_config:
+ insecure_skip_verify: true
+
+ route:
+ receiver: 'default'
+ routes:
+ - group_by: ['alertname']
+ group_wait: 10s
+ group_interval: 10s
+ repeat_interval: 1h
+ receiver: 'ceph-dashboard'
+
+ receivers:
+ - name: 'default'
+ webhook_configs:
+ - name: 'ceph-dashboard'
+ webhook_configs:
+ - url: '{url}/api/prometheus_receiver'
+ """
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("mgr_module.MgrModule.get")
+ def test_alertmanager_config(self, mock_get, _run_cephadm,
+ cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+ mock_get.return_value = {"services": {"dashboard": "http://[::1]:8080"}}
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, AlertManagerSpec()):
+ y = dedent(self._get_config('http://localhost:8080')).lstrip()
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'alertmanager.test',
+ 'deploy',
+ [
+ '--name', 'alertmanager.test',
+ '--meta-json', '{"service_name": "alertmanager", "ports": [9093, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
+ '--config-json', '-', '--tcp-ports', '9093 9094'
+ ],
+ stdin=json.dumps({"files": {"alertmanager.yml": y}, "peers": []}),
+ image='')
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("mgr_module.MgrModule.get")
+ def test_alertmanager_config_v6(self, mock_get, _run_cephadm,
+ cephadm_module: CephadmOrchestrator):
+ dashboard_url = "http://[2001:db8:4321:0000:0000:0000:0000:0000]:8080"
+ _run_cephadm.return_value = ('{}', '', 0)
+ mock_get.return_value = {"services": {"dashboard": dashboard_url}}
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, AlertManagerSpec()):
+ y = dedent(self._get_config(dashboard_url)).lstrip()
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'alertmanager.test',
+ 'deploy',
+ [
+ '--name', 'alertmanager.test',
+ '--meta-json',
+ '{"service_name": "alertmanager", "ports": [9093, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
+ '--config-json', '-', '--tcp-ports', '9093 9094'
+ ],
+ stdin=json.dumps({"files": {"alertmanager.yml": y}, "peers": []}),
+ image='')
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("mgr_module.MgrModule.get")
+ @patch("socket.getfqdn")
+ def test_alertmanager_config_v6_fqdn(self, mock_getfqdn, mock_get, _run_cephadm,
+ cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+ mock_getfqdn.return_value = "mgr.test.fqdn"
+ mock_get.return_value = {"services": {
+ "dashboard": "http://[2001:db8:4321:0000:0000:0000:0000:0000]:8080"}}
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, AlertManagerSpec()):
+ y = dedent(self._get_config("http://mgr.test.fqdn:8080")).lstrip()
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'alertmanager.test',
+ 'deploy',
+ [
+ '--name', 'alertmanager.test',
+ '--meta-json',
+ '{"service_name": "alertmanager", "ports": [9093, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
+ '--config-json', '-', '--tcp-ports', '9093 9094'
+ ],
+ stdin=json.dumps({"files": {"alertmanager.yml": y}, "peers": []}),
+ image='')
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("mgr_module.MgrModule.get")
+ def test_alertmanager_config_v4(self, mock_get, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ dashboard_url = "http://192.168.0.123:8080"
+ _run_cephadm.return_value = ('{}', '', 0)
+ mock_get.return_value = {"services": {"dashboard": dashboard_url}}
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, AlertManagerSpec()):
+ y = dedent(self._get_config(dashboard_url)).lstrip()
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'alertmanager.test',
+ 'deploy',
+ [
+ '--name', 'alertmanager.test',
+ '--meta-json', '{"service_name": "alertmanager", "ports": [9093, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
+ '--config-json', '-', '--tcp-ports', '9093 9094'
+ ],
+ stdin=json.dumps({"files": {"alertmanager.yml": y}, "peers": []}),
+ image='')
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("mgr_module.MgrModule.get")
+ @patch("socket.getfqdn")
+ def test_alertmanager_config_v4_fqdn(self, mock_getfqdn, mock_get, _run_cephadm,
+ cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+ mock_getfqdn.return_value = "mgr.test.fqdn"
+ mock_get.return_value = {"services": {"dashboard": "http://192.168.0.123:8080"}}
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, AlertManagerSpec()):
+ y = dedent(self._get_config("http://mgr.test.fqdn:8080")).lstrip()
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'alertmanager.test',
+ 'deploy',
+ [
+ '--name', 'alertmanager.test',
+ '--meta-json',
+ '{"service_name": "alertmanager", "ports": [9093, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
+ '--config-json', '-', '--tcp-ports', '9093 9094'
+ ],
+ stdin=json.dumps({"files": {"alertmanager.yml": y}, "peers": []}),
+ image='')
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_prometheus_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, MonitoringSpec('node-exporter')) as _, \
+ with_service(cephadm_module, MonitoringSpec('prometheus')) as _:
+
+ y = dedent("""
+ # This file is generated by cephadm.
+ global:
+ scrape_interval: 10s
+ evaluation_interval: 10s
+ rule_files:
+ - /etc/prometheus/alerting/*
+ scrape_configs:
+ - job_name: 'ceph'
+ honor_labels: true
+ static_configs:
+ - targets:
+ - '[::1]:9283'
+
+ - job_name: 'node'
+ static_configs:
+ - targets: ['[1::4]:9100']
+ labels:
+ instance: 'test'
+
+ """).lstrip()
+
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'prometheus.test',
+ 'deploy',
+ [
+ '--name', 'prometheus.test',
+ '--meta-json',
+ '{"service_name": "prometheus", "ports": [9095], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
+ '--config-json', '-',
+ '--tcp-ports', '9095'
+ ],
+ stdin=json.dumps({"files": {"prometheus.yml": y,
+ "/etc/prometheus/alerting/custom_alerts.yml": ""},
+ 'retention_time': '15d'}),
+ image='')
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4')
+ @patch("cephadm.services.monitoring.verify_tls", lambda *_: None)
+ def test_grafana_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+
+ with with_host(cephadm_module, 'test'):
+ cephadm_module.set_store('test/grafana_crt', 'c')
+ cephadm_module.set_store('test/grafana_key', 'k')
+ with with_service(cephadm_module, MonitoringSpec('prometheus')) as _, \
+ with_service(cephadm_module, GrafanaSpec('grafana')) as _:
+ files = {
+ 'grafana.ini': dedent("""
+ # This file is generated by cephadm.
+ [users]
+ default_theme = light
+ [auth.anonymous]
+ enabled = true
+ org_name = 'Main Org.'
+ org_role = 'Viewer'
+ [server]
+ domain = 'bootstrap.storage.lab'
+ protocol = https
+ cert_file = /etc/grafana/certs/cert_file
+ cert_key = /etc/grafana/certs/cert_key
+ http_port = 3000
+ http_addr =
+ [security]
+ disable_initial_admin_creation = true
+ cookie_secure = true
+ cookie_samesite = none
+ allow_embedding = true""").lstrip(), # noqa: W291
+ 'provisioning/datasources/ceph-dashboard.yml': dedent("""
+ # This file is generated by cephadm.
+ deleteDatasources:
+ - name: 'Dashboard1'
+ orgId: 1
+
+ datasources:
+ - name: 'Dashboard1'
+ type: 'prometheus'
+ access: 'proxy'
+ orgId: 1
+ url: 'http://[1::4]:9095'
+ basicAuth: false
+ isDefault: true
+ editable: false
+ """).lstrip(),
+ 'certs/cert_file': dedent("""
+ # generated by cephadm
+ c""").lstrip(),
+ 'certs/cert_key': dedent("""
+ # generated by cephadm
+ k""").lstrip(),
+ }
+
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'grafana.test',
+ 'deploy',
+ [
+ '--name', 'grafana.test',
+ '--meta-json',
+ '{"service_name": "grafana", "ports": [3000], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
+ '--config-json', '-', '--tcp-ports', '3000'],
+ stdin=json.dumps({"files": files}),
+ image='')
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_grafana_initial_admin_pw(self, cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, GrafanaSpec(initial_admin_password='secure')):
+ out = cephadm_module.cephadm_services['grafana'].generate_config(
+ CephadmDaemonDeploySpec('test', 'daemon', 'grafana'))
+ assert out == (
+ {
+ 'files':
+ {
+ 'certs/cert_file': ANY,
+ 'certs/cert_key': ANY,
+ 'grafana.ini':
+ '# This file is generated by cephadm.\n'
+ '[users]\n'
+ ' default_theme = light\n'
+ '[auth.anonymous]\n'
+ ' enabled = true\n'
+ " org_name = 'Main Org.'\n"
+ " org_role = 'Viewer'\n"
+ '[server]\n'
+ " domain = 'bootstrap.storage.lab'\n"
+ ' protocol = https\n'
+ ' cert_file = /etc/grafana/certs/cert_file\n'
+ ' cert_key = /etc/grafana/certs/cert_key\n'
+ ' http_port = 3000\n'
+ ' http_addr = \n'
+ '[security]\n'
+ ' admin_user = admin\n'
+ ' admin_password = secure\n'
+ ' cookie_secure = true\n'
+ ' cookie_samesite = none\n'
+ ' allow_embedding = true',
+ 'provisioning/datasources/ceph-dashboard.yml':
+ '# This file is generated by cephadm.\n'
+ 'deleteDatasources:\n'
+ '\n'
+ 'datasources:\n'
+ }
+ },
+ [],
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_monitoring_ports(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+
+ with with_host(cephadm_module, 'test'):
+
+ yaml_str = """service_type: alertmanager
+service_name: alertmanager
+placement:
+ count: 1
+spec:
+ port: 4200
+"""
+ yaml_file = yaml.safe_load(yaml_str)
+ spec = ServiceSpec.from_json(yaml_file)
+
+ with patch("cephadm.services.monitoring.AlertmanagerService.generate_config", return_value=({}, [])):
+ with with_service(cephadm_module, spec):
+
+ CephadmServe(cephadm_module)._check_daemons()
+
+ _run_cephadm.assert_called_with(
+ 'test', 'alertmanager.test', 'deploy', [
+ '--name', 'alertmanager.test',
+ '--meta-json', '{"service_name": "alertmanager", "ports": [4200, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
+ '--config-json', '-',
+ '--tcp-ports', '4200 9094',
+ '--reconfig'
+ ],
+ stdin='{}',
+ image='')
+
+
+class TestRGWService:
+
+ @pytest.mark.parametrize(
+ "frontend, ssl, expected",
+ [
+ ('beast', False, 'beast endpoint=[fd00:fd00:fd00:3000::1]:80'),
+ ('beast', True,
+ 'beast ssl_endpoint=[fd00:fd00:fd00:3000::1]:443 ssl_certificate=config://rgw/cert/rgw.foo'),
+ ('civetweb', False, 'civetweb port=[fd00:fd00:fd00:3000::1]:80'),
+ ('civetweb', True,
+ 'civetweb port=[fd00:fd00:fd00:3000::1]:443s ssl_certificate=config://rgw/cert/rgw.foo'),
+ ]
+ )
+ @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_rgw_update(self, frontend, ssl, expected, cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ cephadm_module.cache.update_host_devices_networks(
+ 'host1',
+ dls=cephadm_module.cache.devices['host1'],
+ nets={
+ 'fd00:fd00:fd00:3000::/64': {
+ 'if0': ['fd00:fd00:fd00:3000::1']
+ }
+ })
+ s = RGWSpec(service_id="foo",
+ networks=['fd00:fd00:fd00:3000::/64'],
+ ssl=ssl,
+ rgw_frontend_type=frontend)
+ with with_service(cephadm_module, s) as dds:
+ _, f, _ = cephadm_module.check_mon_command({
+ 'prefix': 'config get',
+ 'who': f'client.{dds[0]}',
+ 'key': 'rgw_frontends',
+ })
+ assert f == expected
+
+
+class TestSNMPGateway:
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_snmp_v2c_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+
+ spec = SNMPGatewaySpec(
+ snmp_version='V2c',
+ snmp_destination='192.168.1.1:162',
+ credentials={
+ 'snmp_community': 'public'
+ })
+
+ config = {
+ "destination": spec.snmp_destination,
+ "snmp_version": spec.snmp_version,
+ "snmp_community": spec.credentials.get('snmp_community')
+ }
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec):
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'snmp-gateway.test',
+ 'deploy',
+ [
+ '--name', 'snmp-gateway.test',
+ '--meta-json',
+ '{"service_name": "snmp-gateway", "ports": [9464], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
+ '--config-json', '-',
+ '--tcp-ports', '9464'
+ ],
+ stdin=json.dumps(config),
+ image=''
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_snmp_v2c_with_port(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+
+ spec = SNMPGatewaySpec(
+ snmp_version='V2c',
+ snmp_destination='192.168.1.1:162',
+ credentials={
+ 'snmp_community': 'public'
+ },
+ port=9465)
+
+ config = {
+ "destination": spec.snmp_destination,
+ "snmp_version": spec.snmp_version,
+ "snmp_community": spec.credentials.get('snmp_community')
+ }
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec):
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'snmp-gateway.test',
+ 'deploy',
+ [
+ '--name', 'snmp-gateway.test',
+ '--meta-json',
+ '{"service_name": "snmp-gateway", "ports": [9465], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
+ '--config-json', '-',
+ '--tcp-ports', '9465'
+ ],
+ stdin=json.dumps(config),
+ image=''
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_snmp_v3nopriv_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+
+ spec = SNMPGatewaySpec(
+ snmp_version='V3',
+ snmp_destination='192.168.1.1:162',
+ engine_id='8000C53F00000000',
+ credentials={
+ 'snmp_v3_auth_username': 'myuser',
+ 'snmp_v3_auth_password': 'mypassword'
+ })
+
+ config = {
+ 'destination': spec.snmp_destination,
+ 'snmp_version': spec.snmp_version,
+ 'snmp_v3_auth_protocol': 'SHA',
+ 'snmp_v3_auth_username': 'myuser',
+ 'snmp_v3_auth_password': 'mypassword',
+ 'snmp_v3_engine_id': '8000C53F00000000'
+ }
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec):
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'snmp-gateway.test',
+ 'deploy',
+ [
+ '--name', 'snmp-gateway.test',
+ '--meta-json',
+ '{"service_name": "snmp-gateway", "ports": [9464], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
+ '--config-json', '-',
+ '--tcp-ports', '9464'
+ ],
+ stdin=json.dumps(config),
+ image=''
+ )
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_snmp_v3priv_deployment(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+
+ spec = SNMPGatewaySpec(
+ snmp_version='V3',
+ snmp_destination='192.168.1.1:162',
+ engine_id='8000C53F00000000',
+ auth_protocol='MD5',
+ privacy_protocol='AES',
+ credentials={
+ 'snmp_v3_auth_username': 'myuser',
+ 'snmp_v3_auth_password': 'mypassword',
+ 'snmp_v3_priv_password': 'mysecret',
+ })
+
+ config = {
+ 'destination': spec.snmp_destination,
+ 'snmp_version': spec.snmp_version,
+ 'snmp_v3_auth_protocol': 'MD5',
+ 'snmp_v3_auth_username': spec.credentials.get('snmp_v3_auth_username'),
+ 'snmp_v3_auth_password': spec.credentials.get('snmp_v3_auth_password'),
+ 'snmp_v3_engine_id': '8000C53F00000000',
+ 'snmp_v3_priv_protocol': spec.privacy_protocol,
+ 'snmp_v3_priv_password': spec.credentials.get('snmp_v3_priv_password'),
+ }
+
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, spec):
+ _run_cephadm.assert_called_with(
+ 'test',
+ 'snmp-gateway.test',
+ 'deploy',
+ [
+ '--name', 'snmp-gateway.test',
+ '--meta-json',
+ '{"service_name": "snmp-gateway", "ports": [9464], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}',
+ '--config-json', '-',
+ '--tcp-ports', '9464'
+ ],
+ stdin=json.dumps(config),
+ image=''
+ )
+
+
+class TestIngressService:
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_ingress_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+
+ with with_host(cephadm_module, 'test'):
+ cephadm_module.cache.update_host_devices_networks(
+ 'test',
+ cephadm_module.cache.devices['test'],
+ {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.4/32']
+ }
+ }
+ )
+
+ # the ingress backend
+ s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1),
+ rgw_frontend_type='beast')
+
+ ispec = IngressSpec(service_type='ingress',
+ service_id='test',
+ backend_service='rgw.foo',
+ frontend_port=8089,
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_interface_networks=['1.2.3.0/24'],
+ virtual_ip="1.2.3.4/32")
+ with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _:
+ # generate the keepalived conf based on the specified spec
+ keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ keepalived_expected_conf = {
+ 'files':
+ {
+ 'keepalived.conf':
+ '# This file is generated by cephadm.\n'
+ 'vrrp_script check_backend {\n '
+ 'script "/usr/bin/curl http://localhost:8999/health"\n '
+ 'weight -20\n '
+ 'interval 2\n '
+ 'rise 2\n '
+ 'fall 2\n}\n\n'
+ 'vrrp_instance VI_0 {\n '
+ 'state MASTER\n '
+ 'priority 100\n '
+ 'interface if0\n '
+ 'virtual_router_id 50\n '
+ 'advert_int 1\n '
+ 'authentication {\n '
+ 'auth_type PASS\n '
+ 'auth_pass 12345\n '
+ '}\n '
+ 'unicast_src_ip 1::4\n '
+ 'unicast_peer {\n '
+ '}\n '
+ 'virtual_ipaddress {\n '
+ '1.2.3.4/32 dev if0\n '
+ '}\n '
+ 'track_script {\n '
+ 'check_backend\n }\n'
+ '}\n'
+ }
+ }
+
+ # check keepalived config
+ assert keepalived_generated_conf[0] == keepalived_expected_conf
+
+ # generate the haproxy conf based on the specified spec
+ haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ haproxy_expected_conf = {
+ 'files':
+ {
+ 'haproxy.cfg':
+ '# This file is generated by cephadm.'
+ '\nglobal\n log '
+ '127.0.0.1 local2\n '
+ 'chroot /var/lib/haproxy\n '
+ 'pidfile /var/lib/haproxy/haproxy.pid\n '
+ 'maxconn 8000\n '
+ 'daemon\n '
+ 'stats socket /var/lib/haproxy/stats\n'
+ '\ndefaults\n '
+ 'mode http\n '
+ 'log global\n '
+ 'option httplog\n '
+ 'option dontlognull\n '
+ 'option http-server-close\n '
+ 'option forwardfor except 127.0.0.0/8\n '
+ 'option redispatch\n '
+ 'retries 3\n '
+ 'timeout queue 20s\n '
+ 'timeout connect 5s\n '
+ 'timeout http-request 1s\n '
+ 'timeout http-keep-alive 5s\n '
+ 'timeout client 1s\n '
+ 'timeout server 1s\n '
+ 'timeout check 5s\n '
+ 'maxconn 8000\n'
+ '\nfrontend stats\n '
+ 'mode http\n '
+ 'bind 1.2.3.4:8999\n '
+ 'bind localhost:8999\n '
+ 'stats enable\n '
+ 'stats uri /stats\n '
+ 'stats refresh 10s\n '
+ 'stats auth admin:12345\n '
+ 'http-request use-service prometheus-exporter if { path /metrics }\n '
+ 'monitor-uri /health\n'
+ '\nfrontend frontend\n '
+ 'bind 1.2.3.4:8089\n '
+ 'default_backend backend\n\n'
+ 'backend backend\n '
+ 'option forwardfor\n '
+ 'balance static-rr\n '
+ 'option httpchk HEAD / HTTP/1.0\n '
+ 'server ' + haproxy_generated_conf[1][0] + ' 1::4:80 check weight 100\n'
+ }
+ }
+
+ assert haproxy_generated_conf[0] == haproxy_expected_conf
+
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_ingress_config_multi_vips(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+
+ with with_host(cephadm_module, 'test'):
+ cephadm_module.cache.update_host_devices_networks('test', [], {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.4/32']
+ }
+ })
+
+ # Check the ingress with multiple VIPs
+ s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1),
+ rgw_frontend_type='beast')
+
+ ispec = IngressSpec(service_type='ingress',
+ service_id='test',
+ backend_service='rgw.foo',
+ frontend_port=8089,
+ monitor_port=8999,
+ monitor_user='admin',
+ monitor_password='12345',
+ keepalived_password='12345',
+ virtual_interface_networks=['1.2.3.0/24'],
+ virtual_ips_list=["1.2.3.4/32"])
+ with with_service(cephadm_module, s) as _, with_service(cephadm_module, ispec) as _:
+ # generate the keepalived conf based on the specified spec
+                # Test with only one VIP in the list, as it would fail with more VIPs than hosts.
+ keepalived_generated_conf = cephadm_module.cephadm_services['ingress'].keepalived_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ keepalived_expected_conf = {
+ 'files':
+ {
+ 'keepalived.conf':
+ '# This file is generated by cephadm.\n'
+ 'vrrp_script check_backend {\n '
+ 'script "/usr/bin/curl http://localhost:8999/health"\n '
+ 'weight -20\n '
+ 'interval 2\n '
+ 'rise 2\n '
+ 'fall 2\n}\n\n'
+ 'vrrp_instance VI_0 {\n '
+ 'state MASTER\n '
+ 'priority 100\n '
+ 'interface if0\n '
+ 'virtual_router_id 50\n '
+ 'advert_int 1\n '
+ 'authentication {\n '
+ 'auth_type PASS\n '
+ 'auth_pass 12345\n '
+ '}\n '
+ 'unicast_src_ip 1::4\n '
+ 'unicast_peer {\n '
+ '}\n '
+ 'virtual_ipaddress {\n '
+ '1.2.3.4/32 dev if0\n '
+ '}\n '
+ 'track_script {\n '
+ 'check_backend\n }\n'
+ '}\n'
+ }
+ }
+
+ # check keepalived config
+ assert keepalived_generated_conf[0] == keepalived_expected_conf
+
+ # generate the haproxy conf based on the specified spec
+ haproxy_generated_conf = cephadm_module.cephadm_services['ingress'].haproxy_generate_config(
+ CephadmDaemonDeploySpec(host='test', daemon_id='ingress', service_name=ispec.service_name()))
+
+ haproxy_expected_conf = {
+ 'files':
+ {
+ 'haproxy.cfg':
+ '# This file is generated by cephadm.'
+ '\nglobal\n log '
+ '127.0.0.1 local2\n '
+ 'chroot /var/lib/haproxy\n '
+ 'pidfile /var/lib/haproxy/haproxy.pid\n '
+ 'maxconn 8000\n '
+ 'daemon\n '
+ 'stats socket /var/lib/haproxy/stats\n'
+ '\ndefaults\n '
+ 'mode http\n '
+ 'log global\n '
+ 'option httplog\n '
+ 'option dontlognull\n '
+ 'option http-server-close\n '
+ 'option forwardfor except 127.0.0.0/8\n '
+ 'option redispatch\n '
+ 'retries 3\n '
+ 'timeout queue 20s\n '
+ 'timeout connect 5s\n '
+ 'timeout http-request 1s\n '
+ 'timeout http-keep-alive 5s\n '
+ 'timeout client 1s\n '
+ 'timeout server 1s\n '
+ 'timeout check 5s\n '
+ 'maxconn 8000\n'
+ '\nfrontend stats\n '
+ 'mode http\n '
+ 'bind *:8999\n '
+ 'bind localhost:8999\n '
+ 'stats enable\n '
+ 'stats uri /stats\n '
+ 'stats refresh 10s\n '
+ 'stats auth admin:12345\n '
+ 'http-request use-service prometheus-exporter if { path /metrics }\n '
+ 'monitor-uri /health\n'
+ '\nfrontend frontend\n '
+ 'bind *:8089\n '
+ 'default_backend backend\n\n'
+ 'backend backend\n '
+ 'option forwardfor\n '
+ 'balance static-rr\n '
+ 'option httpchk HEAD / HTTP/1.0\n '
+ 'server '
+ + haproxy_generated_conf[1][0] + ' 1::4:80 check weight 100\n'
+ }
+ }
+
+ assert haproxy_generated_conf[0] == haproxy_expected_conf
+
+
+class TestCephFsMirror:
+ @patch("cephadm.serve.CephadmServe._run_cephadm")
+ def test_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+ _run_cephadm.return_value = ('{}', '', 0)
+ with with_host(cephadm_module, 'test'):
+ with with_service(cephadm_module, ServiceSpec('cephfs-mirror')):
+ cephadm_module.assert_issued_mon_command({
+ 'prefix': 'mgr module enable',
+ 'module': 'mirroring'
+ })
diff --git a/src/pybind/mgr/cephadm/tests/test_spec.py b/src/pybind/mgr/cephadm/tests/test_spec.py
new file mode 100644
index 000000000..54aa0a7ab
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_spec.py
@@ -0,0 +1,600 @@
+# Disable autopep8 for this file:
+
+# fmt: off
+
+import json
+
+import pytest
+
+from ceph.deployment.service_spec import ServiceSpec, NFSServiceSpec, RGWSpec, \
+ IscsiServiceSpec, HostPlacementSpec, CustomContainerSpec
+from orchestrator import DaemonDescription, OrchestratorError
+
+
+@pytest.mark.parametrize(
+ "spec_json",
+ json.loads("""[
+{
+ "placement": {
+ "count": 1
+ },
+ "service_type": "alertmanager"
+},
+{
+ "placement": {
+ "host_pattern": "*"
+ },
+ "service_type": "crash"
+},
+{
+ "placement": {
+ "count": 1
+ },
+ "service_type": "grafana"
+},
+{
+ "placement": {
+ "count": 2
+ },
+ "service_type": "mgr"
+},
+{
+ "placement": {
+ "count": 5
+ },
+ "service_type": "mon"
+},
+{
+ "placement": {
+ "host_pattern": "*"
+ },
+ "service_type": "node-exporter"
+},
+{
+ "placement": {
+ "count": 1
+ },
+ "service_type": "prometheus"
+},
+{
+ "placement": {
+ "hosts": [
+ {
+ "hostname": "ceph-001",
+ "network": "",
+ "name": ""
+ }
+ ]
+ },
+ "service_type": "rgw",
+ "service_id": "default-rgw-realm.eu-central-1.1",
+ "rgw_realm": "default-rgw-realm",
+ "rgw_zone": "eu-central-1"
+},
+{
+ "service_type": "osd",
+ "service_id": "osd_spec_default",
+ "placement": {
+ "host_pattern": "*"
+ },
+ "data_devices": {
+ "model": "MC-55-44-XZ"
+ },
+ "db_devices": {
+ "model": "SSD-123-foo"
+ },
+ "wal_devices": {
+ "model": "NVME-QQQQ-987"
+ }
+}
+]
+""")
+)
+def test_spec_octopus(spec_json):
+ # https://tracker.ceph.com/issues/44934
+ # Those are real user data from early octopus.
+ # Please do not modify those JSON values.
+
+ spec = ServiceSpec.from_json(spec_json)
+
+    # just some verification that we can still read old octopus specs
+ def convert_to_old_style_json(j):
+ j_c = dict(j.copy())
+ j_c.pop('service_name', None)
+ if 'spec' in j_c:
+ spec = j_c.pop('spec')
+ j_c.update(spec)
+ if 'placement' in j_c:
+ if 'hosts' in j_c['placement']:
+ j_c['placement']['hosts'] = [
+ {
+ 'hostname': HostPlacementSpec.parse(h).hostname,
+ 'network': HostPlacementSpec.parse(h).network,
+ 'name': HostPlacementSpec.parse(h).name
+ }
+ for h in j_c['placement']['hosts']
+ ]
+ j_c.pop('objectstore', None)
+ j_c.pop('filter_logic', None)
+ return j_c
+
+ assert spec_json == convert_to_old_style_json(spec.to_json())
+
+
+@pytest.mark.parametrize(
+ "dd_json",
+ json.loads("""[
+ {
+ "hostname": "ceph-001",
+ "container_id": "d94d7969094d",
+ "container_image_id": "0881eb8f169f5556a292b4e2c01d683172b12830a62a9225a98a8e206bb734f0",
+ "container_image_name": "docker.io/prom/alertmanager:latest",
+ "daemon_id": "ceph-001",
+ "daemon_type": "alertmanager",
+ "version": "0.20.0",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.725856",
+ "created": "2020-04-02T19:23:08.829543",
+ "started": "2020-04-03T07:29:16.932838",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "c4b036202241",
+ "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1",
+ "container_image_name": "docker.io/ceph/ceph:v15",
+ "daemon_id": "ceph-001",
+ "daemon_type": "crash",
+ "version": "15.2.0",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.725903",
+ "created": "2020-04-02T19:23:11.390694",
+ "started": "2020-04-03T07:29:16.910897",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "5b7b94b48f31",
+ "container_image_id": "87a51ecf0b1c9a7b187b21c1b071425dafea0d765a96d5bc371c791169b3d7f4",
+ "container_image_name": "docker.io/ceph/ceph-grafana:latest",
+ "daemon_id": "ceph-001",
+ "daemon_type": "grafana",
+ "version": "6.6.2",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.725950",
+ "created": "2020-04-02T19:23:52.025088",
+ "started": "2020-04-03T07:29:16.847972",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "9ca007280456",
+ "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1",
+ "container_image_name": "docker.io/ceph/ceph:v15",
+ "daemon_id": "ceph-001.gkjwqp",
+ "daemon_type": "mgr",
+ "version": "15.2.0",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.725807",
+ "created": "2020-04-02T19:22:18.648584",
+ "started": "2020-04-03T07:29:16.856153",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "3d1ba9a2b697",
+ "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1",
+ "container_image_name": "docker.io/ceph/ceph:v15",
+ "daemon_id": "ceph-001",
+ "daemon_type": "mon",
+ "version": "15.2.0",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.725715",
+ "created": "2020-04-02T19:22:13.863300",
+ "started": "2020-04-03T07:29:17.206024",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "36d026c68ba1",
+ "container_image_id": "e5a616e4b9cf68dfcad7782b78e118be4310022e874d52da85c55923fb615f87",
+ "container_image_name": "docker.io/prom/node-exporter:latest",
+ "daemon_id": "ceph-001",
+ "daemon_type": "node-exporter",
+ "version": "0.18.1",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.725996",
+ "created": "2020-04-02T19:23:53.880197",
+ "started": "2020-04-03T07:29:16.880044",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "faf76193cbfe",
+ "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1",
+ "container_image_name": "docker.io/ceph/ceph:v15",
+ "daemon_id": "0",
+ "daemon_type": "osd",
+ "version": "15.2.0",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.726088",
+ "created": "2020-04-02T20:35:02.991435",
+ "started": "2020-04-03T07:29:19.373956",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "f82505bae0f1",
+ "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1",
+ "container_image_name": "docker.io/ceph/ceph:v15",
+ "daemon_id": "1",
+ "daemon_type": "osd",
+ "version": "15.2.0",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.726134",
+ "created": "2020-04-02T20:35:17.142272",
+ "started": "2020-04-03T07:29:19.374002",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "container_id": "2708d84cd484",
+ "container_image_id": "358a0d2395fe711bb8258e8fb4b2d7865c0a9a6463969bcd1452ee8869ea6653",
+ "container_image_name": "docker.io/prom/prometheus:latest",
+ "daemon_id": "ceph-001",
+ "daemon_type": "prometheus",
+ "version": "2.17.1",
+ "status": 1,
+ "status_desc": "running",
+ "last_refresh": "2020-04-03T15:31:48.726042",
+ "created": "2020-04-02T19:24:10.281163",
+ "started": "2020-04-03T07:29:16.926292",
+ "is_active": false
+ },
+ {
+ "hostname": "ceph-001",
+ "daemon_id": "default-rgw-realm.eu-central-1.1.ceph-001.ytywjo",
+ "daemon_type": "rgw",
+ "status": 1,
+ "status_desc": "starting",
+ "is_active": false
+ }
+]""")
+)
+def test_dd_octopus(dd_json):
+ # https://tracker.ceph.com/issues/44934
+ # Those are real user data from early octopus.
+ # Please do not modify those JSON values.
+
+ # Convert datetime properties to old style.
+ # 2020-04-03T07:29:16.926292Z -> 2020-04-03T07:29:16.926292
+ def convert_to_old_style_json(j):
+ for k in ['last_refresh', 'created', 'started', 'last_deployed',
+ 'last_configured']:
+ if k in j:
+ j[k] = j[k].rstrip('Z')
+ del j['daemon_name']
+ return j
+
+ assert dd_json == convert_to_old_style_json(
+ DaemonDescription.from_json(dd_json).to_json())
+
+
+@pytest.mark.parametrize("spec,dd,valid",
+[ # noqa: E128
+ # https://tracker.ceph.com/issues/44934
+ (
+ RGWSpec(
+ service_id="foo",
+ rgw_realm="default-rgw-realm",
+ rgw_zone="eu-central-1",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="foo.ceph-001.ytywjo",
+ hostname="ceph-001",
+ ),
+ True
+ ),
+ (
+ # no realm
+ RGWSpec(
+ service_id="foo.bar",
+ rgw_zone="eu-central-1",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="foo.bar.ceph-001.ytywjo",
+ hostname="ceph-001",
+ ),
+ True
+ ),
+ (
+ # no realm or zone
+ RGWSpec(
+ service_id="bar",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="bar.host.domain.tld.ytywjo",
+ hostname="host.domain.tld",
+ ),
+ True
+ ),
+ (
+ # explicit naming
+ RGWSpec(
+ service_id="realm.zone",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="realm.zone.a",
+ hostname="smithi028",
+ ),
+ True
+ ),
+ (
+ # without host
+ RGWSpec(
+ service_type='rgw',
+ service_id="foo",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="foo.hostname.ytywjo",
+ hostname=None,
+ ),
+ False
+ ),
+ (
+ # without host (2)
+ RGWSpec(
+ service_type='rgw',
+ service_id="default-rgw-realm.eu-central-1.1",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="default-rgw-realm.eu-central-1.1.hostname.ytywjo",
+ hostname=None,
+ ),
+ False
+ ),
+ (
+ # service_id contains hostname
+ # (sort of) https://tracker.ceph.com/issues/45294
+ RGWSpec(
+ service_id="default.rgw.realm.ceph.001",
+ ),
+ DaemonDescription(
+ daemon_type='rgw',
+ daemon_id="default.rgw.realm.ceph.001.ceph.001.ytywjo",
+ hostname="ceph.001",
+ ),
+ True
+ ),
+
+ # https://tracker.ceph.com/issues/45293
+ (
+ ServiceSpec(
+ service_type='mds',
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='mds',
+ daemon_id="a.host1.abc123",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # '.' char in service_id
+ ServiceSpec(
+ service_type='mds',
+ service_id="a.b.c",
+ ),
+ DaemonDescription(
+ daemon_type='mds',
+ daemon_id="a.b.c.host1.abc123",
+ hostname="host1",
+ ),
+ True
+ ),
+
+ # https://tracker.ceph.com/issues/45617
+ (
+ # daemon_id does not contain hostname
+ ServiceSpec(
+ service_type='mds',
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='mds',
+ daemon_id="a",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # daemon_id only contains hostname
+ ServiceSpec(
+ service_type='mds',
+ service_id="host1",
+ ),
+ DaemonDescription(
+ daemon_type='mds',
+ daemon_id="host1",
+ hostname="host1",
+ ),
+ True
+ ),
+
+ # https://tracker.ceph.com/issues/45399
+ (
+ # daemon_id only contains hostname
+ ServiceSpec(
+ service_type='mds',
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='mds',
+ daemon_id="a.host1.abc123",
+ hostname="host1.site",
+ ),
+ True
+ ),
+ (
+ NFSServiceSpec(
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id="a.host1",
+ hostname="host1.site",
+ ),
+ True
+ ),
+
+ # https://tracker.ceph.com/issues/45293
+ (
+ NFSServiceSpec(
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id="a.host1",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # service_id contains a '.' char
+ NFSServiceSpec(
+ service_id="a.b.c",
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id="a.b.c.host1",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # trailing chars after hostname
+ NFSServiceSpec(
+ service_id="a.b.c",
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id="a.b.c.host1.abc123",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # chars after hostname without '.'
+ NFSServiceSpec(
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id="a.host1abc123",
+ hostname="host1",
+ ),
+ False
+ ),
+ (
+ # chars before hostname without '.'
+ NFSServiceSpec(
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='nfs',
+ daemon_id="ahost1.abc123",
+ hostname="host1",
+ ),
+ False
+ ),
+
+ # https://tracker.ceph.com/issues/45293
+ (
+ IscsiServiceSpec(
+ service_type='iscsi',
+ service_id="a",
+ ),
+ DaemonDescription(
+ daemon_type='iscsi',
+ daemon_id="a.host1.abc123",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # '.' char in service_id
+ IscsiServiceSpec(
+ service_type='iscsi',
+ service_id="a.b.c",
+ ),
+ DaemonDescription(
+ daemon_type='iscsi',
+ daemon_id="a.b.c.host1.abc123",
+ hostname="host1",
+ ),
+ True
+ ),
+ (
+ # fixed daemon id for teuthology.
+ IscsiServiceSpec(
+ service_type='iscsi',
+ service_id='iscsi',
+ ),
+ DaemonDescription(
+ daemon_type='iscsi',
+ daemon_id="iscsi.a",
+ hostname="host1",
+ ),
+ True
+ ),
+
+ (
+ CustomContainerSpec(
+ service_type='container',
+ service_id='hello-world',
+ image='docker.io/library/hello-world:latest',
+ ),
+ DaemonDescription(
+ daemon_type='container',
+ daemon_id='hello-world.mgr0',
+ hostname='mgr0',
+ ),
+ True
+ ),
+
+ (
+ # daemon_id only contains hostname
+ ServiceSpec(
+ service_type='cephadm-exporter',
+ ),
+ DaemonDescription(
+ daemon_type='cephadm-exporter',
+ daemon_id="testhost",
+ hostname="testhost",
+ ),
+ True
+ ),
+])
+def test_daemon_description_service_name(spec: ServiceSpec,
+ dd: DaemonDescription,
+ valid: bool):
+ if valid:
+ assert spec.service_name() == dd.service_name()
+ else:
+ with pytest.raises(OrchestratorError):
+ dd.service_name()
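+
+
+# Reading the matrix above: a (spec, dd) pair is marked valid when the daemon_id
+# maps back unambiguously to the spec's service_name(); the invalid cases (e.g. a
+# missing hostname, or extra characters in daemon_id that are not '.'-separated
+# from the hostname) are expected to raise OrchestratorError instead.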
diff --git a/src/pybind/mgr/cephadm/tests/test_template.py b/src/pybind/mgr/cephadm/tests/test_template.py
new file mode 100644
index 000000000..f67304348
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_template.py
@@ -0,0 +1,33 @@
+import pathlib
+
+import pytest
+
+from cephadm.template import TemplateMgr, UndefinedError, TemplateNotFoundError
+
+
+def test_render(cephadm_module, fs):
+ template_base = (pathlib.Path(__file__).parent / '../templates').resolve()
+ fake_template = template_base / 'foo/bar'
+ fs.create_file(fake_template, contents='{{ cephadm_managed }}{{ var }}')
+
+ template_mgr = TemplateMgr(cephadm_module)
+ value = 'test'
+
+ # with base context
+ expected_text = '{}{}'.format(template_mgr.base_context['cephadm_managed'], value)
+ assert template_mgr.render('foo/bar', {'var': value}) == expected_text
+
+ # without base context
+ with pytest.raises(UndefinedError):
+ template_mgr.render('foo/bar', {'var': value}, managed_context=False)
+
+ # override the base context
+ context = {
+ 'cephadm_managed': 'abc',
+ 'var': value
+ }
+ assert template_mgr.render('foo/bar', context) == 'abc{}'.format(value)
+
+ # template not found
+ with pytest.raises(TemplateNotFoundError):
+ template_mgr.render('foo/bar/2', {})
diff --git a/src/pybind/mgr/cephadm/tests/test_upgrade.py b/src/pybind/mgr/cephadm/tests/test_upgrade.py
new file mode 100644
index 000000000..9368f4dc3
--- /dev/null
+++ b/src/pybind/mgr/cephadm/tests/test_upgrade.py
@@ -0,0 +1,322 @@
+import json
+from unittest import mock
+
+import pytest
+
+from ceph.deployment.service_spec import PlacementSpec, ServiceSpec
+from cephadm import CephadmOrchestrator
+from cephadm.upgrade import CephadmUpgrade
+from cephadm.serve import CephadmServe
+from orchestrator import OrchestratorError, DaemonDescription
+from .fixtures import _run_cephadm, wait, with_host, with_service
+
+from typing import List, Tuple, Optional
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+def test_upgrade_start(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'test'):
+ with with_host(cephadm_module, 'test2'):
+ with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=2)), status_running=True):
+ assert wait(cephadm_module, cephadm_module.upgrade_start(
+ 'image_id', None)) == 'Initiating upgrade to image_id'
+
+ assert wait(cephadm_module, cephadm_module.upgrade_status()
+ ).target_image == 'image_id'
+
+ assert wait(cephadm_module, cephadm_module.upgrade_pause()
+ ) == 'Paused upgrade to image_id'
+
+ assert wait(cephadm_module, cephadm_module.upgrade_resume()
+ ) == 'Resumed upgrade to image_id'
+
+ assert wait(cephadm_module, cephadm_module.upgrade_stop()
+ ) == 'Stopped upgrade to image_id'
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+@pytest.mark.parametrize("use_repo_digest",
+ [
+ False,
+ True
+ ])
+def test_upgrade_run(use_repo_digest, cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ with with_host(cephadm_module, 'host2'):
+ cephadm_module.set_container_image('global', 'from_image')
+ cephadm_module.use_repo_digest = use_repo_digest
+ with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(host_pattern='*', count=2)),
+ CephadmOrchestrator.apply_mgr, '', status_running=True),\
+ mock.patch("cephadm.module.CephadmOrchestrator.lookup_release_name",
+ return_value='foo'),\
+ mock.patch("cephadm.module.CephadmOrchestrator.version",
+ new_callable=mock.PropertyMock) as version_mock,\
+ mock.patch("cephadm.module.CephadmOrchestrator.get",
+ return_value={
+ # capture fields in both mon and osd maps
+ "require_osd_release": "pacific",
+ "min_mon_release": 16,
+ }):
+ version_mock.return_value = 'ceph version 18.2.1 (somehash)'
+ assert wait(cephadm_module, cephadm_module.upgrade_start(
+ 'to_image', None)) == 'Initiating upgrade to to_image'
+
+ assert wait(cephadm_module, cephadm_module.upgrade_status()
+ ).target_image == 'to_image'
+
+ def _versions_mock(cmd):
+ return json.dumps({
+ 'mgr': {
+ 'ceph version 1.2.3 (asdf) blah': 1
+ }
+ })
+
+ cephadm_module._mon_command_mock_versions = _versions_mock
+
+ with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({
+ 'image_id': 'image_id',
+ 'repo_digests': ['to_image@repo_digest'],
+ 'ceph_version': 'ceph version 18.2.3 (hash)',
+ }))):
+
+ cephadm_module.upgrade._do_upgrade()
+
+ assert cephadm_module.upgrade_status is not None
+
+ with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(
+ json.dumps([
+ dict(
+ name=list(cephadm_module.cache.daemons['host1'].keys())[0],
+ style='cephadm',
+ fsid='fsid',
+ container_id='container_id',
+ container_image_id='image_id',
+ container_image_digests=['to_image@repo_digest'],
+ deployed_by=['to_image@repo_digest'],
+ version='version',
+ state='running',
+ )
+ ])
+ )):
+ CephadmServe(cephadm_module)._refresh_hosts_and_daemons()
+
+ with mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm(json.dumps({
+ 'image_id': 'image_id',
+ 'repo_digests': ['to_image@repo_digest'],
+ 'ceph_version': 'ceph version 18.2.3 (hash)',
+ }))):
+ cephadm_module.upgrade._do_upgrade()
+
+ _, image, _ = cephadm_module.check_mon_command({
+ 'prefix': 'config get',
+ 'who': 'global',
+ 'key': 'container_image',
+ })
+ if use_repo_digest:
+ assert image == 'to_image@repo_digest'
+ else:
+ assert image == 'to_image'
+
+
+def test_upgrade_state_null(cephadm_module: CephadmOrchestrator):
+ # This test validates https://tracker.ceph.com/issues/47580
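+    # The stored value 'null' is what json.dumps(None) produces; loading it must
+    # leave upgrade_state as None instead of raising in UpgradeState.from_json().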
+ cephadm_module.set_store('upgrade_state', 'null')
+ CephadmUpgrade(cephadm_module)
+ assert CephadmUpgrade(cephadm_module).upgrade_state is None
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+def test_not_enough_mgrs(cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ with with_service(cephadm_module, ServiceSpec('mgr', placement=PlacementSpec(count=1)), CephadmOrchestrator.apply_mgr, ''):
+ with pytest.raises(OrchestratorError):
+ wait(cephadm_module, cephadm_module.upgrade_start('image_id', None))
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+@mock.patch("cephadm.CephadmOrchestrator.check_mon_command")
+def test_enough_mons_for_ok_to_stop(check_mon_command, cephadm_module: CephadmOrchestrator):
+ # only 2 monitors, not enough for ok-to-stop to ever pass
+ check_mon_command.return_value = (
+ 0, '{"monmap": {"mons": [{"name": "mon.1"}, {"name": "mon.2"}]}}', '')
+ assert not cephadm_module.upgrade._enough_mons_for_ok_to_stop()
+
+ # 3 monitors, ok-to-stop should work fine
+ check_mon_command.return_value = (
+ 0, '{"monmap": {"mons": [{"name": "mon.1"}, {"name": "mon.2"}, {"name": "mon.3"}]}}', '')
+ assert cephadm_module.upgrade._enough_mons_for_ok_to_stop()
+
+
+@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+@mock.patch("cephadm.module.HostCache.get_daemons_by_service")
+@mock.patch("cephadm.CephadmOrchestrator.get")
+def test_enough_mds_for_ok_to_stop(get, get_daemons_by_service, cephadm_module: CephadmOrchestrator):
+ get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'test', 'max_mds': 1}}]}]
+ get_daemons_by_service.side_effect = [[DaemonDescription()]]
+ assert not cephadm_module.upgrade._enough_mds_for_ok_to_stop(
+ DaemonDescription(daemon_type='mds', daemon_id='test.host1.gfknd', service_name='mds.test'))
+
+ get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'myfs.test', 'max_mds': 2}}]}]
+ get_daemons_by_service.side_effect = [[DaemonDescription(), DaemonDescription()]]
+ assert not cephadm_module.upgrade._enough_mds_for_ok_to_stop(
+ DaemonDescription(daemon_type='mds', daemon_id='myfs.test.host1.gfknd', service_name='mds.myfs.test'))
+
+ get.side_effect = [{'filesystems': [{'mdsmap': {'fs_name': 'myfs.test', 'max_mds': 1}}]}]
+ get_daemons_by_service.side_effect = [[DaemonDescription(), DaemonDescription()]]
+ assert cephadm_module.upgrade._enough_mds_for_ok_to_stop(
+ DaemonDescription(daemon_type='mds', daemon_id='myfs.test.host1.gfknd', service_name='mds.myfs.test'))
+
+
+@pytest.mark.parametrize(
+ "upgraded, not_upgraded, daemon_types, hosts, services, should_block",
+ # [ ([(type, host, id), ... ], [...], [daemon types], [hosts], [services], True/False), ... ]
+ [
+ ( # valid, upgrade mgr daemons
+ [],
+ [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
+ ['mgr'],
+ None,
+ None,
+ False
+ ),
+ ( # invalid, can't upgrade mons until mgr is upgraded
+ [],
+ [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
+ ['mon'],
+ None,
+ None,
+ True
+ ),
+ ( # invalid, can't upgrade mon service until all mgr daemons are upgraded
+ [],
+ [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
+ None,
+ None,
+ ['mon'],
+ True
+ ),
+ ( # valid, upgrade mgr service
+ [],
+ [('mgr', 'a', 'a.x'), ('mon', 'a', 'a')],
+ None,
+ None,
+ ['mgr'],
+ False
+ ),
+ ( # valid, mgr is already upgraded so can upgrade mons
+ [('mgr', 'a', 'a.x')],
+ [('mon', 'a', 'a')],
+ ['mon'],
+ None,
+ None,
+ False
+ ),
+ ( # invalid, can't upgrade all daemons on b b/c un-upgraded mgr on a
+ [],
+ [('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
+ None,
+ ['a'],
+ None,
+ True
+ ),
+ ( # valid, only daemon on b is a mgr
+ [],
+ [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
+ None,
+ ['b'],
+ None,
+ False
+ ),
+ ( # invalid, can't upgrade mon on a while mgr on b is un-upgraded
+ [],
+ [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
+ None,
+ ['a'],
+ None,
+ True
+ ),
+ ( # valid, only upgrading the mgr on a
+ [],
+ [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
+ ['mgr'],
+ ['a'],
+ None,
+ False
+ ),
+        ( # valid, mgr daemons not on b are already upgraded
+ [('mgr', 'a', 'a.x')],
+ [('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
+ None,
+ ['b'],
+ None,
+ False
+ ),
+ ( # valid, all the necessary hosts are covered, mgr on c is already upgraded
+ [('mgr', 'c', 'c.z')],
+ [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a'), ('osd', 'c', '0')],
+ None,
+ ['a', 'b'],
+ None,
+ False
+ ),
+ ( # invalid, can't upgrade mon on a while mgr on b is un-upgraded
+ [],
+ [('mgr', 'a', 'a.x'), ('mgr', 'b', 'b.y'), ('mon', 'a', 'a')],
+ ['mgr', 'mon'],
+ ['a'],
+ None,
+ True
+ ),
+ ( # valid, only mon not on "b" is upgraded already. Case hit while making teuthology test
+ [('mon', 'a', 'a')],
+ [('mon', 'b', 'x'), ('mon', 'b', 'y'), ('osd', 'a', '1'), ('osd', 'b', '2')],
+ ['mon', 'osd'],
+ ['b'],
+ None,
+ False
+ ),
+ ]
+)
+@mock.patch("cephadm.module.HostCache.get_daemons")
+@mock.patch("cephadm.serve.CephadmServe._get_container_image_info")
+@mock.patch('cephadm.module.SpecStore.__getitem__')
+def test_staggered_upgrade_validation(
+ get_spec,
+ get_image_info,
+ get_daemons,
+ upgraded: List[Tuple[str, str, str]],
+    not_upgraded: List[Tuple[str, str, str]],
+    daemon_types: Optional[List[str]],
+    hosts: Optional[List[str]],
+    services: Optional[List[str]],
+ should_block: bool,
+ cephadm_module: CephadmOrchestrator,
+):
+    def to_dds(ts: List[Tuple[str, str, str]], upgraded: bool) -> List[DaemonDescription]:
+ dds = []
+ digest = 'new_image@repo_digest' if upgraded else 'old_image@repo_digest'
+ for t in ts:
+ dds.append(DaemonDescription(daemon_type=t[0],
+ hostname=t[1],
+ daemon_id=t[2],
+ container_image_digests=[digest],
+ deployed_by=[digest],))
+ return dds
+ get_daemons.return_value = to_dds(upgraded, True) + to_dds(not_upgraded, False)
+ get_image_info.return_value = ('new_id', 'ceph version 99.99.99 (hash)', ['new_image@repo_digest'])
+
+ class FakeSpecDesc():
+ def __init__(self, spec):
+ self.spec = spec
+
+ def _get_spec(s):
+ return FakeSpecDesc(ServiceSpec(s))
+
+ get_spec.side_effect = _get_spec
+ if should_block:
+ with pytest.raises(OrchestratorError):
+ cephadm_module.upgrade._validate_upgrade_filters(
+ 'new_image_name', daemon_types, hosts, services)
+ else:
+ cephadm_module.upgrade._validate_upgrade_filters(
+ 'new_image_name', daemon_types, hosts, services)
diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py
new file mode 100644
index 000000000..db39fe76a
--- /dev/null
+++ b/src/pybind/mgr/cephadm/upgrade.py
@@ -0,0 +1,1167 @@
+import json
+import logging
+import time
+import uuid
+from typing import TYPE_CHECKING, Optional, Dict, List, Tuple, Any
+
+import orchestrator
+from cephadm.registry import Registry
+from cephadm.serve import CephadmServe
+from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
+from cephadm.utils import ceph_release_to_major, name_to_config_section, CEPH_UPGRADE_ORDER, \
+ MONITORING_STACK_TYPES, CEPH_TYPES, GATEWAY_TYPES
+from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus, daemon_type_to_service
+
+if TYPE_CHECKING:
+ from .module import CephadmOrchestrator
+
+
+logger = logging.getLogger(__name__)
+
+# from ceph_fs.h
+CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = (1 << 5)
+
+
+def normalize_image_digest(digest: str, default_registry: str) -> str:
+ """
+ Normal case:
+ >>> normalize_image_digest('ceph/ceph', 'docker.io')
+ 'docker.io/ceph/ceph'
+
+ No change:
+ >>> normalize_image_digest('quay.ceph.io/ceph/ceph', 'docker.io')
+ 'quay.ceph.io/ceph/ceph'
+
+ >>> normalize_image_digest('docker.io/ubuntu', 'docker.io')
+ 'docker.io/ubuntu'
+
+ >>> normalize_image_digest('localhost/ceph', 'docker.io')
+ 'localhost/ceph'
+ """
+ known_shortnames = [
+ 'ceph/ceph',
+ 'ceph/daemon',
+ 'ceph/daemon-base',
+ ]
+ for image in known_shortnames:
+ if digest.startswith(image):
+ return f'{default_registry}/{digest}'
+ return digest
+
+
+class UpgradeState:
+ def __init__(self,
+ target_name: str,
+ progress_id: str,
+ target_id: Optional[str] = None,
+ target_digests: Optional[List[str]] = None,
+ target_version: Optional[str] = None,
+ error: Optional[str] = None,
+ paused: Optional[bool] = None,
+ fs_original_max_mds: Optional[Dict[str, int]] = None,
+ fs_original_allow_standby_replay: Optional[Dict[str, bool]] = None,
+ daemon_types: Optional[List[str]] = None,
+ hosts: Optional[List[str]] = None,
+ services: Optional[List[str]] = None,
+ total_count: Optional[int] = None,
+ remaining_count: Optional[int] = None,
+ ):
+ self._target_name: str = target_name # Use CephadmUpgrade.target_image instead.
+ self.progress_id: str = progress_id
+ self.target_id: Optional[str] = target_id
+ self.target_digests: Optional[List[str]] = target_digests
+ self.target_version: Optional[str] = target_version
+ self.error: Optional[str] = error
+ self.paused: bool = paused or False
+ self.fs_original_max_mds: Optional[Dict[str, int]] = fs_original_max_mds
+ self.fs_original_allow_standby_replay: Optional[Dict[str,
+ bool]] = fs_original_allow_standby_replay
+ self.daemon_types = daemon_types
+ self.hosts = hosts
+ self.services = services
+ self.total_count = total_count
+ self.remaining_count = remaining_count
+
+ def to_json(self) -> dict:
+ return {
+ 'target_name': self._target_name,
+ 'progress_id': self.progress_id,
+ 'target_id': self.target_id,
+ 'target_digests': self.target_digests,
+ 'target_version': self.target_version,
+ 'fs_original_max_mds': self.fs_original_max_mds,
+ 'fs_original_allow_standby_replay': self.fs_original_allow_standby_replay,
+ 'error': self.error,
+ 'paused': self.paused,
+ 'daemon_types': self.daemon_types,
+ 'hosts': self.hosts,
+ 'services': self.services,
+ 'total_count': self.total_count,
+ 'remaining_count': self.remaining_count,
+ }
+
+ @classmethod
+ def from_json(cls, data: dict) -> Optional['UpgradeState']:
+ valid_params = UpgradeState.__init__.__code__.co_varnames
+ if data:
+ c = {k: v for k, v in data.items() if k in valid_params}
+ if 'repo_digest' in c:
+ c['target_digests'] = [c.pop('repo_digest')]
+ return cls(**c)
+ else:
+ return None
+
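+# A minimal round-trip sketch for UpgradeState persistence (illustrative only;
+# the image name and progress id below are made-up values, not cluster data):
+#
+#   state = UpgradeState(target_name='quay.io/ceph/ceph:v16.2.7', progress_id='1234')
+#   raw = json.dumps(state.to_json())           # what _save_upgrade_state() stores
+#   restored = UpgradeState.from_json(json.loads(raw))
+#   assert restored is not None and restored.progress_id == '1234'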
+
+class CephadmUpgrade:
+ UPGRADE_ERRORS = [
+ 'UPGRADE_NO_STANDBY_MGR',
+ 'UPGRADE_FAILED_PULL',
+ 'UPGRADE_REDEPLOY_DAEMON',
+ 'UPGRADE_BAD_TARGET_VERSION',
+ 'UPGRADE_EXCEPTION'
+ ]
+
+ def __init__(self, mgr: "CephadmOrchestrator"):
+ self.mgr = mgr
+
+ t = self.mgr.get_store('upgrade_state')
+ if t:
+ self.upgrade_state: Optional[UpgradeState] = UpgradeState.from_json(json.loads(t))
+ else:
+ self.upgrade_state = None
+
+ @property
+ def target_image(self) -> str:
+ assert self.upgrade_state
+ if not self.mgr.use_repo_digest:
+ return self.upgrade_state._target_name
+ if not self.upgrade_state.target_digests:
+ return self.upgrade_state._target_name
+
+ # FIXME: we assume the first digest is the best one to use
+ return self.upgrade_state.target_digests[0]
+
+ def upgrade_status(self) -> orchestrator.UpgradeStatusSpec:
+ r = orchestrator.UpgradeStatusSpec()
+ if self.upgrade_state:
+ r.target_image = self.target_image
+ r.in_progress = True
+ r.progress, r.services_complete = self._get_upgrade_info()
+ r.is_paused = self.upgrade_state.paused
+
+ if self.upgrade_state.daemon_types is not None:
+ which_str = f'Upgrading daemons of type(s) {",".join(self.upgrade_state.daemon_types)}'
+ if self.upgrade_state.hosts is not None:
+ which_str += f' on host(s) {",".join(self.upgrade_state.hosts)}'
+ elif self.upgrade_state.services is not None:
+ which_str = f'Upgrading daemons in service(s) {",".join(self.upgrade_state.services)}'
+ if self.upgrade_state.hosts is not None:
+ which_str += f' on host(s) {",".join(self.upgrade_state.hosts)}'
+ elif self.upgrade_state.hosts is not None:
+ which_str = f'Upgrading all daemons on host(s) {",".join(self.upgrade_state.hosts)}'
+ else:
+ which_str = 'Upgrading all daemon types on all hosts'
+ if self.upgrade_state.total_count is not None and self.upgrade_state.remaining_count is not None:
+ which_str += f'. Upgrade limited to {self.upgrade_state.total_count} daemons ({self.upgrade_state.remaining_count} remaining).'
+ r.which = which_str
+
+ # accessing self.upgrade_info_str will throw an exception if it
+ # has not been set in _do_upgrade yet
+ try:
+ r.message = self.upgrade_info_str
+ except AttributeError:
+ pass
+ if self.upgrade_state.error:
+ r.message = 'Error: ' + self.upgrade_state.error
+ elif self.upgrade_state.paused:
+ r.message = 'Upgrade paused'
+ return r
+
+ def _get_upgrade_info(self) -> Tuple[str, List[str]]:
+ if not self.upgrade_state or not self.upgrade_state.target_digests:
+ return '', []
+
+ daemons = self._get_filtered_daemons()
+
+ if any(not d.container_image_digests for d in daemons if d.daemon_type == 'mgr'):
+ return '', []
+
+ completed_daemons = [(d.daemon_type, any(d in self.upgrade_state.target_digests for d in (
+ d.container_image_digests or []))) for d in daemons if d.daemon_type]
+
+ done = len([True for completion in completed_daemons if completion[1]])
+
+ completed_types = list(set([completion[0] for completion in completed_daemons if all(
+ c[1] for c in completed_daemons if c[0] == completion[0])]))
+
+ return '%s/%s daemons upgraded' % (done, len(daemons)), completed_types
+
+ def _get_filtered_daemons(self) -> List[DaemonDescription]:
+        # Return the set of daemons to be upgraded with our current filtering
+        # parameters (or all daemons in upgrade order if no filtering
+        # parameters are set).
+ assert self.upgrade_state is not None
+ if self.upgrade_state.daemon_types is not None:
+ daemons = [d for d in self.mgr.cache.get_daemons(
+ ) if d.daemon_type in self.upgrade_state.daemon_types]
+ elif self.upgrade_state.services is not None:
+ daemons = []
+ for service in self.upgrade_state.services:
+ daemons += self.mgr.cache.get_daemons_by_service(service)
+ else:
+ daemons = [d for d in self.mgr.cache.get_daemons(
+ ) if d.daemon_type in CEPH_UPGRADE_ORDER]
+ if self.upgrade_state.hosts is not None:
+ daemons = [d for d in daemons if d.hostname in self.upgrade_state.hosts]
+ return daemons
+
+ def _get_current_version(self) -> Tuple[int, int, str]:
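+        # Parsing example (assumes the usual "ceph version" banner format):
+        #   'ceph version 16.2.7-123-gabcdef0 (abcdef0) pacific (stable)'
+        #   -> (16, 2, '16.2.7-123-gabcdef0 (abcdef0) pacific (stable)')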
+ current_version = self.mgr.version.split('ceph version ')[1]
+ (current_major, current_minor, _) = current_version.split('-')[0].split('.', 2)
+ return (int(current_major), int(current_minor), current_version)
+
+ def _check_target_version(self, version: str) -> Optional[str]:
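+        # Illustrative outcomes for a hypothetical 16.2.x cluster whose mon/osd
+        # maps are also at 16 (the versions below are examples, not cluster data):
+        #   '17.2.0' -> None (allowed), '15.2.3' -> major-downgrade error,
+        #   '19.0.0' -> 'too big a jump' error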
+ try:
+ (major, minor, _) = version.split('.', 2)
+ assert int(minor) >= 0
+ # patch might be a number or {number}-g{sha1}
+ except ValueError:
+ return 'version must be in the form X.Y.Z (e.g., 15.2.3)'
+ if int(major) < 15 or (int(major) == 15 and int(minor) < 2):
+ return 'cephadm only supports octopus (15.2.0) or later'
+
+        # too far a jump?
+ current_version = self.mgr.version.split('ceph version ')[1]
+ (current_major, current_minor, _) = current_version.split('-')[0].split('.', 2)
+ if int(current_major) < int(major) - 2:
+ return f'ceph can only upgrade 1 or 2 major versions at a time; {current_version} -> {version} is too big a jump'
+ if int(current_major) > int(major):
+ return f'ceph cannot downgrade major versions (from {current_version} to {version})'
+ if int(current_major) == int(major):
+ if int(current_minor) > int(minor):
+ return f'ceph cannot downgrade to a {"rc" if minor == "1" else "dev"} release'
+
+ # check mon min
+ monmap = self.mgr.get("mon_map")
+ mon_min = monmap.get("min_mon_release", 0)
+ if mon_min < int(major) - 2:
+ return f'min_mon_release ({mon_min}) < target {major} - 2; first complete an upgrade to an earlier release'
+
+ # check osd min
+ osdmap = self.mgr.get("osd_map")
+ osd_min_name = osdmap.get("require_osd_release", "argonaut")
+ osd_min = ceph_release_to_major(osd_min_name)
+ if osd_min < int(major) - 2:
+ return f'require_osd_release ({osd_min_name} or {osd_min}) < target {major} - 2; first complete an upgrade to an earlier release'
+
+ return None
+
+ def upgrade_ls(self, image: Optional[str], tags: bool) -> Dict:
+ if not image:
+ image = self.mgr.container_image_base
+ reg_name, bare_image = image.split('/', 1)
+ reg = Registry(reg_name)
+ versions = []
+ r: Dict[Any, Any] = {
+ "image": image,
+ "registry": reg_name,
+ "bare_image": bare_image,
+ }
+
+ try:
+ ls = reg.get_tags(bare_image)
+ except ValueError as e:
+ raise OrchestratorError(f'{e}')
+ if not tags:
+ for t in ls:
+ if t[0] != 'v':
+ continue
+ v = t[1:].split('.')
+ if len(v) != 3:
+ continue
+ if '-' in v[2]:
+ continue
+ versions.append('.'.join(v))
+ r["versions"] = sorted(
+ versions,
+ key=lambda k: list(map(int, k.split('.'))),
+ reverse=True
+ )
+ else:
+ r["tags"] = sorted(ls)
+ return r
+
+ def upgrade_start(self, image: str, version: str, daemon_types: Optional[List[str]] = None,
+ hosts: Optional[List[str]] = None, services: Optional[List[str]] = None, limit: Optional[int] = None) -> str:
+ if self.mgr.mode != 'root':
+ raise OrchestratorError('upgrade is not supported in %s mode' % (
+ self.mgr.mode))
+ if version:
+ version_error = self._check_target_version(version)
+ if version_error:
+ raise OrchestratorError(version_error)
+ target_name = self.mgr.container_image_base + ':v' + version
+ elif image:
+ target_name = normalize_image_digest(image, self.mgr.default_registry)
+ else:
+ raise OrchestratorError('must specify either image or version')
+
+ if daemon_types is not None or services is not None or hosts is not None:
+ self._validate_upgrade_filters(target_name, daemon_types, hosts, services)
+
+ if self.upgrade_state:
+ if self.upgrade_state._target_name != target_name:
+ raise OrchestratorError(
+ 'Upgrade to %s (not %s) already in progress' %
+ (self.upgrade_state._target_name, target_name))
+ if self.upgrade_state.paused:
+ self.upgrade_state.paused = False
+ self._save_upgrade_state()
+ return 'Resumed upgrade to %s' % self.target_image
+ return 'Upgrade to %s in progress' % self.target_image
+
+ running_mgr_count = len([daemon for daemon in self.mgr.cache.get_daemons_by_type(
+ 'mgr') if daemon.status == DaemonDescriptionStatus.running])
+
+ if running_mgr_count < 2:
+ raise OrchestratorError('Need at least 2 running mgr daemons for upgrade')
+
+ self.mgr.log.info('Upgrade: Started with target %s' % target_name)
+ self.upgrade_state = UpgradeState(
+ target_name=target_name,
+ progress_id=str(uuid.uuid4()),
+ daemon_types=daemon_types,
+ hosts=hosts,
+ services=services,
+ total_count=limit,
+ remaining_count=limit,
+ )
+ self._update_upgrade_progress(0.0)
+ self._save_upgrade_state()
+ self._clear_upgrade_health_checks()
+ self.mgr.event.set()
+ return 'Initiating upgrade to %s' % (target_name)
+
+ def _validate_upgrade_filters(self, target_name: str, daemon_types: Optional[List[str]] = None, hosts: Optional[List[str]] = None, services: Optional[List[str]] = None) -> None:
+ def _latest_type(dtypes: List[str]) -> str:
+ # [::-1] gives the list in reverse
+ for daemon_type in CEPH_UPGRADE_ORDER[::-1]:
+ if daemon_type in dtypes:
+ return daemon_type
+ return ''
+
+ def _get_earlier_daemons(dtypes: List[str], candidates: List[DaemonDescription]) -> List[DaemonDescription]:
+ # this function takes a list of daemon types and first finds the daemon
+ # type from that list that is latest in our upgrade order. Then, from
+ # that latest type, it filters the list of candidate daemons received
+ # for daemons with types earlier in the upgrade order than the latest
+ # type found earlier. That filtered list of daemons is returned. The
+ # purpose of this function is to help in finding daemons that must have
+ # already been upgraded for the given filtering parameters (--daemon-types,
+ # --services, --hosts) to be valid.
+ latest = _latest_type(dtypes)
+ if not latest:
+ return []
+ earlier_types = '|'.join(CEPH_UPGRADE_ORDER).split(latest)[0].split('|')[:-1]
+ earlier_types = [t for t in earlier_types if t not in dtypes]
+ return [d for d in candidates if d.daemon_type in earlier_types]
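+        # Worked example (hedged; it assumes only that 'mgr' precedes 'mon' in
+        # CEPH_UPGRADE_ORDER, as the staggered-upgrade tests also do):
+        #   _latest_type(['mon']) -> 'mon'
+        #   _get_earlier_daemons(['mon'], candidates) -> the candidates whose type
+        #   comes before 'mon' in the upgrade order (e.g. the mgr daemons), i.e.
+        #   daemons that must already be upgraded before a mon-only upgrade starts.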
+
+ if self.upgrade_state:
+ raise OrchestratorError('Cannot set values for --daemon-types, --services or --hosts when upgrade already in progress.')
+ try:
+ target_id, target_version, target_digests = CephadmServe(self.mgr)._get_container_image_info(target_name)
+ except OrchestratorError as e:
+ raise OrchestratorError(f'Failed to pull {target_name}: {str(e)}')
+ # what we need to do here is build a list of daemons that must already be upgraded
+ # in order for the user's selection of daemons to upgrade to be valid. for example,
+ # if they say --daemon-types 'osd,mds' but mons have not been upgraded, we block.
+ daemons = [d for d in self.mgr.cache.get_daemons() if d.daemon_type not in MONITORING_STACK_TYPES]
+ err_msg_base = 'Cannot start upgrade. '
+ # "dtypes" will later be filled in with the types of daemons that will be upgraded with the given parameters
+ dtypes = []
+ if daemon_types is not None:
+ dtypes = daemon_types
+ if hosts is not None:
+ dtypes = [_latest_type(dtypes)]
+ other_host_daemons = [
+ d for d in daemons if d.hostname is not None and d.hostname not in hosts]
+ daemons = _get_earlier_daemons(dtypes, other_host_daemons)
+ else:
+ daemons = _get_earlier_daemons(dtypes, daemons)
+ err_msg_base += 'Daemons with types earlier in upgrade order than given types need upgrading.\n'
+ elif services is not None:
+ # for our purposes here we can effectively convert our list of services into the
+ # set of daemon types the services contain. This works because we don't allow --services
+ # and --daemon-types at the same time and we only allow services of the same type
+ sspecs = [self.mgr.spec_store[s].spec for s in services if self.mgr.spec_store[s].spec is not None]
+ stypes = list(set([s.service_type for s in sspecs]))
+ if len(stypes) != 1:
+                raise OrchestratorError('Upgrading by service only supports services of one type at '
+ f'a time. Found service types: {stypes}')
+ for stype in stypes:
+ dtypes += orchestrator.service_to_daemon_types(stype)
+ dtypes = list(set(dtypes))
+ if hosts is not None:
+ other_host_daemons = [
+ d for d in daemons if d.hostname is not None and d.hostname not in hosts]
+ daemons = _get_earlier_daemons(dtypes, other_host_daemons)
+ else:
+ daemons = _get_earlier_daemons(dtypes, daemons)
+ err_msg_base += 'Daemons with types earlier in upgrade order than daemons from given services need upgrading.\n'
+ elif hosts is not None:
+            # hosts must be handled a bit differently. For this, we really need to find
+            # all the daemon types that reside on the hosts we will upgrade. Then take
+            # the type from that list that is latest in the upgrade order and check
+            # whether any host outside the provided list still has a not-yet-upgraded
+            # daemon of a type earlier in the upgrade order.
+ dtypes = list(set([d.daemon_type for d in daemons if d.daemon_type is not None and d.hostname in hosts]))
+ other_hosts_daemons = [d for d in daemons if d.hostname is not None and d.hostname not in hosts]
+ daemons = _get_earlier_daemons([_latest_type(dtypes)], other_hosts_daemons)
+ err_msg_base += 'Daemons with types earlier in upgrade order than daemons on given host need upgrading.\n'
+ need_upgrade_self, n1, n2, _ = self._detect_need_upgrade(daemons, target_digests)
+ if need_upgrade_self and ('mgr' not in dtypes or (daemon_types is None and services is None)):
+            # also report the active mgr as needing to be upgraded. It is not included
+            # in the resulting list by default, as it is treated specially and handled
+            # via the need_upgrade_self bool.
+ n1.insert(0, (self.mgr.mgr_service.get_active_daemon(self.mgr.cache.get_daemons_by_type('mgr')), True))
+ if n1 or n2:
+ raise OrchestratorError(f'{err_msg_base}Please first upgrade '
+ f'{", ".join(list(set([d[0].name() for d in n1] + [d[0].name() for d in n2])))}\n'
+ f'NOTE: Enforced upgrade order is: {" -> ".join(CEPH_TYPES + GATEWAY_TYPES)}')
+
+ def upgrade_pause(self) -> str:
+ if not self.upgrade_state:
+ raise OrchestratorError('No upgrade in progress')
+ if self.upgrade_state.paused:
+ return 'Upgrade to %s already paused' % self.target_image
+ self.upgrade_state.paused = True
+ self.mgr.log.info('Upgrade: Paused upgrade to %s' % self.target_image)
+ self._save_upgrade_state()
+ return 'Paused upgrade to %s' % self.target_image
+
+ def upgrade_resume(self) -> str:
+ if not self.upgrade_state:
+ raise OrchestratorError('No upgrade in progress')
+ if not self.upgrade_state.paused:
+ return 'Upgrade to %s not paused' % self.target_image
+ self.upgrade_state.paused = False
+ self.upgrade_state.error = ''
+ self.mgr.log.info('Upgrade: Resumed upgrade to %s' % self.target_image)
+ self._save_upgrade_state()
+ self.mgr.event.set()
+ return 'Resumed upgrade to %s' % self.target_image
+
+ def upgrade_stop(self) -> str:
+ if not self.upgrade_state:
+ return 'No upgrade in progress'
+ if self.upgrade_state.progress_id:
+ self.mgr.remote('progress', 'complete',
+ self.upgrade_state.progress_id)
+ target_image = self.target_image
+ self.mgr.log.info('Upgrade: Stopped')
+ self.upgrade_state = None
+ self._save_upgrade_state()
+ self._clear_upgrade_health_checks()
+ self.mgr.event.set()
+ return 'Stopped upgrade to %s' % target_image
+
+ def continue_upgrade(self) -> bool:
+ """
+        Returns False if nothing was done.
+ :return:
+ """
+ if self.upgrade_state and not self.upgrade_state.paused:
+ try:
+ self._do_upgrade()
+ except Exception as e:
+ self._fail_upgrade('UPGRADE_EXCEPTION', {
+ 'severity': 'error',
+ 'summary': 'Upgrade: failed due to an unexpected exception',
+ 'count': 1,
+ 'detail': [f'Unexpected exception occurred during upgrade process: {str(e)}'],
+ })
+ return False
+ return True
+ return False
+
+ def _wait_for_ok_to_stop(
+ self, s: DaemonDescription,
+ known: Optional[List[str]] = None, # NOTE: output argument!
+ ) -> bool:
+ # only wait a little bit; the service might go away for something
+ assert s.daemon_type is not None
+ assert s.daemon_id is not None
+ tries = 4
+ while tries > 0:
+ if not self.upgrade_state or self.upgrade_state.paused:
+ return False
+
+ # setting force flag to retain old functionality.
+ # note that known is an output argument for ok_to_stop()
+ r = self.mgr.cephadm_services[daemon_type_to_service(s.daemon_type)].ok_to_stop([
+ s.daemon_id], known=known, force=True)
+
+ if not r.retval:
+ logger.info(f'Upgrade: {r.stdout}')
+ return True
+ logger.info(f'Upgrade: {r.stderr}')
+
+ time.sleep(15)
+ tries -= 1
+ return False
+
+ def _clear_upgrade_health_checks(self) -> None:
+ for k in self.UPGRADE_ERRORS:
+ if k in self.mgr.health_checks:
+ del self.mgr.health_checks[k]
+ self.mgr.set_health_checks(self.mgr.health_checks)
+
+ def _fail_upgrade(self, alert_id: str, alert: dict) -> None:
+ assert alert_id in self.UPGRADE_ERRORS
+ if not self.upgrade_state:
+ # this could happen if the user canceled the upgrade while we
+ # were doing something
+ return
+
+ logger.error('Upgrade: Paused due to %s: %s' % (alert_id,
+ alert['summary']))
+ self.upgrade_state.error = alert_id + ': ' + alert['summary']
+ self.upgrade_state.paused = True
+ self._save_upgrade_state()
+ self.mgr.health_checks[alert_id] = alert
+ self.mgr.set_health_checks(self.mgr.health_checks)
+
+ def _update_upgrade_progress(self, progress: float) -> None:
+ if not self.upgrade_state:
+ assert False, 'No upgrade in progress'
+
+ if not self.upgrade_state.progress_id:
+ self.upgrade_state.progress_id = str(uuid.uuid4())
+ self._save_upgrade_state()
+ self.mgr.remote('progress', 'update', self.upgrade_state.progress_id,
+ ev_msg='Upgrade to %s' % (
+ self.upgrade_state.target_version or self.target_image
+ ),
+ ev_progress=progress,
+ add_to_ceph_s=True)
+
+ def _save_upgrade_state(self) -> None:
+ if not self.upgrade_state:
+ self.mgr.set_store('upgrade_state', None)
+ return
+ self.mgr.set_store('upgrade_state', json.dumps(self.upgrade_state.to_json()))
+
+ def get_distinct_container_image_settings(self) -> Dict[str, str]:
+ # get all distinct container_image settings
+ image_settings = {}
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'config dump',
+ 'format': 'json',
+ })
+ config = json.loads(out)
+ for opt in config:
+ if opt['name'] == 'container_image':
+ image_settings[opt['section']] = opt['value']
+ return image_settings
+
+ def _prepare_for_mds_upgrade(
+ self,
+ target_major: str,
+ need_upgrade: List[DaemonDescription]
+ ) -> bool:
+ # scale down all filesystems to 1 MDS
+ assert self.upgrade_state
+ if not self.upgrade_state.fs_original_max_mds:
+ self.upgrade_state.fs_original_max_mds = {}
+ if not self.upgrade_state.fs_original_allow_standby_replay:
+ self.upgrade_state.fs_original_allow_standby_replay = {}
+ fsmap = self.mgr.get("fs_map")
+ continue_upgrade = True
+ for fs in fsmap.get('filesystems', []):
+ fscid = fs["id"]
+ mdsmap = fs["mdsmap"]
+ fs_name = mdsmap["fs_name"]
+
+ # disable allow_standby_replay?
+ if mdsmap['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY:
+ self.mgr.log.info('Upgrade: Disabling standby-replay for filesystem %s' % (
+ fs_name
+ ))
+ if fscid not in self.upgrade_state.fs_original_allow_standby_replay:
+ self.upgrade_state.fs_original_allow_standby_replay[fscid] = True
+ self._save_upgrade_state()
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'fs set',
+ 'fs_name': fs_name,
+ 'var': 'allow_standby_replay',
+ 'val': '0',
+ })
+ continue_upgrade = False
+ continue
+
+ # scale down this filesystem?
+ if mdsmap["max_mds"] > 1:
+ self.mgr.log.info('Upgrade: Scaling down filesystem %s' % (
+ fs_name
+ ))
+ if fscid not in self.upgrade_state.fs_original_max_mds:
+ self.upgrade_state.fs_original_max_mds[fscid] = mdsmap['max_mds']
+ self._save_upgrade_state()
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'fs set',
+ 'fs_name': fs_name,
+ 'var': 'max_mds',
+ 'val': '1',
+ })
+ continue_upgrade = False
+ continue
+
+ if not (mdsmap['in'] == [0] and len(mdsmap['up']) <= 1):
+ self.mgr.log.info('Upgrade: Waiting for fs %s to scale down to reach 1 MDS' % (fs_name))
+ time.sleep(10)
+ continue_upgrade = False
+ continue
+
+ if len(mdsmap['up']) == 0:
+ self.mgr.log.warning("Upgrade: No mds is up; continuing upgrade procedure to poke things in the right direction")
+ # This can happen because the current version MDS have
+ # incompatible compatsets; the mons will not do any promotions.
+ # We must upgrade to continue.
+ elif len(mdsmap['up']) > 0:
+ mdss = list(mdsmap['info'].values())
+ assert len(mdss) == 1
+ lone_mds = mdss[0]
+ if lone_mds['state'] != 'up:active':
+ self.mgr.log.info('Upgrade: Waiting for mds.%s to be up:active (currently %s)' % (
+ lone_mds['name'],
+ lone_mds['state'],
+ ))
+ time.sleep(10)
+ continue_upgrade = False
+ continue
+ else:
+ assert False
+
+ return continue_upgrade
+
+ def _enough_mons_for_ok_to_stop(self) -> bool:
+ # type () -> bool
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'quorum_status',
+ })
+ try:
+ j = json.loads(out)
+ except Exception:
+ raise OrchestratorError('failed to parse quorum status')
+
+ mons = [m['name'] for m in j['monmap']['mons']]
+ return len(mons) > 2
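+        # i.e. require at least 3 mons in the monmap: with only 2, stopping one
+        # would break quorum, so the ok-to-stop gate is skipped for such clusters.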
+
+ def _enough_mds_for_ok_to_stop(self, mds_daemon: DaemonDescription) -> bool:
+ # type (DaemonDescription) -> bool
+
+ # find fs this mds daemon belongs to
+ fsmap = self.mgr.get("fs_map")
+ for fs in fsmap.get('filesystems', []):
+ mdsmap = fs["mdsmap"]
+ fs_name = mdsmap["fs_name"]
+
+ assert mds_daemon.daemon_id
+ if fs_name != mds_daemon.service_name().split('.', 1)[1]:
+ # wrong fs for this mds daemon
+ continue
+
+ # get number of mds daemons for this fs
+ mds_count = len(
+ [daemon for daemon in self.mgr.cache.get_daemons_by_service(mds_daemon.service_name())])
+
+ # standby mds daemons for this fs?
+ if mdsmap["max_mds"] < mds_count:
+ return True
+ return False
+
+ return True # if mds has no fs it should pass ok-to-stop
+
+ def _detect_need_upgrade(self, daemons: List[DaemonDescription], target_digests: Optional[List[str]] = None) -> Tuple[bool, List[Tuple[DaemonDescription, bool]], List[Tuple[DaemonDescription, bool]], int]:
+        # This function takes a list of daemons and the target container digests.
+        # A daemon is considered upgraded to the target container image when its
+        # current container digests match the target digests, so comparing the two
+        # tells us which daemons still need to be upgraded.
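+        # Example of the digest check (the digests below are illustrative only):
+        #   target_digests = ['quay.io/ceph/ceph@sha256:new']
+        #   a daemon reporting container_image_digests == ['quay.io/ceph/ceph@sha256:new']
+        #   counts as done; one reporting only '...@sha256:old' still needs upgrading.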
+ need_upgrade_self = False
+ need_upgrade: List[Tuple[DaemonDescription, bool]] = []
+ need_upgrade_deployer: List[Tuple[DaemonDescription, bool]] = []
+ done = 0
+ if target_digests is None:
+ target_digests = []
+ for d in daemons:
+ assert d.daemon_type is not None
+ assert d.daemon_id is not None
+ assert d.hostname is not None
+ correct_digest = False
+ if (any(d in target_digests for d in (d.container_image_digests or []))
+ or d.daemon_type in MONITORING_STACK_TYPES):
+ logger.debug('daemon %s.%s container digest correct' % (
+ d.daemon_type, d.daemon_id))
+ correct_digest = True
+ if any(d in target_digests for d in (d.deployed_by or [])):
+ logger.debug('daemon %s.%s deployed by correct version' % (
+ d.daemon_type, d.daemon_id))
+ done += 1
+ continue
+
+ if self.mgr.daemon_is_self(d.daemon_type, d.daemon_id):
+ logger.info('Upgrade: Need to upgrade myself (mgr.%s)' %
+ self.mgr.get_mgr_id())
+ need_upgrade_self = True
+ continue
+
+ if correct_digest:
+ logger.debug('daemon %s.%s not deployed by correct version' % (
+ d.daemon_type, d.daemon_id))
+ need_upgrade_deployer.append((d, True))
+ else:
+ logger.debug('daemon %s.%s not correct (%s, %s, %s)' % (
+ d.daemon_type, d.daemon_id,
+ d.container_image_name, d.container_image_digests, d.version))
+ need_upgrade.append((d, False))
+
+ return (need_upgrade_self, need_upgrade, need_upgrade_deployer, done)
+
+ def _to_upgrade(self, need_upgrade: List[Tuple[DaemonDescription, bool]], target_image: str) -> Tuple[bool, List[Tuple[DaemonDescription, bool]]]:
+ to_upgrade: List[Tuple[DaemonDescription, bool]] = []
+ known_ok_to_stop: List[str] = []
+ for d_entry in need_upgrade:
+ d = d_entry[0]
+ assert d.daemon_type is not None
+ assert d.daemon_id is not None
+ assert d.hostname is not None
+
+ if not d.container_image_id:
+ if d.container_image_name == target_image:
+ logger.debug(
+ 'daemon %s has unknown container_image_id but has correct image name' % (d.name()))
+ continue
+
+ if known_ok_to_stop:
+ if d.name() in known_ok_to_stop:
+ logger.info(f'Upgrade: {d.name()} is also safe to restart')
+ to_upgrade.append(d_entry)
+ continue
+
+ if d.daemon_type == 'osd':
+ # NOTE: known_ok_to_stop is an output argument for
+ # _wait_for_ok_to_stop
+ if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
+ return False, to_upgrade
+
+ if d.daemon_type == 'mon' and self._enough_mons_for_ok_to_stop():
+ if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
+ return False, to_upgrade
+
+ if d.daemon_type == 'mds' and self._enough_mds_for_ok_to_stop(d):
+ if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
+ return False, to_upgrade
+
+ to_upgrade.append(d_entry)
+
+ # if we don't have a list of others to consider, stop now
+ if d.daemon_type in ['osd', 'mds', 'mon'] and not known_ok_to_stop:
+ break
+ return True, to_upgrade
+
+ def _upgrade_daemons(self, to_upgrade: List[Tuple[DaemonDescription, bool]], target_image: str, target_digests: Optional[List[str]] = None) -> None:
+ assert self.upgrade_state is not None
+ num = 1
+ if target_digests is None:
+ target_digests = []
+ for d_entry in to_upgrade:
+ if self.upgrade_state.remaining_count is not None and self.upgrade_state.remaining_count <= 0 and not d_entry[1]:
+ self.mgr.log.info(f'Hit upgrade limit of {self.upgrade_state.total_count}. Stopping upgrade')
+ return
+ d = d_entry[0]
+ assert d.daemon_type is not None
+ assert d.daemon_id is not None
+ assert d.hostname is not None
+
+ # make sure host has latest container image
+ out, errs, code = CephadmServe(self.mgr)._run_cephadm(
+ d.hostname, '', 'inspect-image', [],
+ image=target_image, no_fsid=True, error_ok=True)
+ if code or not any(d in target_digests for d in json.loads(''.join(out)).get('repo_digests', [])):
+ logger.info('Upgrade: Pulling %s on %s' % (target_image,
+ d.hostname))
+ self.upgrade_info_str = 'Pulling %s image on host %s' % (
+ target_image, d.hostname)
+ out, errs, code = CephadmServe(self.mgr)._run_cephadm(
+ d.hostname, '', 'pull', [],
+ image=target_image, no_fsid=True, error_ok=True)
+ if code:
+ self._fail_upgrade('UPGRADE_FAILED_PULL', {
+ 'severity': 'warning',
+ 'summary': 'Upgrade: failed to pull target image',
+ 'count': 1,
+ 'detail': [
+ 'failed to pull %s on host %s' % (target_image,
+ d.hostname)],
+ })
+ return
+ r = json.loads(''.join(out))
+ if not any(d in target_digests for d in r.get('repo_digests', [])):
+ logger.info('Upgrade: image %s pull on %s got new digests %s (not %s), restarting' % (
+ target_image, d.hostname, r['repo_digests'], target_digests))
+ self.upgrade_info_str = 'Image %s pull on %s got new digests %s (not %s), restarting' % (
+ target_image, d.hostname, r['repo_digests'], target_digests)
+ self.upgrade_state.target_digests = r['repo_digests']
+ self._save_upgrade_state()
+ return
+
+ self.upgrade_info_str = 'Currently upgrading %s daemons' % (d.daemon_type)
+
+ if len(to_upgrade) > 1:
+ logger.info('Upgrade: Updating %s.%s (%d/%d)' % (d.daemon_type, d.daemon_id, num, min(len(to_upgrade), self.upgrade_state.remaining_count if self.upgrade_state.remaining_count is not None else 9999999)))
+ else:
+ logger.info('Upgrade: Updating %s.%s' %
+ (d.daemon_type, d.daemon_id))
+ action = 'Upgrading' if not d_entry[1] else 'Redeploying'
+ try:
+ daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(d)
+ self.mgr._daemon_action(
+ daemon_spec,
+ 'redeploy',
+ image=target_image if not d_entry[1] else None
+ )
+ except Exception as e:
+ self._fail_upgrade('UPGRADE_REDEPLOY_DAEMON', {
+ 'severity': 'warning',
+ 'summary': f'{action} daemon {d.name()} on host {d.hostname} failed.',
+ 'count': 1,
+ 'detail': [
+ f'Upgrade daemon: {d.name()}: {e}'
+ ],
+ })
+ return
+ num += 1
+ if self.upgrade_state.remaining_count is not None and not d_entry[1]:
+ self.upgrade_state.remaining_count -= 1
+ self._save_upgrade_state()
+
+ def _handle_need_upgrade_self(self, need_upgrade_self: bool, upgrading_mgrs: bool) -> None:
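+ # If the active mgr itself needs upgrading, fail over so a standby takes
+ # over and continues the upgrade; otherwise clear any stale
+ # UPGRADE_NO_STANDBY_MGR health warning while mgrs are being upgraded.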
+ if need_upgrade_self:
+ try:
+ self.mgr.mgr_service.fail_over()
+ except OrchestratorError as e:
+ self._fail_upgrade('UPGRADE_NO_STANDBY_MGR', {
+ 'severity': 'warning',
+ 'summary': f'Upgrade: {e}',
+ 'count': 1,
+ 'detail': [
+ 'The upgrade process needs to upgrade the mgr, '
+ 'but it needs at least one standby to proceed.',
+ ],
+ })
+ return
+
+ return # unreachable code, as fail_over never returns
+ elif upgrading_mgrs:
+ if 'UPGRADE_NO_STANDBY_MGR' in self.mgr.health_checks:
+ del self.mgr.health_checks['UPGRADE_NO_STANDBY_MGR']
+ self.mgr.set_health_checks(self.mgr.health_checks)
+
+ def _set_container_images(self, daemon_type: str, target_image: str, image_settings: Dict[str, str]) -> None:
+ # push down configs
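+ # set the type-wide container_image option and drop any per-daemon
+ # overrides that would otherwise shadow the new value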
+ daemon_type_section = name_to_config_section(daemon_type)
+ if image_settings.get(daemon_type_section) != target_image:
+ logger.info('Upgrade: Setting container_image for all %s' %
+ daemon_type)
+ self.mgr.set_container_image(daemon_type_section, target_image)
+ to_clean = []
+ for section in image_settings.keys():
+ if section.startswith(name_to_config_section(daemon_type) + '.'):
+ to_clean.append(section)
+ if to_clean:
+ logger.debug('Upgrade: Cleaning up container_image for %s' %
+ to_clean)
+ for section in to_clean:
+ ret, image, err = self.mgr.check_mon_command({
+ 'prefix': 'config rm',
+ 'name': 'container_image',
+ 'who': section,
+ })
+
+ def _complete_osd_upgrade(self, target_major: str, target_major_name: str) -> None:
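+ # Once every OSD runs the target release, raise require_osd_release so
+ # that OSDs older than the target major can no longer join the cluster.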
+ osdmap = self.mgr.get("osd_map")
+ osd_min_name = osdmap.get("require_osd_release", "argonaut")
+ osd_min = ceph_release_to_major(osd_min_name)
+ if osd_min < int(target_major):
+ logger.info(
+ f'Upgrade: Setting require_osd_release to {target_major} {target_major_name}')
+ ret, _, err = self.mgr.check_mon_command({
+ 'prefix': 'osd require-osd-release',
+ 'release': target_major_name,
+ })
+
+ def _complete_mds_upgrade(self) -> None:
+ assert self.upgrade_state is not None
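+ # Restore the max_mds and allow_standby_replay settings that were
+ # reduced/disabled while the MDS daemons were being upgraded.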
+ if self.upgrade_state.fs_original_max_mds:
+ for fs in self.mgr.get("fs_map")['filesystems']:
+ fscid = fs["id"]
+ fs_name = fs['mdsmap']['fs_name']
+ new_max = self.upgrade_state.fs_original_max_mds.get(fscid, 1)
+ if new_max > 1:
+ self.mgr.log.info('Upgrade: Scaling up filesystem %s max_mds to %d' % (
+ fs_name, new_max
+ ))
+ ret, _, err = self.mgr.check_mon_command({
+ 'prefix': 'fs set',
+ 'fs_name': fs_name,
+ 'var': 'max_mds',
+ 'val': str(new_max),
+ })
+
+ self.upgrade_state.fs_original_max_mds = {}
+ self._save_upgrade_state()
+ if self.upgrade_state.fs_original_allow_standby_replay:
+ for fs in self.mgr.get("fs_map")['filesystems']:
+ fscid = fs["id"]
+ fs_name = fs['mdsmap']['fs_name']
+ asr = self.upgrade_state.fs_original_allow_standby_replay.get(fscid, False)
+ if asr:
+ self.mgr.log.info('Upgrade: Enabling allow_standby_replay on filesystem %s' % (
+ fs_name
+ ))
+ ret, _, err = self.mgr.check_mon_command({
+ 'prefix': 'fs set',
+ 'fs_name': fs_name,
+ 'var': 'allow_standby_replay',
+ 'val': '1'
+ })
+
+ self.upgrade_state.fs_original_allow_standby_replay = {}
+ self._save_upgrade_state()
+
+ def _mark_upgrade_complete(self) -> None:
+ if not self.upgrade_state:
+ logger.debug('_mark_upgrade_complete: upgrade already marked complete, exiting')
+ return
+ logger.info('Upgrade: Complete!')
+ if self.upgrade_state.progress_id:
+ self.mgr.remote('progress', 'complete',
+ self.upgrade_state.progress_id)
+ self.upgrade_state = None
+ self._save_upgrade_state()
+
+ def _do_upgrade(self):
+ # type: () -> None
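+ # One pass of the upgrade state machine: resolve the target image info
+ # if needed, then walk CEPH_UPGRADE_ORDER and upgrade a batch of daemons,
+ # returning early so the serve loop can call us again for the next batch.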
+ if not self.upgrade_state:
+ logger.debug('_do_upgrade no state, exiting')
+ return
+
+ target_image = self.target_image
+ target_id = self.upgrade_state.target_id
+ target_digests = self.upgrade_state.target_digests
+ target_version = self.upgrade_state.target_version
+
+ first = False
+ if not target_id or not target_version or not target_digests:
+ # need to learn the container hash
+ logger.info('Upgrade: First pull of %s' % target_image)
+ self.upgrade_info_str = 'Doing first pull of %s image' % (target_image)
+ try:
+ target_id, target_version, target_digests = CephadmServe(self.mgr)._get_container_image_info(
+ target_image)
+ except OrchestratorError as e:
+ self._fail_upgrade('UPGRADE_FAILED_PULL', {
+ 'severity': 'warning',
+ 'summary': 'Upgrade: failed to pull target image',
+ 'count': 1,
+ 'detail': [str(e)],
+ })
+ return
+ if not target_version:
+ self._fail_upgrade('UPGRADE_FAILED_PULL', {
+ 'severity': 'warning',
+ 'summary': 'Upgrade: failed to pull target image',
+ 'count': 1,
+ 'detail': ['unable to extract ceph version from container'],
+ })
+ return
+ self.upgrade_state.target_id = target_id
+ # extract the version portion of 'ceph version {version} ({sha1})'
+ self.upgrade_state.target_version = target_version.split(' ')[2]
+ self.upgrade_state.target_digests = target_digests
+ self._save_upgrade_state()
+ target_image = self.target_image
+ first = True
+
+ if target_digests is None:
+ target_digests = []
+ if target_version.startswith('ceph version '):
+ # tolerate/fix upgrade state from older version
+ self.upgrade_state.target_version = target_version.split(' ')[2]
+ target_version = self.upgrade_state.target_version
+ (target_major, _) = target_version.split('.', 1)
+ target_major_name = self.mgr.lookup_release_name(int(target_major))
+
+ if first:
+ logger.info('Upgrade: Target is version %s (%s)' % (
+ target_version, target_major_name))
+ logger.info('Upgrade: Target container is %s, digests %s' % (
+ target_image, target_digests))
+
+ version_error = self._check_target_version(target_version)
+ if version_error:
+ self._fail_upgrade('UPGRADE_BAD_TARGET_VERSION', {
+ 'severity': 'error',
+ 'summary': f'Upgrade: cannot upgrade/downgrade to {target_version}',
+ 'count': 1,
+ 'detail': [version_error],
+ })
+ return
+
+ image_settings = self.get_distinct_container_image_settings()
+
+ # Older monitors (pre-v16.2.5) asserted that FSMap::compat ==
+ # MDSMap::compat for all fs. This is no longer the case beginning in
+ # v16.2.5. We must disable the sanity checks during upgrade.
+ # N.B.: we don't bother confirming the operator has not already
+ # disabled this or saving the config value.
+ self.mgr.check_mon_command({
+ 'prefix': 'config set',
+ 'name': 'mon_mds_skip_sanity',
+ 'value': '1',
+ 'who': 'mon',
+ })
+
+ if self.upgrade_state.daemon_types is not None:
+ logger.debug(f'Filtering daemons to upgrade by daemon types: {self.upgrade_state.daemon_types}')
+ daemons = [d for d in self.mgr.cache.get_daemons() if d.daemon_type in self.upgrade_state.daemon_types]
+ elif self.upgrade_state.services is not None:
+ logger.debug(f'Filtering daemons to upgrade by services: {self.upgrade_state.services}')
+ daemons = []
+ for service in self.upgrade_state.services:
+ daemons += self.mgr.cache.get_daemons_by_service(service)
+ else:
+ daemons = [d for d in self.mgr.cache.get_daemons() if d.daemon_type in CEPH_UPGRADE_ORDER]
+ if self.upgrade_state.hosts is not None:
+ logger.debug(f'Filtering daemons to upgrade by hosts: {self.upgrade_state.hosts}')
+ daemons = [d for d in daemons if d.hostname in self.upgrade_state.hosts]
+ upgraded_daemon_count: int = 0
+ for daemon_type in CEPH_UPGRADE_ORDER:
+ if self.upgrade_state.remaining_count is not None and self.upgrade_state.remaining_count <= 0:
+ # we hit our limit and should end the upgrade
+ # except for cases where we only need to redeploy, but not actually upgrade
+ # the image (which we don't count towards our limit). This case only occurs with mgr
+ # and monitoring stack daemons. Additionally, this case is only valid if
+ # the active mgr is already upgraded.
+ if any(d in target_digests for d in self.mgr.get_active_mgr_digests()):
+ if daemon_type not in MONITORING_STACK_TYPES and daemon_type != 'mgr':
+ continue
+ else:
+ self._mark_upgrade_complete()
+ return
+ logger.debug('Upgrade: Checking %s daemons' % daemon_type)
+ daemons_of_type = [d for d in daemons if d.daemon_type == daemon_type]
+
+ need_upgrade_self, need_upgrade, need_upgrade_deployer, done = self._detect_need_upgrade(daemons_of_type, target_digests)
+ upgraded_daemon_count += done
+ self._update_upgrade_progress(upgraded_daemon_count / len(daemons))
+
+ # make sure mgr and monitoring stack daemons are properly redeployed in staggered upgrade scenarios
+ if daemon_type == 'mgr' or daemon_type in MONITORING_STACK_TYPES:
+ if any(d in target_digests for d in self.mgr.get_active_mgr_digests()):
+ need_upgrade_names = [d[0].name() for d in need_upgrade] + [d[0].name() for d in need_upgrade_deployer]
+ dds = [d for d in self.mgr.cache.get_daemons_by_type(daemon_type) if d.name() not in need_upgrade_names]
+ need_upgrade_active, n1, n2, __ = self._detect_need_upgrade(dds, target_digests)
+ if not n1:
+ if not need_upgrade_self and need_upgrade_active:
+ need_upgrade_self = True
+ need_upgrade_deployer += n2
+ else:
+ # no point in trying to redeploy with new version if active mgr is not on the new version
+ need_upgrade_deployer = []
+
+ if not need_upgrade_self:
+ # only after the mgr itself is upgraded can we expect daemons to have
+ # deployed_by == target_digests
+ need_upgrade += need_upgrade_deployer
+
+ # prepare filesystems for daemon upgrades?
+ if (
+ daemon_type == 'mds'
+ and need_upgrade
+ and not self._prepare_for_mds_upgrade(target_major, [d_entry[0] for d_entry in need_upgrade])
+ ):
+ return
+
+ if need_upgrade:
+ self.upgrade_info_str = 'Currently upgrading %s daemons' % (daemon_type)
+
+ _continue, to_upgrade = self._to_upgrade(need_upgrade, target_image)
+ if not _continue:
+ return
+ self._upgrade_daemons(to_upgrade, target_image, target_digests)
+ if to_upgrade:
+ return
+
+ self._handle_need_upgrade_self(need_upgrade_self, daemon_type == 'mgr')
+
+ # the following bits of _do_upgrade finalize the upgrade for the given
+ # daemon type. If we haven't actually finished upgrading all the daemons
+ # of this type yet, skip the completion steps and move on to the next type
+ _, n1, n2, _ = self._detect_need_upgrade(self.mgr.cache.get_daemons_by_type(daemon_type), target_digests)
+ if n1 or n2:
+ continue
+
+ # complete mon upgrade?
+ if daemon_type == 'mon':
+ if not self.mgr.get("have_local_config_map"):
+ logger.info('Upgrade: Restarting mgr now that mons are running pacific')
+ need_upgrade_self = True
+
+ self._handle_need_upgrade_self(need_upgrade_self, daemon_type == 'mgr')
+
+ # make sure 'ceph versions' agrees
+ ret, out_ver, err = self.mgr.check_mon_command({
+ 'prefix': 'versions',
+ })
+ j = json.loads(out_ver)
+ for version, count in j.get(daemon_type, {}).items():
+ short_version = version.split(' ')[2]
+ if short_version != target_version:
+ logger.warning(
+ 'Upgrade: %d %s daemon(s) are %s != target %s' %
+ (count, daemon_type, short_version, target_version))
+
+ self._set_container_images(daemon_type, target_image, image_settings)
+
+ logger.debug('Upgrade: All %s daemons are up to date.' % daemon_type)
+
+ # complete osd upgrade?
+ if daemon_type == 'osd':
+ self._complete_osd_upgrade(target_major, target_major_name)
+
+ # complete mds upgrade?
+ if daemon_type == 'mds':
+ self._complete_mds_upgrade()
+
+ logger.debug('Upgrade: Upgraded %s daemon(s).' % daemon_type)
+
+ # clean up
+ logger.info('Upgrade: Finalizing container_image settings')
+ self.mgr.set_container_image('global', target_image)
+
+ for daemon_type in CEPH_UPGRADE_ORDER:
+ ret, image, err = self.mgr.check_mon_command({
+ 'prefix': 'config rm',
+ 'name': 'container_image',
+ 'who': name_to_config_section(daemon_type),
+ })
+
+ self.mgr.check_mon_command({
+ 'prefix': 'config rm',
+ 'name': 'mon_mds_skip_sanity',
+ 'who': 'mon',
+ })
+
+ self._mark_upgrade_complete()
+ return
diff --git a/src/pybind/mgr/cephadm/utils.py b/src/pybind/mgr/cephadm/utils.py
new file mode 100644
index 000000000..a4940c361
--- /dev/null
+++ b/src/pybind/mgr/cephadm/utils.py
@@ -0,0 +1,136 @@
+import logging
+import json
+import socket
+from enum import Enum
+from functools import wraps
+from typing import Optional, Callable, TypeVar, List, NewType, TYPE_CHECKING, Any, NamedTuple
+from orchestrator import OrchestratorError
+
+if TYPE_CHECKING:
+ from cephadm import CephadmOrchestrator
+
+T = TypeVar('T')
+logger = logging.getLogger(__name__)
+
+ConfEntity = NewType('ConfEntity', str)
+
+
+class CephadmNoImage(Enum):
+ token = 1
+
+
+# Core Ceph daemon types that run the ceph container image.
+# NOTE: the order is important here, as it drives CEPH_UPGRADE_ORDER below.
+CEPH_TYPES = ['mgr', 'mon', 'crash', 'osd', 'mds', 'rgw', 'rbd-mirror', 'cephfs-mirror']
+GATEWAY_TYPES = ['iscsi', 'nfs']
+MONITORING_STACK_TYPES = ['node-exporter', 'prometheus', 'alertmanager', 'grafana']
+RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES = ['nfs']
+
+CEPH_UPGRADE_ORDER = CEPH_TYPES + GATEWAY_TYPES + MONITORING_STACK_TYPES
+
+# these daemon types use the ceph container image
+CEPH_IMAGE_TYPES = CEPH_TYPES + ['iscsi', 'nfs']
+
+# Used for _run_cephadm used for check-host etc that don't require an --image parameter
+cephadmNoImage = CephadmNoImage.token
+
+
+class ContainerInspectInfo(NamedTuple):
+ image_id: str
+ ceph_version: Optional[str]
+ repo_digests: Optional[List[str]]
+
+
+def name_to_config_section(name: str) -> ConfEntity:
+ """
+ Map from daemon names to ceph entity names (as seen in config)
+ """
+ daemon_type = name.split('.', 1)[0]
+ if daemon_type in ['rgw', 'rbd-mirror', 'nfs', 'crash', 'iscsi']:
+ return ConfEntity('client.' + name)
+ elif daemon_type in ['mon', 'osd', 'mds', 'mgr', 'client']:
+ return ConfEntity(name)
+ else:
+ return ConfEntity('mon')
+
+
+def forall_hosts(f: Callable[..., T]) -> Callable[..., List[T]]:
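+ # Decorator: call the wrapped function once per element of the argument
+ # list, in parallel on the orchestrator's worker pool, and return the
+ # collected results.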
+ @wraps(f)
+ def forall_hosts_wrapper(*args: Any) -> List[T]:
+ from cephadm.module import CephadmOrchestrator
+
+ # Some slightly odd logic so the wrapped function works whether it is
+ # called as f([...]) or as self.f([...]).
+ if len(args) == 1:
+ vals = args[0]
+ self = None
+ elif len(args) == 2:
+ self, vals = args
+ else:
+ assert False, 'call either f([...]) or self.f([...])'
+
+ def do_work(arg: Any) -> T:
+ if not isinstance(arg, tuple):
+ arg = (arg, )
+ try:
+ if self:
+ return f(self, *arg)
+ return f(*arg)
+ except Exception:
+ logger.exception(f'executing {f.__name__}({args}) failed.')
+ raise
+
+ assert CephadmOrchestrator.instance is not None
+ return CephadmOrchestrator.instance._worker_pool.map(do_work, vals)
+
+ return forall_hosts_wrapper
+
+
+def get_cluster_health(mgr: 'CephadmOrchestrator') -> str:
+ # check cluster health
+ ret, out, err = mgr.check_mon_command({
+ 'prefix': 'health',
+ 'format': 'json',
+ })
+ try:
+ j = json.loads(out)
+ except ValueError:
+ msg = 'Failed to parse health status: Cannot decode JSON'
+ logger.exception('%s: \'%s\'' % (msg, out))
+ raise OrchestratorError('failed to parse health status')
+
+ return j['status']
+
+
+def is_repo_digest(image_name: str) -> bool:
+ """
+ repo digests look like "ceph/ceph@sha256:<digest>"
+ """
+ return '@' in image_name
+
+
+def resolve_ip(hostname: str) -> str:
+ try:
+ r = socket.getaddrinfo(hostname, None, flags=socket.AI_CANONNAME,
+ type=socket.SOCK_STREAM)
+ # pick first v4 IP, if present
+ for a in r:
+ if a[0] == socket.AF_INET:
+ return a[4][0]
+ return r[0][4][0]
+ except socket.gaierror as e:
+ raise OrchestratorError(f"Cannot resolve ip for host {hostname}: {e}")
+
+
+def ceph_release_to_major(release: str) -> int:
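+ # release names are alphabetical, e.g. 'argonaut' -> 1, 'pacific' -> 16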
+ return ord(release[0]) - ord('a') + 1
+
+
+def file_mode_to_str(mode: int) -> str:
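+ # render the permission bits as an ls-style string, e.g. 0o755 -> 'rwxr-xr-x'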
+ r = ''
+ for shift in range(0, 9, 3):
+ r = (
+ f'{"r" if (mode >> shift) & 4 else "-"}'
+ f'{"w" if (mode >> shift) & 2 else "-"}'
+ f'{"x" if (mode >> shift) & 1 else "-"}'
+ ) + r
+ return r
diff --git a/src/pybind/mgr/cephadm/vagrant.config.example.json b/src/pybind/mgr/cephadm/vagrant.config.example.json
new file mode 100644
index 000000000..5b1890924
--- /dev/null
+++ b/src/pybind/mgr/cephadm/vagrant.config.example.json
@@ -0,0 +1,13 @@
+/**
+ * To use a permanent config, copy this file to "vagrant.config.json",
+ * edit it, and remove this comment, because comments are not allowed
+ * in a valid JSON file.
+ */
+
+{
+ "mgrs": 1,
+ "mons": 1,
+ "osds": 1,
+ "disks": 2
+}
+